coreseek为了扩展,增加了python数据源功能,从而得以无限扩展Coreseek/Sphinx的数据获取功能。
这样一来,coreseek 就非常灵活:只要编写自己的 Python 数据源类,基本上可以为任何数据建立搜索引擎。
下面以 Microsoft SQL Server 为例,给出配置文件和 Python 数据源类的写法:
csft_python.conf
# Python data source settings: the two path entries point at the pysource
# directory and at the csft_demo_pymssql package inside it.
python
{
path = /to/coreseek-3.2.14/testpack/etc/pysource
path = /to/coreseek-3.2.14/testpack/etc/pysource/csft_demo_pymssql # setting for BSD/Linux environments
}
# Source definition: rows are supplied by the Python class named in `name`.
source python_demo
{
type = python
name = csft_demo_pymssql.MainSource
#name = MainSource
}
# Index definition built from the python_demo source.
index python_demo
{
source = python_demo # name of the corresponding source
path = var/data/python_demo
#docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
html_strip = 0
#charset_dictpath = /usr/local/mmseg3/etc/ # setting for BSD/Linux environments; must end with /
#charset_dictpath = etc/ # setting for Windows environments; must end with /
charset_type = utf-8
}
# Indexer settings.
indexer
{
mem_limit = 128M
}
# searchd settings: listen value, limits, and pid/log file locations.
searchd
{
listen = 9353
read_timeout = 5
max_children = 30
max_matches = 1000
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
pid_file = var/log/searchd_python.pid
log = var/log/searchd_python.log
query_log = var/log/query_python.log
}
/to/coreseek-3.2.14/testpack/etc/pysource/csft_demo_pymssql
目录下的 __init__.py:
# -*- coding: UTF-8 -*-
from os import path
import os
import sys
import pymssql
import datetime
class MainSource(object):
    """Python data source that feeds rows from Microsoft SQL Server to an indexer.

    The class exposes the hook methods of the Coreseek Python data source
    interface: ``GetScheme`` and ``GetFieldOrder`` describe the document
    layout, ``Connected`` loads the rows, and ``NextDocument`` publishes one
    row at a time as instance attributes until it returns ``False``.
    """

    def __init__(self, conf):
        """Store the configuration and reset the iteration state.

        Args:
            conf: configuration object supplied by the caller; it is only
                stored on the instance here.
        """
        self.conf = conf
        self.idx = 0     # index of the next row NextDocument() will publish
        self.data = []   # rows loaded by Connected()
        self.conn = None
        self.cur = None

    def GetScheme(self):
        """Return the document structure: a docid, a text field, an integer."""
        return [
            ('id', {'docid': True}),
            ('xx', {'type': 'text'}),
            ('xx1', {'type': 'integer'}),
        ]

    def GetFieldOrder(self):
        """Return the priority order of the text fields.

        Only names declared in ``GetScheme`` make sense here, so this lists
        the single text field ``'xx'``.
        """
        return ['xx']

    def Connected(self):
        """Open the database connection and load the rows to index.

        Does nothing when a connection already exists, so repeated calls do
        not re-run the query.
        """
        if self.conn is not None:
            return
        # NOTE(review): host and credentials are hard-coded; consider moving
        # them into configuration before using this outside a demo.
        self.conn = pymssql.connect(
            host='192.168.40.105',
            user='sa',
            password='987654321sa',
            database='xxx',
            as_dict=True,
            charset='cp936',
        )
        self.cur = self.conn.cursor()
        self.cur.execute('SELECT top 12 id,xx,xx1 FROM tbxx')
        self.data = list(self.cur)

    @staticmethod
    def _column(row, name, position):
        """Read one column from a dict row (by name) or a sequence row (by position)."""
        if isinstance(row, dict):
            return row[name]
        return row[position]

    def NextDocument(self):
        """Publish the next row as attributes; called once per document.

        Sets ``docid``/``id``, ``xx`` (UTF-8 encoded text) and ``xx1``.

        Returns:
            True when a row was published, False when the rows are exhausted
            or the current row could not be read.
        """
        if self.idx >= len(self.data):
            return False
        row = self.data[self.idx]
        try:
            doc_id = self._column(row, 'id', 0)
            text = self._column(row, 'xx', 1)
            number = self._column(row, 'xx1', 2)
            # A NULL text column is indexed as empty text instead of failing.
            encoded = (u'' if text is None else text).encode('utf-8')
            # A NULL integer column is indexed as 0.
            attr_value = 0 if number is None else int(number)
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as exc:
            # Report the failure instead of hiding it, then stop iterating.
            sys.stderr.write(
                "MainSource: cannot read row %d: %r\n" % (self.idx, exc))
            return False
        self.docid = self.id = doc_id  # 'docid': True
        self.xx = encoded
        self.xx1 = attr_value
        self.idx += 1
        return True
if __name__ == "__main__":
    # Stand-alone demo: run this file directly to dump the documents the
    # data source would hand to the indexer.
    conf = {}
    source = MainSource(conf)
    source.Connected()
    while source.NextDocument():
        # A single parenthesised argument prints the same under Python 2 and
        # is also valid Python 3 syntax.
        print("id=%d, subject=%s" % (source.docid, source.xx))
#eof
参考地址:http://www.coreseek.cn/products-install/python/
posted on 2013-03-14 17:38
漂漂 阅读(1470)
评论(0) 编辑 收藏 引用