import xapian, config
from mmseg.search import seg_txt_2_dict
class Xapian(object):
    """Wrapper around a writable Xapian database using mmseg word segmentation.

    Text is split into terms with ``seg_txt_2_dict``; every resulting key is
    stored as a plain term on the document. Search results are ordered by
    value slot 1 (see ``__init__``).
    """

    def __init__(self):
        """Open (or create) the index at ``config.xapian_index_dir``.

        Also prepares a reusable ``xapian.Enquire`` object bound to the
        database.
        """
        self.db = xapian.WritableDatabase(
            config.xapian_index_dir, xapian.DB_CREATE_OR_OPEN
        )
        self.enquire = xapian.Enquire(self.db)
        # Sort matches by value slot 1 with the "reverse" flag set.
        self.enquire.set_sort_by_value(1, True)

    @staticmethod
    def _add_terms(doc, text):
        """Segment ``text`` and add every segment to ``doc`` as a term."""
        # Only the keys of the segmentation result are used as terms.
        for word in seg_txt_2_dict(text):
            doc.add_term(word)

    @staticmethod
    def _add_values(doc, values):
        """Copy every ``slot -> value`` pair of ``values`` onto ``doc``."""
        # Values are used for sorting; slot keys apparently must be numeric.
        for key, value in values.items():
            doc.add_value(key, value)

    def _replace_document(self, id, doc):
        """Store ``doc`` under ``id``; return True on success, False on error."""
        try:
            self.db.replace_document(id, doc)
        except Exception:
            # Best-effort contract: report failure instead of raising, but
            # let KeyboardInterrupt / SystemExit propagate.
            return False
        return True

    def get_document(self, id):
        """Fetch a document from the index.

        :id: document id
        :returns: whatever ``self.db.get_document`` returns for ``id``
        """
        return self.db.get_document(id)

    def delete_document(self, id):
        """Delete a document from the index.

        :id: id of the document to delete
        :returns: result of the underlying delete call, or None if it raised
        """
        try:
            return self.db.delete_document(id)
        except Exception:
            return None

    def update_index(self, id, text=None, values=None, data=None):
        """Update parts of an already indexed document.

        Only the truthy arguments are applied; each one replaces the
        corresponding part of the stored document.

        :id: id of the document to update
        :text: new text; existing terms are cleared and re-generated
        :values: dict of ``slot -> value``; existing values are cleared first
        :data: new document data
        :returns: True on success, False if the document could not be
                  loaded or stored
        """
        try:
            doc = self.get_document(id)
        except Exception:
            return False
        if text:
            doc.clear_terms()
            self._add_terms(doc, text)
        if values:
            doc.clear_values()
            self._add_values(doc, values)
        if data:
            doc.set_data(data)
        return self._replace_document(id, doc)

    def index(self, id, text, values=None, data=''):
        """Index a new document, replacing any existing one with the same id.

        :id: document id
        :text: content to make searchable (UTF-8 encoded)
        :values: optional dict of ``slot -> value`` used for sorting
        :data: optional document data
        :returns: True on success, False if storing the document failed
        """
        doc = xapian.Document()
        self._add_terms(doc, text)
        if values:
            self._add_values(doc, values)
        if data:
            doc.set_data(data)
        return self._replace_document(id, doc)

    def search(self, keywords, offset=0, limit=10):
        """Search the index for documents containing all segmented keywords.

        :keywords: search text; it is encoded to UTF-8 before segmentation
        :offset: index of the first match to return
        :limit: maximum number of matches to return
        :returns: the match set returned by ``Enquire.get_mset``
        """
        segments = seg_txt_2_dict(keywords.encode('utf-8'))
        query_list = [xapian.Query(word) for word in segments]
        if len(query_list) == 1:
            query = query_list[0]
        else:
            # Zero or several terms are combined with OP_AND.
            query = xapian.Query(xapian.Query.OP_AND, query_list)
        self.enquire.set_query(query)
        # Third argument is get_mset's "check at least" document count.
        return self.enquire.get_mset(offset, limit, 10000)

    def flush(self):
        """Flush pending changes to disk.

        :returns: result of the underlying flush call
        """
        return self.db.flush()
# Module-level shared instance; constructing it opens (or creates) the index
# database at config.xapian_index_dir as soon as this module is imported.
search = Xapian()
#//python/8953