2015-05-07 15:50:24 +08:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
from sphinx.search import SearchLanguage
|
|
|
|
|
from smallseg import SEG
|
|
|
|
|
|
|
|
|
|
class SearchChinese(SearchLanguage):
    """Chinese (``zh``) search-language support backed by a smallseg ``SEG``.

    Subclasses ``sphinx.search.SearchLanguage`` and defines ``init``,
    ``split`` and ``word_filter``.
    NOTE(review): these are presumably the hooks Sphinx calls while building
    the search index -- confirm against the Sphinx version in use.
    """

    lang = 'zh'

    def init(self, options):
        """Create the segmenter later used by ``split``.

        :param options: accepted for interface compatibility; not read here.
        """
        # Progress message on stdout; presumably SEG() loads its dictionary
        # when constructed -- TODO confirm against smallseg.
        print("reading Chinese dictionary")
        self.seg = SEG()

    def split(self, input):
        """Segment ``input`` into words with the smallseg segmenter.

        :param input: text to segment; it is encoded to UTF-8 before being
            handed to ``SEG.cut``. (The name shadows the builtin but is kept
            so the method signature stays unchanged.)
        :returns: whatever ``self.seg.cut`` returns -- presumably a sequence
            of word tokens; verify against smallseg.
        """
        encoded = input.encode("utf8")
        return self.seg.cut(encoded)

    def word_filter(self, stemmed_word):
        """Return ``True`` when ``stemmed_word`` is longer than one character.

        :param stemmed_word: the candidate word to test.
        :returns: ``True`` if ``len(stemmed_word) > 1``, otherwise ``False``.
        """
        return len(stemmed_word) > 1
|