恢复sphinx配置

2015-01-29 14:43:28 +08:00
parent 68fa0387d9
commit 5627ab4201
7 changed files with 276201 additions and 1 deletions
--- a/exts/chinese_search.py
+++ b/exts/chinese_search.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*- 
+
+def setup(app):  # Extension entry point; `app` is not used. Registers the Chinese search language with sphinx.search.
+    import sphinx.search as search
+    import zh  # sibling module exts/zh.py; it must be importable by its bare name
+    search.languages["zh_CN"] = zh.SearchChinese  # make SearchChinese available under the language code "zh_CN"
--- a/exts/main.dic
+++ b/exts/main.dic
--- a/exts/smallseg.py
+++ b/exts/smallseg.py
@@ -0,0 +1,144 @@
+# -*- coding: utf-8 -*-
+import re
+import os
+import sys
+class SEG(object):  # Dictionary-driven word segmenter; cut() scans the text from its end toward its start.
+    def __init__(self):  # Build the trie from main.dic and load suffix.dic, both read from this module's directory.
+        _localDir=os.path.dirname(__file__)
+        _curpath=os.path.normpath(os.path.join(os.getcwd(),_localDir))  # a relative module directory is resolved against the cwd
+        curpath=_curpath
+        self.d = {}  # root of the nested-dict trie filled by set()
+        print >> sys.stderr,"loading dict..."
+        self.set([x.rstrip() for x in file(os.path.join(curpath,"main.dic")) ])  # NOTE(review): this file handle is never explicitly closed
+        self.specialwords= set([x.rstrip().decode('utf-8') for x in file(os.path.join(curpath,"suffix.dic"))])  # unicode entries, one per line; handle is also left unclosed
+        print >> sys.stderr,'dict ok.'
+    # Insert every word of `keywords` (an iterable of UTF-8 byte strings) into the trie self.d.
+    def set(self,keywords):
+        p = self.d
+        q = {}  # dict that holds the most recently created placeholder entry
+        k = ''  # key of that placeholder inside q
+        for word in keywords:
+            word = (chr(11)+word).decode('utf-8')  # chr(11) is prefixed as a marker; it is inserted last, so it sits at the deepest level
+            if len(word)>5:  # skip entries longer than 4 characters (the marker counts as one)
+                continue
+            p = self.d
+            ln = len(word)
+            for i in xrange(ln-1,-1,-1):  # walk the word from its last character back to the marker
+                char = word[i].lower()
+                if p=='':  # '' is a leaf placeholder; swap it for a real dict before descending further
+                    q[k] = {}
+                    p = q[k]
+                if not (char in p):
+                    p[char] = ''
+                    q = p
+                    k = char
+                p = p[char]
+        
+        pass  # no-op
+    
+    def _binary_seg(self,s):  # Return [s] for a single character, otherwise the overlapping 2-character windows of s, last window first.
+        ln = len(s)
+        if ln==1:
+            return [s]
+        R = []
+        for i in xrange(ln,1,-1):
+            tmp = s[i-2:i]
+            R.append(tmp)
+        return R
+    
+    def _pro_unreg(self,piece):  # Tokenize a span that matched no dictionary word; tokens are produced from the end of the span backwards.
+        #print piece
+        R = []
+        tmp = re.sub(u"。|，|,|！|…|!|《|》|<|>|\"|'|:|：|？|\?|、|\||“|”|‘|’|；|—|（|）|·|\(|\)|　"," ",piece).split()  # punctuation becomes a space, then split on whitespace
+        ln1 = len(tmp)  # NOTE(review): ln1 is never read
+        for i in xrange(len(tmp)-1,-1,-1):
+            mc = re.split(r"([0-9A-Za-z\-\+#@_\.]+)",tmp[i])  # the capturing group keeps the ASCII runs in the split result
+            for j in xrange(len(mc)-1,-1,-1):
+                r = mc[j]
+                if re.search(r"([0-9A-Za-z\-\+#@_\.]+)",r)!=None:  # ASCII word/number run: keep it whole
+                    R.append(r)
+                else:  # anything else is broken into 2-character windows
+                    R.extend(self._binary_seg(r))
+        return R
+        
+        
+    def cut(self,text):  # Segment `text` (a UTF-8 byte string) and return the list of unicode tokens in scan order (end of text first).
+        """
+        """
+        text = text.decode('utf-8','ignore')  # undecodable bytes are dropped
+        p = self.d  # current trie node
+        ln = len(text)
+        i = ln  # right edge of the candidate currently being matched
+        j = 0  # number of characters matched so far; the candidate is text[i-j:i]
+        z = ln  # right edge of the text not yet emitted; text[i:z] is the unmatched span
+        q = 0  # only reassigned when mem2 is unpacked (an index into recognised)
+        recognised = []  # output tokens
+        mem = None  # saved (i,j,z) of a dictionary match of length <= 2
+        mem2 = None  # saved (i,j,z,len(recognised)) taken when text[i-1] is in specialwords
+        while i-j>0:
+            t = text[i-j-1].lower()  # next character to the left of the current candidate
+            #print i,j,t,mem
+            if not (t in p):  # the trie has no continuation for t
+                if (mem!=None) or (mem2!=None):
+                    if mem!=None:
+                        i,j,z = mem  # fall back to the saved short match
+                        mem = None
+                    elif mem2!=None:
+                        delta = mem2[0]-i  # how far the scan has moved left since mem2 was saved
+                        if delta>=1:
+                            if (delta<5) and (re.search(ur"[\w\u2E80-\u9FFF]",t)!=None):
+                                pre = text[i-j]
+                                #print pre
+                                if not (pre in self.specialwords):
+                                    i,j,z,q = mem2  # rewind to the state saved in mem2
+                                    del recognised[q:]  # discard the tokens emitted since mem2 was saved
+                            mem2 = None
+                            
+                    p = self.d
+                    if((i<ln) and (i<z)):  # flush the unmatched span to the right of the candidate first
+                        unreg_tmp = self._pro_unreg(text[i:z])
+                        recognised.extend(unreg_tmp)
+                    recognised.append(text[i-j:i])
+                    #print text[i-j:i],mem2
+                    i = i-j
+                    z = i
+                    j = 0
+                    continue
+                j = 0  # nothing saved to fall back on: skip one character and restart from the trie root
+                i -= 1
+                p = self.d
+                continue
+            p = p[t]
+            j+=1
+            if chr(11) in p:  # marker present: text[i-j:i] is a complete dictionary word
+                if j<=2:  # short match: remember it and keep scanning for a longer one
+                    mem = i,j,z
+                    #print text[i-1]
+                    if (z-i<2) and (text[i-1] in self.specialwords) and ((mem2==None) or ((mem2!=None and mem2[0]-i>1))):
+                        #print text[i-1]
+                        mem = None
+                        mem2 = i,j,z,len(recognised)
+                        p = self.d
+                        i -= 1
+                        j = 0
+                    continue
+                    #print mem
+                p = self.d
+                #print i,j,z,text[i:z]
+                if((i<ln) and (i<z)):
+                    unreg_tmp = self._pro_unreg(text[i:z])
+                    recognised.extend(unreg_tmp)
+                recognised.append(text[i-j:i])  # emit the matched word (longer than 2 characters)
+                i = i-j
+                z = i
+                j = 0
+                mem = None
+                mem2 = None
+        #print mem
+        if mem!=None:  # a short match was still pending when the scan reached the start of the text
+            i,j,z = mem
+            recognised.extend(self._pro_unreg(text[i:z]))
+            recognised.append(text[i-j:i])        
+        else:  # otherwise everything left of z is handled as unmatched text
+            recognised.extend(self._pro_unreg(text[i-j:z]))
+        return recognised
--- a/exts/suffix.dic
+++ b/exts/suffix.dic
@@ -0,0 +1,114 @@
+和
+是
+了
+中
+有
+都
+的
+来
+在
+次
+还
+但
+为
+里
+用
+外
+上
+下
+就
+以
+去
+即
+丁
+万
+乔
+余
+候
+傅
+冯
+刘
+单
+卢
+史
+叶
+吕
+吴
+唐
+夏
+姚
+姜
+孔
+孙
+孟
+宋
+尹
+崔
+常
+康
+廖
+张
+彭
+徐
+戴
+文
+方
+易
+曹
+曾
+朱
+李
+杜
+杨
+林
+梁
+武
+段
+毛
+江
+汤
+沈
+潘
+熊
+王
+田
+白
+石
+秦
+罗
+肖
+胡
+苏
+范
+董
+蒋
+薛
+袁
+谢
+谭
+贾
+赖
+赵
+邓
+邱
+邵
+邹
+郑
+郝
+郭
+金
+钟
+钱
+阎
+陆
+陈
+雷
+韩
+顾
+马
+高
+魏
+黄
+黎
+龙
+龚
--- a/exts/zh.py
+++ b/exts/zh.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+from sphinx.search import SearchLanguage
+from smallseg import SEG 
+
+class SearchChinese(SearchLanguage):  # SearchLanguage subclass that tokenizes text with smallseg's SEG segmenter.
+    lang = 'zh'  # NOTE(review): exts/chinese_search.py registers this class under "zh_CN" - confirm the difference is intended
+
+    def init(self, options):  # `options` is unused; creates the segmenter (SEG() loads its dictionary files)
+        print "reading Chiniese dictionary"  # NOTE(review): "Chiniese" is a typo in this runtime message
+        self.seg = SEG() 
+
+    def split(self, input):  # encode to UTF-8 bytes because SEG.cut() decodes its argument from UTF-8
+        return self.seg.cut(input.encode("utf8")) 
+
+    def word_filter(self, stemmed_word):  # keep only tokens longer than one character
+        return len(stemmed_word) > 1
--- a/requirements.txt
+++ b/requirements.txt
--- a/source/c08/p10_using_lazily_computed_properties.rst
+++ b/source/c08/p10_using_lazily_computed_properties.rst
@@ -5,7 +5,11 @@
 ----------
 问题
 ----------
-todo...
+You’d like to define a read-only attribute as a property that only gets computed on access.
+However, once accessed, you’d like the value to be cached and not recomputed on each
+access.
+
+

 ----------
 解决方案