diff --git a/README.rst b/README.rst index be90ebf..67f5dab 100644 --- a/README.rst +++ b/README.rst @@ -24,9 +24,9 @@ -------------------------------------------------------------- -++++++++ -备注 -++++++++ +++++++++++++++++ +项目说明 +++++++++++++++++ 1. 原版PDF下载地址: http://pan.baidu.com/s/1dDhByJv #. 所有文档均使用reStructuredText编辑,参考 reStructuredText_ #. 当前文档生成托管在 readthedocs_ 上 @@ -51,3 +51,31 @@ .. _reStructuredText: http://docutils.sourceforge.net/docs/user/rst/quickref.html .. _python3-cookbook: http://python3-cookbook.readthedocs.org/zh_CN/latest/ +----------------------------------------------------- +## How to Contribute + +You are welcome to contribute to mango-test as follows: + +* add/edit wiki +* report/fix issue +* code review +* commit new feature +* add test cases + +Meanwhile you'd better follow the rules below: + +* It's *NOT* recommended to submit a pull request directly to `master` branch. `develop` branch is more appropriate +* Follow common Python coding conventions +* Add the following [license](#license) in each source file + +## License + +(The Apache License) + +Copyright (c) 2013-2014 [WinHong, Inc.](http://www.winhong.com/) and other contributors + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
def logged(when):
    """Return a decorator that logs every call to the function it wraps.

    Args:
        when: ``'pre'`` to log before the wrapped function runs, or
            ``'post'`` to log after it finishes (also when it raises) and
            additionally print the elapsed wall-clock time.

    Returns:
        A decorator taking a function and returning a wrapper that
        preserves the wrapped function's metadata via ``functools.wraps``.

    Raises:
        ValueError: if ``when`` is neither ``'pre'`` nor ``'post'``.
    """
    def log(f, *args, **kwargs):
        print('Called: function: %s, args: %r, kargs: %r' % (f, args, kwargs))

    def pre_decorator(func):
        @wraps(func)
        def func_wrapper(*args, **kwargs):
            log(func, *args, **kwargs)
            return func(*args, **kwargs)
        return func_wrapper

    def post_decorator(func):
        @wraps(func)
        def func_wrapper(*args, **kwargs):
            now = time()
            try:
                return func(*args, **kwargs)
            finally:
                # Runs on both normal return and exception, so the call is
                # always logged together with how long it took.
                log(func, *args, **kwargs)
                print('time delta: %s' % (time() - now))
        return func_wrapper

    decorators = {'pre': pre_decorator, 'post': post_decorator}
    try:
        return decorators[when]
    except KeyError:
        # Report the offending value in a readable message and drop the
        # internal KeyError from the traceback: it is an implementation detail.
        raise ValueError(
            'when must be "pre" or "post", got %r' % (when,)) from None
def aa(size=9):
    """Yield the cells of a lower-triangular multiplication table.

    For each row ``x`` from 1 to ``size`` the generator yields one
    tab-terminated cell ``'y * x = product\\t'`` for every ``y`` in
    ``1..x``, followed by a single ``'\\n'`` that closes the row.

    Args:
        size: number of rows to produce; defaults to 9, the classic
            "nine-nine" table. A value below 1 yields nothing.

    Yields:
        str: the next cell, or ``'\\n'`` at the end of each row.
    """
    for x in range(1, size + 1):
        for y in range(1, x + 1):
            yield '%d * %d = %d\t' % (y, x, x * y)
        # Explicit row terminator instead of an extra sentinel iteration.
        yield '\n'
def extract_content(xml):
    """Find every ``div`` element with ``class="articleText"`` in *xml*.

    Parsing uses the standard-library ElementTree XPath subset; lxml is an
    alternative when full XPath support is needed.

    Args:
        xml: well-formed XML text (or bytes) to parse.

    Returns:
        list: the matching descendant elements in document order; empty
        when nothing matches. The root element itself is never included.

    Raises:
        xml.etree.ElementTree.ParseError: if *xml* is not well-formed.
    """
    doc = ET.fromstring(xml)
    # ElementTree refuses absolute paths on an element ("//div..." raises
    # SyntaxError), so search all descendants with the relative ".//" form.
    tt = doc.findall(".//div[@class='articleText']")
    print(tt)
    return tt

\s*(((?!', r.text) + for f in finds: + print(f.group(1)) + + +if __name__ == '__main__': + lover_sentences_01() diff --git a/commons/requests/quickstart.py b/commons/requests/quickstart.py index 312c3c1..1351d66 100644 --- a/commons/requests/quickstart.py +++ b/commons/requests/quickstart.py @@ -5,7 +5,6 @@ Topic: requests入门 """ import requests import re -from PIL import Image from io import StringIO import json diff --git a/commons/requests/realworld.py b/commons/requests/realworld.py index 8e974dc..f5ad462 100644 --- a/commons/requests/realworld.py +++ b/commons/requests/realworld.py @@ -5,7 +5,6 @@ Topic: 实战演练 """ import requests import re -from PIL import Image from io import StringIO import json from requests import Request, Session @@ -13,7 +12,6 @@ from contextlib import closing from requests.auth import AuthBase from requests.auth import HTTPBasicAuth from requests.auth import HTTPDigestAuth -from requests_oauthlib import OAuth2 import xml.etree.ElementTree as ET diff --git a/cookbook/c06/p12_var_binary.py b/cookbook/c06/p12_var_binary.py new file mode 100644 index 0000000..7622bbe --- /dev/null +++ b/cookbook/c06/p12_var_binary.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +""" +Topic: 读写变长的二进制格式文件 +Desc : +""" +import struct +import itertools + + +def write_polys(filename, polys): + # Determine bounding box + flattened = list(itertools.chain(*polys)) + min_x = min(x for x, y in flattened) + max_x = max(x for x, y in flattened) + min_y = min(y for x, y in flattened) + max_y = max(y for x, y in flattened) + with open(filename, 'wb') as f: + f.write(struct.pack('>> f = open('polys.bin', 'rb') + >>> phead = PolyHeader(f.read(40)) + >>> phead.file_code == 0x1234 + True + >>> phead.min_x + 0.5 + >>> phead.min_y + 0.5 + >>> phead.max_x + 7.0 + >>> phead.max_y + 9.2 + >>> phead.num_polys + 3 + >>> + +这个很有趣,不过这种方式还是有一些烦人的地方。首先,尽管你获得了一个类接口的便利, +但是这个代码还是有点臃肿,还需要使用者指定很多底层的细节(比如重复使用 ``StructField`` ,指定偏移量等)。 +另外,返回的结果类同样缺少一些便利的方法来计算结构的总大小。 + 
class StructureMeta(type):
    """Metaclass that automatically creates StructField descriptors.

    When a class using this metaclass is created, its ``_fields_`` list of
    ``(format, fieldname)`` pairs is walked in order. Each field becomes a
    ``StructField(format, offset)`` class attribute, and the accumulated
    byte size is stored on the class as ``struct_size``.

    A byte-order prefix (``<``, ``>``, ``!``, ``@`` or ``=``) on one field
    is remembered and applied to every following field until another
    prefix appears.
    """
    def __init__(cls, clsname, bases, clsdict):
        super().__init__(clsname, bases, clsdict)
        fields = getattr(cls, '_fields_', [])
        byte_order = ''
        offset = 0
        for fmt, fieldname in fields:
            if fmt.startswith(('<', '>', '!', '@', '=')):
                byte_order = fmt[0]
                fmt = fmt[1:]
            # Re-attach the current byte order so calcsize() and the
            # descriptor both see a complete format string.
            fmt = byte_order + fmt
            setattr(cls, fieldname, StructField(fmt, offset))
            offset += struct.calcsize(fmt)
        setattr(cls, 'struct_size', offset)


class Structure(metaclass=StructureMeta):
    """Base class for fixed-size binary records described by ``_fields_``."""

    def __init__(self, bytedata):
        # Wrap the data in a memoryview so that slicing the buffer (for
        # nested or repeated structures) creates views instead of copies.
        self._buffer = memoryview(bytedata)

    @classmethod
    def from_file(cls, f):
        """Read exactly ``cls.struct_size`` bytes from *f* and wrap them."""
        return cls(f.read(cls.struct_size))
class NestedStruct:
    """Descriptor representing a structure nested inside another one.

    On first access through an instance, the slice of the owner's
    ``_buffer`` that belongs to the nested structure is wrapped in
    ``struct_type`` and the result is stored on the instance under the same
    name, so later accesses bypass the descriptor entirely.
    """
    def __init__(self, name, struct_type, offset):
        self.name = name
        self.struct_type = struct_type
        self.offset = offset

    def __get__(self, instance, cls):
        if instance is None:
            # Accessed on the class: expose the descriptor itself.
            return self
        start = self.offset
        stop = start + self.struct_type.struct_size
        result = self.struct_type(instance._buffer[start:stop])
        # Save the resulting structure back on the instance to avoid
        # recomputing it; this works because the descriptor defines no
        # __set__, so the instance attribute takes precedence.
        setattr(instance, self.name, result)
        return result


class StructureMeta(type):
    """Metaclass that automatically creates field descriptors.

    Each ``(format, fieldname)`` pair in ``_fields_`` becomes a class
    attribute: a ``NestedStruct`` when ``format`` is itself a class created
    by this metaclass, otherwise a ``StructField`` for the struct format
    string. Offsets accumulate in declaration order and the total is stored
    as ``struct_size``. A byte-order prefix (``<``, ``>``, ``!``, ``@`` or
    ``=``) carries over to the following string fields.
    """
    def __init__(cls, clsname, bases, clsdict):
        super().__init__(clsname, bases, clsdict)
        fields = getattr(cls, '_fields_', [])
        byte_order = ''
        offset = 0
        for fmt, fieldname in fields:
            if isinstance(fmt, StructureMeta):
                setattr(cls, fieldname,
                        NestedStruct(fieldname, fmt, offset))
                offset += fmt.struct_size
            else:
                if fmt.startswith(('<', '>', '!', '@', '=')):
                    byte_order = fmt[0]
                    fmt = fmt[1:]
                fmt = byte_order + fmt
                setattr(cls, fieldname, StructField(fmt, offset))
                offset += struct.calcsize(fmt)
        setattr(cls, 'struct_size', offset)
class SizedRecord:
    """A chunk of bytes that was stored with a leading size prefix.

    The payload is held as a ``memoryview`` so it can be sliced and
    unpacked without copying.
    """
    def __init__(self, bytedata):
        self._buffer = memoryview(bytedata)

    @classmethod
    def from_file(cls, f, size_fmt, includes_size=True):
        """Read one size-prefixed record from the binary file *f*.

        Args:
            f: file-like object opened in binary mode.
            size_fmt: struct format of the size prefix, e.g. ``'<i'``.
            includes_size: whether the stored size also counts the bytes
                of the prefix itself.

        Returns:
            A new instance wrapping the payload bytes.

        Raises:
            struct.error: if the size prefix cannot be read in full.
            ValueError: if the stored size implies a negative payload.
        """
        sz_nbytes = struct.calcsize(size_fmt)
        sz_bytes = f.read(sz_nbytes)
        sz, = struct.unpack(size_fmt, sz_bytes)
        payload_size = sz - includes_size * sz_nbytes
        if payload_size < 0:
            # A negative count would make f.read() consume the rest of the
            # file, silently swallowing all following records.
            raise ValueError(
                'record size %d is smaller than its %d-byte size prefix'
                % (sz, sz_nbytes))
        return cls(f.read(payload_size))

    def iter_as(self, code):
        """Iterate over the payload as a sequence of equally sized items.

        Args:
            code: either a struct format string (items are the unpacked
                tuples) or a class created by ``StructureMeta`` (items are
                instances over consecutive ``struct_size``-byte slices).

        Raises:
            TypeError: if *code* is neither of the supported kinds; raised
                when iteration starts, since this is a generator.
        """
        if isinstance(code, str):
            s = struct.Struct(code)
            for off in range(0, len(self._buffer), s.size):
                yield s.unpack_from(self._buffer, off)
        elif isinstance(code, StructureMeta):
            size = code.struct_size
            for off in range(0, len(self._buffer), size):
                data = self._buffer[off:off+size]
                yield code(data)
        else:
            raise TypeError(
                'code must be a struct format string or a Structure class, '
                'not %s' % type(code).__name__)
code-block:: python + + class ShapeFile(Structure): + _fields_ = [ ('>i', 'file_code'), # Big endian + ('20s', 'unused'), + ('i', 'file_length'), + ('