两点水

2017-12-26 11:14:36 +08:00
parent c7c4d7a559
commit 1a74b345e5
15 changed files with 799 additions and 0 deletions
--- a/Code/Python14Code/com/init.py
+++ b/Code/Python14Code/com/init.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
--- a/Code/Python14Code/com/twowater/init.py
+++ b/Code/Python14Code/com/twowater/init.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
--- a/Code/Python14Code/com/twowater/test1.py
+++ b/Code/Python14Code/com/twowater/test1.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import re
+
+# Define a constant: several names separated by '|'
+a = '两点水|twowater|liangdianshui|草根程序员|ReadingWithU'
+
+# Check whether a contains "两点水" using built-in str operations.
+# str.find returns -1 when absent (str.index would raise ValueError instead).
+print('a 是否含有“两点水”这个字符串：{0}'.format(a.find('两点水') > -1))
+print('a 是否含有“两点水”这个字符串：{0}'.format('两点水' in a))
+
+# The same check with a regular expression
+
+findall = re.findall('两点水', a)
+print(findall)
+
+if findall:
+    print('a 含有“两点水”这个字符串')
+else:
+    print('a 不含有“两点水”这个字符串')
+
+# Collect every lowercase ASCII letter in a
+
+re_findall = re.findall('[a-z]', a)
+
+print(re_findall)
--- a/Code/Python14Code/com/twowater/test2.py
+++ b/Code/Python14Code/com/twowater/test2.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import re
+
+a = 'uav,ubv,ucv,uwv,uzv,ucv,uov'
+
+# Character sets
+
+# [abc] matches exactly one of a, b or c between u and v
+findall = re.compile('u[abc]v').findall(a)
+print(findall)
+# A run of consecutive letters or digits can be written as a range with -
+l = re.compile('u[a-c]v').findall(a)
+print(l)
+
+# [^abc] matches any single character other than a, b or c between u and v
+re_findall = re.compile('u[^abc]v').findall(a)
+print(re_findall)
--- a/Code/Python14Code/com/twowater/test3.py
+++ b/Code/Python14Code/com/twowater/test3.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import re
+
+a = 'uav_ubv_ucv_uwv_uzv_ucv_uov&123-456-789'
+
+# Shorthand character classes (raw strings keep the backslash for the regex)
+
+# \d behaves like [0-9] on this ASCII sample: matches digit characters
+# \D behaves like [^0-9] on this ASCII sample: matches non-digit characters
+findall1 = re.findall(r'\d', a)
+findall2 = re.findall('[0-9]', a)
+findall3 = re.findall(r'\D', a)
+findall4 = re.findall('[^0-9]', a)
+print(findall1)
+print(findall2)
+print(findall3)
+print(findall4)
+
+# \w matches word characters incl. underscore; like [A-Za-z0-9_] for ASCII text
+findall5 = re.findall(r'\w', a)
+findall6 = re.findall('[A-Za-z0-9_]', a)
+print(findall5)
+print(findall6)
--- a/Code/Python14Code/com/twowater/test4.py
+++ b/Code/Python14Code/com/twowater/test4.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import re
+
+a = 'java*&39android##@@python'
+
+# Quantifiers: {4,7} matches between 4 and 7 repetitions
+
+findall = re.compile('[a-z]{4,7}').findall(a)
+print(findall)
+
+# Greedy vs. non-greedy: a trailing ? makes the quantifier take as few as it can
+
+re_findall = re.compile('[a-z]{4,7}?').findall(a)
+print(re_findall)
--- a/Code/Python14Code/com/twowater/test5.py
+++ b/Code/Python14Code/com/twowater/test5.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import re
+
+a = '347073565'
+
+# Boundary matchers: $ anchors the match at the end of the string
+
+findall = re.findall(r'\d{6}565$', a)
+
+print(findall)
--- a/Code/Python14Code/com/twowater/test6.py
+++ b/Code/Python14Code/com/twowater/test6.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import re
+
+a = 'pythonpythonpython'
+
+# Groups: (python){3} requires the group to repeat three times in a row
+
+findall = re.compile('(python){3}').findall(a)
+print(findall)
+
+
--- a/Code/Python14Code/com/twowater/test7.py
+++ b/Code/Python14Code/com/twowater/test7.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+import re
+
+a = 'Python*Android*Java-888'
+
+# Replace every * in the string with & (raw string: \* is a literal asterisk)
+sub1 = re.sub(r'\*', '&', a)
+print(sub1)
+
+# Replace only the first * with &; count is passed by keyword for clarity
+sub2 = re.sub(r'\*', '&', a, count=1)
+print(sub2)
+
+
+# Replace every * with & and every - with | in a single pass.
+# Step 1: define the replacement callback.
+def convert(value):
+    """Return the replacement text for one re match object.
+
+    '*' maps to '&' and '-' to '|'; other text is returned unchanged.
+    """
+    group = value.group()
+    return {'*': '&', '-': '|'}.get(group, group)
+
+
+# Step 2: the replacement argument of re.sub may be a function called per match
+sub3 = re.sub(r'[\*-]', convert, a)
+print(sub3)
--- a/Code/Python14Code/com/twowater/test8.py
+++ b/Code/Python14Code/com/twowater/test8.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+# 提取图片的地址
+
+import re
+
+a = '<img src="https://s-media-cache-ak0.pinimg.com/originals/a8/c4/9e/a8c49ef606e0e1f3ee39a7b219b5c05e.jpg">'
+
+# Approach 1: search for the first match
+search = re.compile('<img src="(.*)">').search(a)
+# group(0) is the whole match; group(1) is the first captured group
+print(search.group(0))
+print(search.group(1))
+
+# Approach 2: findall returns the captured group of every match
+findall = re.compile('<img src="(.*)">').findall(a)
+print(findall)
+
+# Several groups at once: capture both the tag name and the image URL
+re_search = re.compile('<(.*) src="(.*)">').search(a)
+# Print the tag name (img)
+print(re_search.group(1))
+# Print the image URL
+print(re_search.group(2))
+# Print the tag name and the image URL together, as a tuple
+print(re_search.group(1, 2))
+# groups() returns all captured groups as a tuple
+print(re_search.groups())