Compare commits

...

10 Commits

Author SHA1 Message Date
Jimmy Xiang
1db97bfb2a add pdf_img 2019-11-07 01:03:28 +08:00
Jimmy Xiang
6e7a3172d5 Merge branch 'master' of https://github.com/flypythoncom/flypython 2019-10-28 17:24:09 +08:00
Jimmy Xiang
10418167b3 add pdf 2019-10-28 17:23:05 +08:00
Jimmy Xiang
d6702d2071 Add files via upload 2019-10-16 11:27:37 +08:00
JINCHONGJIU
9281cb885a wordcloud_hlm 2019-10-12 16:07:14 +08:00
JINCHONGJIU
d00e2eafef wordcloud_hlm_seg 2019-10-12 15:54:32 +08:00
FlyPython
c79818e78b Update sendemail.py 2019-09-30 17:56:08 +08:00
FlyPython
fb56f858f4 python email
使用python自动发送邮件
2019-09-30 17:33:16 +08:00
FlyPython
7df0908154 Update mooncake.py 2019-09-13 19:12:42 +08:00
FlyPython
196d46f08e Add files via upload 2019-09-13 18:45:38 +08:00
13 changed files with 3755 additions and 0 deletions

30
convert.py Normal file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
import requests
import os,json
from requests_toolbelt import MultipartEncoder
# Conversion endpoint that post_file() uploads to.
# NOTE(review): presumably a locally running unoconv web service - confirm.
url = "http://localhost/unoconv/pdf"
def post_file(url, path):
    """Upload the file at ``path`` to ``url`` and save the reply as a PDF.

    The file is sent as the ``file`` field of a multipart/form-data POST.
    The response body is written to the current working directory under
    the source file's base name with its extension replaced by ``.pdf``.

    Args:
        url: Address of the conversion endpoint.
        path: Path of the document to upload.

    Returns:
        The name of the PDF file that was written.

    Raises:
        OSError: If ``path`` cannot be read or the PDF cannot be written.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    filename = os.path.basename(path)
    # splitext drops only the final extension, so "report.v2.docx" becomes
    # "report.v2.pdf" instead of being cut at the first dot.
    convert_name = os.path.splitext(filename)[0] + '.pdf'
    # Keep the upload handle in a with-block so it is closed even when the
    # request fails.
    with open(path, 'rb') as source:
        m = MultipartEncoder(fields={'file': (filename, source)})
        response = requests.request(
            'POST', url, data=m, headers={'Content-Type': m.content_type}
        )
    # Refuse to store an error page under a .pdf name.
    response.raise_for_status()
    with open(convert_name, 'wb') as f:
        f.write(response.content)
    return convert_name
# Convert ./demo.docx through the endpoint above and print the name of the
# file that post_file() wrote.
path = "./demo.docx"
ret = post_file(url, path)
print(ret)

76
mooncake.py Normal file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python2
#encoding=utf-8
import turtle as tt
import math
# Hide the turtle cursor and set the animation speed before anything is drawn.
tt.hideturtle()
tt.speed(10)
def draw_circle(r):
    """Draw a filled circle of radius ``r`` centred on the origin."""
    outline, fill = '#F8CD32', '#FBA92D'
    # Start at the lowest point of the circle, heading east, so that
    # circle(r) ends up centred on (0, 0).
    tt.up()
    tt.setposition(0, -r)
    tt.setheading(0)
    tt.down()
    tt.width(5)
    tt.color(outline, fill)
    tt.begin_fill()
    tt.circle(r)
    tt.end_fill()
def draw_petal(r, n):
    """Draw ``n`` filled half-circle petals around a circle of radius ``r``.

    Args:
        r: Radius of the circle the petals are attached to.
        n: Number of petals; must be non-zero.
    """
    tt.penup()
    tt.goto(0, -r)
    tt.seth(0)
    tt.pendown()
    # Half the chord between two neighbouring anchor points on the circle,
    # so each half circle reaches exactly from one anchor to the next.
    small_r = math.sin(math.pi / n) * r
    # Float literal forces true division; under Python 2 the integer form
    # 360/n truncates and misplaces the petals when n does not divide 360.
    step = 360.0 / n
    for i in range(n):
        # Walk from the centre out to the i-th anchor without drawing.
        tt.penup()
        tt.home()
        tt.seth(step * i)
        tt.fd(r)
        # Turn by half a step before drawing the arc.
        tt.left(step * 0.5)
        tt.pendown()
        tt.color('#F0BE7C')
        tt.begin_fill()
        tt.circle(small_r, 180)
        tt.end_fill()
def draw_square(d, r):
    """Trace a square with sides of length ``d`` and quarter-circle corners of radius ``r``."""
    sides = 4
    tt.up()
    tt.setheading(0)
    tt.setpos(d/2 + r, -d/2)
    tt.lt(90)
    tt.down()
    # One straight edge followed by one rounded corner, four times over.
    for _ in range(sides):
        tt.forward(d)
        tt.circle(r, 90)
def draw_word(word, x, y):
    """Write ``word`` in gold starting at position ``(x, y)``."""
    label_font = ("微软雅黑", 35, "normal")
    # Move to the target position without leaving a trail.
    tt.up()
    tt.setpos(x, y)
    tt.down()
    tt.color("Gold")
    tt.write(word, font=label_font)
def draw():
    """Draw the whole picture, then hand control to the turtle main loop."""
    radius = 120
    tt.title("FlyPython祝您中秋快乐")
    draw_circle(radius)
    draw_petal(radius, 18)
    #draw_square(100,10)
    # Four labels laid out as a 2x2 grid around the centre.
    # NOTE(review): every label is the empty string here, so nothing visible
    # is written - confirm whether the characters were lost from this copy.
    for x, y in ((-50, 5), (0, 5), (-50, -40), (0, -40)):
        draw_word("", x, y)
    tt.done()
# Only draw when the file is run as a script, not when it is imported.
if __name__ == "__main__":
    draw()

BIN
pdf/01.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

BIN
pdf/02.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

BIN
pdf/03.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
pdf/04.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

BIN
pdf/img/test.pdf_img1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

16
pdf/pdf.py Normal file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env python3
import pdfplumber
import pandas as pd
# Print the text of page 1 of test.pdf, then every table found on page 2
# as a pandas DataFrame.
with pdfplumber.open("test.pdf") as pdf:
    first_page = pdf.pages[0]
    text = first_page.extract_text()
    print(text)
    # A one-page PDF has no second page; treat that as "no tables" instead
    # of failing with IndexError.
    if len(pdf.pages) > 1:
        second_page = pdf.pages[1]
        table = second_page.extract_tables()
    else:
        table = []
# Each table is used as a list of rows: the first row supplies the column
# names, the remaining rows the data.
for t in table:
    if not t:
        # An empty table has no header row to build the columns from.
        continue
    df = pd.DataFrame(t[1:], columns=t[0])
    print(df)

54
pdf/pdf_img.py Normal file
View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
import fitz #pip install pymupdf
import re
import os
def find_imag(path, img_path):
    """Extract the embedded images of a PDF into ``img_path`` as PNG files.

    Every cross-reference object of the document is scanned; an object is
    treated as an image when its definition contains both
    ``/Type /XObject`` and ``/Subtype /Image``.  Files are named
    ``<path>_img<k>.png`` with ``k`` counting from 1, after backslashes in
    ``path`` are replaced by underscores and colons are removed.

    Args:
        path: Path of the PDF file to scan.
        img_path: Directory that receives the PNG files; created if missing.

    Returns:
        The number of images written.
    """
    checkXO = re.compile(r"/Type(?= */XObject)")
    checkIM = re.compile(r"/Subtype(?= */Image)")
    # NOTE(review): forward slashes in ``path`` are not flattened, so a path
    # with directories is joined below ``img_path`` as-is (an absolute path
    # replaces ``img_path`` entirely) - confirm whether that is intended.
    prefix = path.replace('\\', '_').replace(':', '')
    if img_path:
        os.makedirs(img_path, exist_ok=True)
    img_count = 0
    pdf = fitz.open(path)
    try:
        # NOTE(review): _getXrefLength/_getXrefString/writePNG look like
        # legacy PyMuPDF names - confirm against the installed version.
        len_XREF = pdf._getXrefLength()
        print("文件名:{}, 页数: {}, 对象: {}".format(path, len(pdf), len_XREF - 1))
        for i in range(1, len_XREF):
            text = pdf._getXrefString(i)
            # Skip anything that is not both an XObject and an image.
            if not checkXO.search(text) or not checkIM.search(text):
                continue
            img_count += 1
            # Build the pixmap from the object's xref number.
            pix = fitz.Pixmap(pdf, i)
            target = os.path.join(
                img_path, prefix + "_img{}.png".format(img_count)
            )
            # Fewer than four colour components (alpha excluded) means gray
            # or RGB, which PNG can hold directly.  CMYK without alpha has
            # n == 4 and must be converted first, which "n < 5" missed.
            if pix.n - pix.alpha < 4:
                pix.writePNG(target)
            else:
                pix0 = fitz.Pixmap(fitz.csRGB, pix)
                pix0.writePNG(target)
                pix0 = None
            # Drop the reference so the pixmap memory can be released.
            pix = None
    finally:
        pdf.close()
    print("提取了{}张图片".format(img_count))
    return img_count
# Script entry point: extract the images of test.pdf into the img directory.
if __name__=='__main__':
    pdf_path = r'test.pdf'
    img_path = r'img'
    # m keeps whatever find_imag returns; it is not used in the visible code.
    m = find_imag(pdf_path, img_path)

BIN
pdf/test.pdf Normal file

Binary file not shown.

37
sendemail.py Normal file
View File

@@ -0,0 +1,37 @@
import smtplib
from email.mime.text import MIMEText
# Send one plain-text e-mail through the 163 SMTP server over SSL.
# Server settings.
# Address of the 163 mail SMTP server
mail_host = 'smtp.163.com'
# 163 user name
mail_user = 'flypython***'
# Password (for 163 and similar providers this is the authorization code)
mail_pass = '7******x'
# Sender address
sender = 'flypython.com'
# The message body is plain text
message = MIMEText('Hello World ! This is from FlyPython!', 'plain', 'utf-8')
# E-mail subject
message['Subject'] = 'FlyPython'
# Sender shown in the header
message['From'] = sender
# Recipient addresses handed to the SMTP server
receivers = ['flypython.com@gmail.com']
# Recipient header. A header value must be a single string: assigning the
# list itself makes message.as_string() fail with an exception that the
# SMTPException handler below does not catch.
message['To'] = ', '.join(receivers)
# Log in and send the message
try:
    # SSL from the start; the port is usually 465.  The with-block closes
    # the connection even when login or sending fails.
    with smtplib.SMTP_SSL(mail_host, 465) as smtpObj:
        # Authenticate
        smtpObj.login(mail_user, mail_pass)
        # Send
        smtpObj.sendmail(sender, receivers, message.as_string())
    print('success')
except smtplib.SMTPException as e:
    # Print the error
    print('error', e)

50
wordcloud_hlm.py Normal file
View File

@@ -0,0 +1,50 @@
from os import path
from wordcloud import WordCloud
import jieba
import re
#
# special_character_removal = re.compile(r'[,。、【 】“”:;()《》‘’{}?!⑦%>℃.^-——=&#@¥『』]', re.IGNORECASE)
#
# #
# # text =""
# fw=open("hlm_seg.txt","w",encoding="utf-8")
# with open('hlm.txt',encoding="utf-8") as fp:
# for line in fp:
# l = special_character_removal.sub('', line.strip())
# words=jieba.cut(l)
#
# t=" ".join(words)
# fw.write(t)
# fw.write("\n")
# fw.close()
# import numpy as np
# from PIL import Image, ImageDraw, ImageFont
# background_image = np.array(Image.open("background.png"))
# Build a word cloud from the pre-segmented text next to this script and
# show it in a matplotlib window.
d = path.dirname(__file__)
# Read the whole text; the with-block closes the file once it is read.
with open(path.join(d, 'hlm_seg.txt'), encoding="utf-8") as fp:
    text = fp.read()
# Generate a word cloud image
# font=path.join(d, "simkai.ttf")
font = 'C:/Windows/Fonts/simkai.ttf'
wordcloud = WordCloud(
    font_path=font,  # Chinese font; without one the Chinese words are not displayed
    width=1024,  # width
    height=840,  # height
    background_color='white',  # background colour
    # mask=background_image  # background image
    # max_words=100,  # maximum number of words
    # max_font_size=100  # largest font size
).generate(text)
# Display the generated image:
# the matplotlib way:
import matplotlib.pyplot as plt
# lower max_font_size
# wordcloud = WordCloud(max_font_size=40).generate(text)
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show()

3492
wordcloud_hlm_seg.txt Normal file

File diff suppressed because one or more lines are too long