Compare commits

...

10 Commits

Author SHA1 Message Date
pezy
55c56c864c Merge branch 'master' of github.com:pezy/AutomateTheBoringStuffWithPython 2017-11-16 17:00:32 +08:00
pezy
7c317d0010 added fetch from URL 2017-11-16 16:59:58 +08:00
pezy
864bc0cc22 finished download image from huaban. 2017-11-16 00:25:39 +08:00
pezy
ffe50b01bb improving 2017-11-14 18:01:55 +08:00
pezy
db264a0b93 try to finished ch11 2017-11-10 18:19:24 +08:00
pezy
4c7cd73626 finished ch10 2017-11-09 11:40:54 +08:00
pezy
da6de632e8 finished ch9 2017-11-08 18:21:39 +08:00
kerogen-pezy
5eeca42044 Finished Ch08 2017-06-20 11:34:01 +08:00
kerogen-pezy
ad950d46c1 finished project of chapter 8 2017-06-20 11:14:34 +08:00
pezy_mbp
1fb40a6540 added regex_search 2017-06-20 08:29:48 +08:00
21 changed files with 530 additions and 8 deletions

View File

@@ -1,2 +0,0 @@
'rename', (512, 21)
'software', (1024, 34)

BIN
mcb.dat

Binary file not shown.

View File

@@ -1,2 +0,0 @@
'rename', (512, 21)
'software', (1024, 34)

14
practice_projects/2048.py Normal file
View File

@@ -0,0 +1,14 @@
#!python3
# 2048.py - auto play https://gabrielecirulli.github.io/2048/
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Open the 2048 game in Chrome and grab the root <html> element; key presses
# sent to it are what drive the game.
browser = webdriver.Chrome()
browser.get("https://gabrielecirulli.github.io/2048/")
htmlElem = browser.find_element_by_tag_name('html')

# Press up, right, down, left in that order, forever. The loop has no exit
# condition, so the script runs until it is interrupted.
while True:
    for arrow_key in (Keys.UP, Keys.RIGHT, Keys.DOWN, Keys.LEFT):
        htmlElem.send_keys(arrow_key)

View File

@@ -0,0 +1,39 @@
#!python3
# cmd_email.py - send email by command
from selenium import webdriver
from time import sleep
import sys
# Both a recipient and at least one word of message text are required.
if len(sys.argv) < 3:
    print('Usage: python cmd_email.py [sendto] [message]')
    sys.exit()

browser = webdriver.Chrome()
browser.get('https://ui.ptlogin2.qq.com/cgi-bin/login?style=9&appid=522005705&daid=4&s_url=https%3A%2F%2Fw.mail.qq.com%2Fcgi-bin%2Flogin%3Fvt%3Dpassport%26vm%3Dwsk%26delegate_url%3D%26f%3Dxhtml%26target%3D&hln_css=http%3A%2F%2Fmail.qq.com%2Fzh_CN%2Fhtmledition%2Fimages%2Flogo%2Fqqmail%2Fqqmail_logo_default_200h.png&low_login=1&hln_autologin=%E8%AE%B0%E4%BD%8F%E7%99%BB%E5%BD%95%E7%8A%B6%E6%80%81&pt_no_onekey=1')
sleep(1)  # fixed pause before looking up the login form elements
try:
    # Step 1: QQ account login (credentials are intentionally left blank here).
    qq = browser.find_element_by_id('u')
    qq.send_keys('')  # qq
    password = browser.find_element_by_id('p')
    password.send_keys('')  # qq password
    go = browser.find_element_by_id('go')
    go.click()
    sleep(1)

    # Step 2: second password prompt for the mailbox itself.
    password2 = browser.find_element_by_id('pwd')
    password2.send_keys('')  # mail password
    password2.submit()
    sleep(1)

    # Step 3: open the compose form and fill it in from the command line.
    write = browser.find_element_by_css_selector('.qm_btnIcon')
    write.click()
    sendto = browser.find_element_by_id('showto')
    sendto.send_keys(sys.argv[1])
    subject = browser.find_element_by_id('subject')
    subject.send_keys('System mail by python')
    content = browser.find_element_by_id('content')
    # Everything after the recipient is the message body.
    content.send_keys(' '.join(sys.argv[2:]))
    sleep(2)
    submit = browser.find_element_by_css_selector('.qm_btn.qm_btn_Blue')
    submit.click()
except Exception as err:
    # Still best-effort, but no longer a bare ``except:``: Ctrl-C and
    # SystemExit propagate, and the real cause is reported instead of hidden.
    print('Was not able to find an element with your qq.')
    print('Details: %s' % err)

View File

@@ -0,0 +1,22 @@
#!python3
# coin_toss.py - a simple coin toss guessing game. The player gets two guesses.
import random
# Ask until the player types exactly 'heads' or 'tails'.
guess = ''
while not (guess == 'heads' or guess == 'tails'):
    print('Guess the coin toss! Enter heads or tails:')
    guess = input()

# randint(0, 1) is used as a tuple index: 0 selects 'heads', 1 selects 'tails'.
toss = ('heads', 'tails')[random.randint(0, 1)]
assert toss in ('heads', 'tails'), 'toss should be heads or tails.'

if toss != guess:
    print('Nope! Guess again!')
    # The second (and last) guess is taken as typed, without validation.
    guess = input()
    if toss != guess:
        print('Nope. You are really bad at this game.')
    else:
        print('You got it!')
else:
    print('You got it!')

View File

@@ -0,0 +1,21 @@
#!python3
# deleting_unneeded_files.py - walks through a folder tree and searches for
# exceptionally large files or folders. say, ones that have a file size of more
# than 100MB. Print these files with their absolute path to the screen.
import os
import shutil
def deleting_large_files(fd, mega):
    """Print the absolute path of every file under *fd* larger than *mega* MB.

    Despite the name, nothing is removed: the ``os.unlink`` call is left
    commented out, so the function only reports candidates.

    Args:
        fd: Root folder of the tree to scan (absolute or relative).
        mega: Size threshold in megabytes (1 MB = 1024 * 1024 bytes). Only
            files strictly larger than this are printed.
    """
    threshold = mega * 1024 * 1024  # compare in bytes, computed once
    for foldername, _subfolders, filenames in os.walk(fd):
        for filename in filenames:
            # abspath so the output is absolute even when fd is relative.
            filepath = os.path.abspath(os.path.join(foldername, filename))
            try:
                size = os.path.getsize(filepath)
            except OSError:
                # Broken symlink, or file vanished/unreadable mid-scan:
                # skip it instead of aborting the whole walk.
                continue
            if size > threshold:
                print(filepath)
                # os.unlink(filepath)
# Script entry: report every file larger than 10 MB under this hard-coded
# download folder.
deleting_large_files("D:\\BaiduNetdiskDownload", 10)

View File

@@ -0,0 +1,30 @@
#!python
# downloadHuaban.py - download picture from http://huaban.com/favorite/beauty/
import requests, os, re
url = "http://huaban.com/favorite/beauty/"
os.makedirs('huaban', exist_ok=True)

# Fetch the listing page and pull every pin id out of its text.
print('Fetching waterfall...')
res = requests.get(url)
res.raise_for_status()
# Raw string: '\d' in a plain literal is an invalid escape sequence.
pin_id_match = re.compile(r'"pin_id":(\d+)')
pin_id_list = pin_id_match.findall(res.text)

for pin_id in pin_id_list:
    # Each pin has its own page, whose text carries the image key.
    url = 'http://huaban.com/pins/' + pin_id
    res = requests.get(url)
    res.raise_for_status()
    image_key_match = re.compile(
        r'"pin":{"pin_id":' + pin_id + r',\s.+?"key":"(.+?)",')
    image_key = image_key_match.findall(res.text)
    if not image_key:
        # The page did not contain the expected pattern; skip this pin
        # instead of dying with IndexError on image_key[0].
        print('no image key found for pin %s, skipping...' % pin_id)
        continue

    url = 'http://img.hb.aicdn.com/' + image_key[0]
    res = requests.get(url)
    res.raise_for_status()
    print('downloading %s.jpg...' % pin_id)
    # ``with`` closes the file even if a chunk write fails.
    with open('huaban/' + pin_id + '.jpg', 'wb') as image:
        for chunk in res.iter_content(100000):
            image.write(chunk)

View File

@@ -0,0 +1,63 @@
#!python3
# fetch_URL.py - Fetch all the link started at URL.
import sys
import os
import requests
import bs4
# Last path components (file names) that find_all_url() has already visited
# or downloaded; it checks this set to avoid fetching the same name twice.
achieve = set()
def back_to_parent(url):
    """Return *url* cut off just after its last ``/``.

    A string that already ends in ``/`` is returned unchanged. A string with
    no ``/`` at all yields ``''`` (the previous character-by-character loop
    raised IndexError in that case).

    Args:
        url: URL or path string.

    Returns:
        The leading part of *url* up to and including the final ``/``.
    """
    # rfind returns -1 when there is no slash, which makes the slice empty.
    return url[:url.rfind('/') + 1]
def find_all_url(origin_url):
    """Crawl *origin_url* recursively and save every linked non-HTML file.

    Each ``<a href>`` on the page is resolved against the page's parent URL.
    Links ending in ``.html`` are crawled recursively; every other link is
    downloaded into the ``psi/`` folder under its last path component.
    Names already recorded in the module-level ``achieve`` set are skipped.

    Args:
        origin_url: URL of the page to scan.

    Raises:
        requests.HTTPError: For any non-404 error status (404s are reported
            or skipped instead).
    """
    res = requests.get(origin_url)
    if res.status_code == 404:
        print("%s is 404..." % origin_url)
        return
    res.raise_for_status()

    prev = back_to_parent(origin_url)
    # Compare by value: ``is not`` tests object identity, which is not a
    # reliable way to tell whether two strings hold the same URL.
    if origin_url != sys.argv[1]:
        achieve.add(origin_url.split('/')[-1])

    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    for link in soup.find_all('a', href=True):
        url = link.get('href')
        if not url.startswith(('http:', 'https:')):
            if url == '#':
                continue
            elif url.startswith('../'):
                # One level up: drop the trailing slash of the current
                # parent, then cut back to the slash before it.
                url = back_to_parent(prev[:-1]) + url[3:]
            else:
                url = prev + url

        filename = url.split('/')[-1]
        if not filename:
            # URL ends in '/': there is no name to save under, and opening
            # 'psi/' itself would fail.
            continue
        if filename in achieve:
            continue

        if not url.endswith('.html'):
            res = requests.get(url)
            if res.status_code == 404:
                continue
            res.raise_for_status()
            print('downloading %s...' % filename)
            # ``with`` closes the file even if a chunk write fails.
            with open('psi/' + filename, 'wb') as page:
                for chunk in res.iter_content(100000):
                    page.write(chunk)
            achieve.add(filename)
        else:
            print('opening %s...' % url)
            find_all_url(url)
# Command-line entry point: exactly one argument, the start URL, is expected.
if len(sys.argv) == 2:
    # Downloads are written to ./psi, which is created if it is missing.
    os.makedirs('psi', exist_ok=True)
    find_all_url(sys.argv[1])
else:
    print('Usage: python fetch_URL.py [URL]')
    sys.exit()

View File

@@ -0,0 +1,72 @@
#!python3
# filling_in_the_gaps.py - finds all files with a given prefix, such as
# `spam001.txt`, spam002.txt, and so on, in a single folder and locates any
# gaps in the numbering. Have the program rename all the later files to close
# this gap.
# write another program that can insert gaps into numbered files so that a new
# file can be added.
import os
import shutil
import re
import random
import pathlib
def gengap(fd, prefix, n):
    """Fill folder *fd* with *n* empty, randomly numbered ``.txt`` files.

    The working directory is changed to *fd* and every existing entry in it
    is unlinked first. The new files are named ``<prefix>NNN.txt``, where the
    numbers are *n* distinct values drawn from 1..99, zero-padded to three
    digits.
    """
    os.chdir(fd)

    # Start from an empty folder.
    for stale in os.listdir('.'):
        os.unlink(stale)

    for number in random.sample(range(1, 100), n):
        name = prefix + str(number).zfill(3) + '.txt'
        pathlib.Path(os.path.join('.', name)).touch()
def fillgap(fd, prefix):
    """Renumber the ``<prefix><digits>...`` files in *fd* to close any gaps.

    The working directory is changed to *fd*. Matching files are processed in
    ascending numeric order: the lowest-numbered file keeps its name, and each
    later file is renamed to the next consecutive number, zero-padded to the
    digit width of the lowest-numbered file. Only the final extension of each
    file is kept after the number.

    Assumes at most one file per number; two files sharing a number can
    collide after renumbering.

    Args:
        fd: Folder containing the numbered files.
        prefix: Literal file-name prefix that precedes the number.
    """
    os.chdir(fd)
    # Escape the prefix so characters such as '.' or '(' match literally.
    reg = re.compile(r'^%s(\d+)' % re.escape(prefix))

    numbered = []
    for filename in os.listdir('.'):
        mo = reg.search(filename)
        if mo is not None:
            numbered.append((int(mo.group(1)), filename, len(mo.group(1))))
    if not numbered:
        return

    # os.listdir() returns entries in arbitrary order; renumbering is only
    # correct (and never overwrites a not-yet-renamed file) when the files
    # are visited from the lowest number upwards.
    numbered.sort()
    n, _, sz = numbered[0]
    for _, filename, _ in numbered[1:]:
        n += 1
        newname = prefix + str(n).zfill(sz) + os.path.splitext(filename)[1]
        if newname != filename:
            shutil.move(filename, newname)
def insertgap(fd, prefix):
    """Create one new empty numbered file next to the existing ones in *fd*.

    The working directory is changed to *fd*. The first directory entry whose
    name starts with *prefix* followed by digits is taken as the starting
    point; its number is then increased until ``<prefix><number><ext>`` does
    not exist, and that file is created. Only one file is ever created.
    """
    os.chdir(fd)
    pattern = re.compile(r'^%s(\d+)' % prefix)

    for entry in os.listdir('.'):
        match = pattern.search(entry)
        if match is None:
            continue

        digits = match.group(1)
        number = int(digits)
        width = len(str(digits))  # keep the same zero padding
        extension = os.path.splitext(entry)[1]

        candidate = os.path.join(
            '.', prefix + str(number).zfill(width) + extension)
        while os.path.exists(candidate):
            number += 1
            candidate = os.path.join(
                '.', prefix + str(number).zfill(width) + extension)
        pathlib.Path(candidate).touch()
        # Stop after the first matching entry has been handled.
        break
# Driver. gengap(), fillgap() and insertgap() each chdir() into their target
# folder, so the starting directory is saved and restored before every call
# that uses the relative folder name 'test'.
curpath = os.getcwd()
# Disabled steps: generate 9 random 'spam' files, then add one more file.
# os.chdir(curpath)
# gengap('test', 'spam', 9)
# os.chdir(curpath)
# insertgap('test', 'spam')
os.chdir(curpath)
# Active step: close the numbering gaps among the 'spam' files in ./test.
fillgap('test', 'spam')

View File

@@ -31,9 +31,15 @@ mad_libs.py - reads in text files and lets the user add their own text anywhere
import re
file_name = input("Enter the filename:")
file_read = open(file_name)
content = file_read.read()
in_file_name = input("Enter the filename:\n")
in_file = open(in_file_name)
content = in_file.read()
for to_replace in re.compile(r'ADJECTIVE|NOUN|ADVERB|VERB').findall(content):
content.replace(to_replace)
str_input = input("Enter %s %s:\n" % ('an' if to_replace[0] in 'AEIOU' else
'a', to_replace.lower()))
content = content.replace(to_replace, str_input, 1)
out_file_name = input("Enter the output file name:\n")
out_file = open(out_file_name, 'w')
out_file.write(content)

View File

@@ -0,0 +1 @@
The silly panda walked to the chandelier and then screamed. A nearby picked truck was unaffected by these events.

View File

@@ -0,0 +1,21 @@
#! python3
'''
regex_search.py - opens all .txt files in a folder and searches for any line
that matches a user-supplied regular expression. The results
should be printed to the screen.
'''
import os
import re
target_folder = input("Enter the target foulder:\n")
os.chdir(target_folder)
# The folder is now the current directory, so list '.'; listing
# target_folder again would resolve a relative path a second time and fail.
target_files = os.listdir('.')
target_files = [target for target in target_files if target.endswith('.txt')]
regex = re.compile(input('Enter the regular expression:\n'))

for file_name in target_files:
    # ``with`` closes each file as soon as it has been scanned.
    with open(file_name) as file:
        for line in file:
            words = regex.findall(line)
            if words:
                # Print the file name with the list of matches on this line.
                print(file_name + ": " + str(words))

View File

@@ -0,0 +1,13 @@
Welcome to RegExr v2.1 by gskinner.com, proudly hosted by Media Temple!
Edit the Expression & Text to see matches. Roll over matches or the expression for details. Undo mistakes with ctrl-z. Save Favorites & Share expressions with friends or the Community. Explore your results with Tools. A full Reference & Help is available in the Library, or watch the video Tutorial.
Sample text for testing:
abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
0123456789 _+-.,!@#$%^&*();\/|<>"'
12345 -98.7 3.141 .6180 9,000 +42
555.123.4567 +1-(800)-555-2468
foo@demo.net bar.ba@test.co.uk
www.demo.com http://foo.co.uk/
http://regexr.com/foo.html?q=bar
https://mediatemple.net

View File

@@ -0,0 +1,28 @@
#!python3
# selective_copy.py - walks through a folder tree and searches for files with a
# certain file extension (such as .pdf or .jpg). Copy these files from whatever
# location they are in to a new folder.
import os
import shutil
def selective_copy(flr, ext, new_flr):
    """Gather every file under *flr* whose name ends with *ext* into *new_flr*.

    NOTE: despite the name, files are relocated with ``shutil.move``, not
    copied; the originals no longer exist afterwards.

    The destination folder is created if needed (including missing parents)
    and is itself excluded from the walk, so files already inside it, or in
    its subfolders, are left alone. Other folders that merely share the
    destination's name are still processed.

    Args:
        flr: Root folder of the tree to search.
        ext: Extension, or tuple of extensions, passed to ``str.endswith``.
        new_flr: Folder that receives the matching files.
    """
    os.makedirs(new_flr, exist_ok=True)

    def _canonical(path):
        # Normalized absolute form so the same folder compares equal
        # regardless of how its path was spelled.
        return os.path.normcase(os.path.abspath(path))

    dest = _canonical(new_flr)
    for foldername, subfolders, filenames in os.walk(flr):
        # Prune the destination in place so os.walk never descends into it.
        subfolders[:] = [
            sub for sub in subfolders
            if _canonical(os.path.join(foldername, sub)) != dest
        ]
        if _canonical(foldername) == dest:
            # The search root is the destination itself; its own files are
            # already where they belong.
            continue
        for filename in filenames:
            if filename.endswith(ext):
                filepath = os.path.join(foldername, filename)
                new_filepath = os.path.join(new_flr, filename)
                shutil.move(filepath, new_filepath)
# Script entry: gather the .exe and .msi installers from the Downloads tree
# into its "Software" subfolder.
selective_copy("C:\\Users\\pezy\\Downloads", ('.exe', '.msi'),
"C:\\Users\\pezy\\Downloads\\Software")

View File

@@ -7,3 +7,7 @@
- [Chapter 5 Dictionaries and Structuring Data](ch05/README.md)
- [Chapter 6 Manipulating Strings](ch06/README.md)
- [Chapter 7 Pattern Matching with Regular Expressions](ch07/README.md)
- [Chapter 8 Reading and Writing Files](ch08/README.md)
- [Chapter 9 Organizing Files](ch09/README.md)
- [Chapter 10 Debugging](ch10/README.md)
- [Chapter 11 Web Scraping](ch11/README.md)

View File

@@ -0,0 +1,37 @@
# Chapter 8 Reading and Writing Files
> Q: 1. What is a relative path relative to?
The current working directory.
> Q: 2. What does an absolute path start with?
The root folder, such as `/` on macOS and Linux or `C:\` on Windows.
> Q: 3. What do the os.getcwd() and os.chdir() functions do?
`os.getcwd()` returns the current working directory, and `os.chdir()` changes the current working directory.
> Q: 4. What are the . and .. folders?
`.` is current folder, `..` is the parent folder.
> Q: 5. In C:\bacon\eggs\spam.txt, which part is the dir name, and which part is the base name?
`C:\bacon\eggs` is the dir name, `spam.txt` is the base name.
> Q: 6. What are the three “mode” arguments that can be passed to the open() function?
`r`, `w`, `a`.
> Q: 7. What happens if an existing file is opened in write mode?
Existing file will be erased and completely overwritten.
> Q: 8. What is the difference between the read() and readlines() methods?
`read()` returns the file's entire contents as a single string. `readlines()` returns a list of strings, one for each line of the file.
> Q: 9. What data structure does a shelf value resemble?
dictionary.

View File

@@ -0,0 +1,17 @@
# Chapter 9 Organizing Files
> Q: 1. What is the difference between `shutil.copy()` and `shutil.copytree()`?
`shutil.copy()` copies the file, `shutil.copytree()` copies the directory.
> Q: 2. What function is used to rename files?
`shutil.move()`
> Q: 3. What is the difference between the delete functions in the `send2trash` and `shutil` modules?
`send2trash` will move file to recycle bin, while `shutil` functions will permanently delete files and folders.
> Q: 4. `ZipFile` objects have a `close()` method just like `File` objects `close()` method. What `ZipFile` method is equivalent to `File` objects `open()` method?
`zipfile.ZipFile()`.

View File

@@ -0,0 +1,57 @@
# Chapter 10 Debugging
> Q: 1. Write an assert statement that triggers an AssertionError if the variable spam is an integer less than 10.
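`assert spam >= 10, 'The spam variable is less than 10.'` (do not wrap the condition and message in parentheses: that asserts a non-empty tuple, which is always true, so the assertion would never trigger)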
> Q: 2. Write an assert statement that triggers an AssertionError if the variables eggs and bacon contain strings that are the same as each other, even if their cases are different (that is, 'hello' and 'hello' are considered the same, and 'goodbye' and 'GOODbye' are also considered the same).
`assert eggs.lower() != bacon.lower(), 'The eggs and bacon variables contain the same string.'`
> Q: 3. Write an assert statement that always triggers an AssertionError.
`assert False, 'This assertion always triggers.'`
> Q: 4. What are the two lines that your program must have in order to be able to call logging.debug()?
```py
import logging
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
```
> Q: 5. What are the two lines that your program must have in order to have logging.debug() send a logging message to a file named programLog.txt?
```py
import logging
logging.basicConfig(filename='programLog.txt', level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
```
> Q: 6. What are the five logging levels?
DEBUG, INFO, WARNING, ERROR, CRITICAL
> Q: 7. What line of code can you add to disable all logging messages in your program?
`logging.disable(logging.CRITICAL)`
> Q: 8. Why is using logging messages better than using print() to display the same message?
You can disable logging message without removing the logging function calls. You can selectively disable lower-level logging messages.
> Q: 9. What are the differences between the Step, Over, and Out buttons in the Debug Control window?
Step: move into the function call.
Over: execute the function call without stepping into it.
Out: execute the rest of the code until it steps out of the function it currently is in.
> Q: 10. After you click Go in the Debug Control window, when will the debugger stop?
the end of the program or a line with a breakpoint.
> Q: 11. What is a breakpoint?
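A breakpoint is a setting on a line of code that causes the debugger to pause when the program execution reaches that line.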
> Q: 12. How do you set a breakpoint on a line of code in IDLE?
Right-click the line in the file editor and select "Set Breakpoint" from the context menu.

View File

@@ -0,0 +1,81 @@
# Chapter 11 Web Scraping
> Q: 1. Briefly describe the differences between the webbrowser, requests, BeautifulSoup, and selenium modules.
`webbrowser` can launch a web browser to a specific URL by `open()`;
`requests` can download files and pages from the Web.
`BeautifulSoup` (the `bs4` module) parses HTML.
`selenium` launches and controls a web browser.
> Q: 2. What type of object is returned by requests.get()? How can you access the downloaded content as a string value?
A `Response` object. The downloaded content is available as a string in its `text` attribute.
> Q: 3. What Requests method checks that the download worked?
`raise_for_status()`
> Q: 4. How can you get the HTTP status code of a Requests response?
`status_code`
> Q: 5. How do you save a Requests response to a file?
```py
saveFile = open('SaveFile', 'wb')
for chunk in res.iter_content(100000):
saveFile.write(chunk)
saveFile.close()
```
> Q: 6. What is the keyboard shortcut for opening a browser's developer tools?
F12
> Q: 7. How can you view (in the developer tools) the HTML of a specific element on a web page?
Inspect Element
> Q: 8. What is the CSS selector string that would find the element with an id attribute of main?
`#main`
> Q: 9. What is the CSS selector string that would find the elements with a CSS class of highlight?
`.highlight`
> Q: 10. What is the CSS selector string that would find all the `<div>` elements inside another `<div>` element?
`div div`
> Q: 11. What is the CSS selector string that would find the `<button>` element with a value attribute set to favorite?
`button[value='favorite']`
> Q: 12. Say you have a Beautiful Soup Tag object stored in the variable spam for the element `<div>`Hello world!`</div>`. How could you get a string 'Hello world!' from the Tag object?
spam.getText()
> Q: 13. How would you store all the attributes of a Beautiful Soup Tag object in a variable named linkElem?
linkElem.attrs
> Q: 14. Running `import selenium` doesn't work. How do you properly import the selenium module?
`from selenium import webdriver`
> Q: 15. What's the difference between the find_element_* and find_elements_* methods?
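The `find_element_*` methods return the first matching element as a single `WebElement` object; the `find_elements_*` methods return a list of all matching `WebElement` objects.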
> Q: 16. What methods do Selenium's WebElement objects have for simulating mouse clicks and keyboard keys?
`click()` and `send_keys()`
> Q: 17. You could call send_keys(Keys.ENTER) on the Submit buttons WebElement object, but what is an easier way to submit a form with Selenium?
`submit()`
> Q: 18. How can you simulate clicking a browsers Forward, Back, and Refresh buttons with Selenium?
`forward()`, `back()`, `refresh()`