Merge branch 'master' of github.com:pezy/AutomateTheBoringStuffWithPython

added fetch from URL
finished download image from huaban.
2017-11-16 17:00:32 +08:00 · 2017-11-16 16:59:58 +08:00 · 2017-11-16 00:25:39 +08:00 · 2017-11-14 18:01:55 +08:00 · 2017-11-10 18:19:24 +08:00 · 2017-11-09 11:40:54 +08:00
21 changed files with 530 additions and 8 deletions
--- a/mcb.bak
+++ b/mcb.bak
@@ -1,2 +0,0 @@
-'rename', (512, 21)
-'software', (1024, 34)
--- a/mcb.dat
+++ b/mcb.dat
--- a/mcb.dir
+++ b/mcb.dir
@@ -1,2 +0,0 @@
-'rename', (512, 21)
-'software', (1024, 34)
--- a/practice_projects/2048.py
+++ b/practice_projects/2048.py
@@ -0,0 +1,14 @@
+#!python3
+# 2048.py - auto play https://gabrielecirulli.github.io/2048/
+
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+
+# Open the 2048 page in Chrome and grab the root element so that key presses
+# can be delivered to the page as a whole.
+browser = webdriver.Chrome()
+browser.get("https://gabrielecirulli.github.io/2048/")
+htmlElem = browser.find_element_by_tag_name('html')
+
+# Press up, right, down, left in that order, forever.
+while True:
+    for arrow in (Keys.UP, Keys.RIGHT, Keys.DOWN, Keys.LEFT):
+        htmlElem.send_keys(arrow)
--- a/practice_projects/cmd_email.py
+++ b/practice_projects/cmd_email.py
@@ -0,0 +1,39 @@
+#!python3
+# cmd_email.py - send email by command
+
+from selenium import webdriver
+from time import sleep
+import sys
+
+if len(sys.argv) < 3:
+    print('Usage: python cmd_email.py [sendto] [message]')
+    sys.exit()
+
+browser = webdriver.Chrome()
+browser.get('https://ui.ptlogin2.qq.com/cgi-bin/login?style=9&appid=522005705&daid=4&s_url=https%3A%2F%2Fw.mail.qq.com%2Fcgi-bin%2Flogin%3Fvt%3Dpassport%26vm%3Dwsk%26delegate_url%3D%26f%3Dxhtml%26target%3D&hln_css=http%3A%2F%2Fmail.qq.com%2Fzh_CN%2Fhtmledition%2Fimages%2Flogo%2Fqqmail%2Fqqmail_logo_default_200h.png&low_login=1&hln_autologin=%E8%AE%B0%E4%BD%8F%E7%99%BB%E5%BD%95%E7%8A%B6%E6%80%81&pt_no_onekey=1')
+sleep(1)
+# Drive the QQ mail web UI: log in, open the compose form, fill it in from the
+# command-line arguments and send. The sleeps give each page time to load.
+try:
+    qq = browser.find_element_by_id('u')
+    qq.send_keys('') # qq
+    password = browser.find_element_by_id('p')
+    password.send_keys('') ## qq password
+    go = browser.find_element_by_id('go')
+    go.click()
+    sleep(1)
+    password2 = browser.find_element_by_id('pwd')
+    password2.send_keys('') # mail password
+    password2.submit()
+    sleep(1)
+    write = browser.find_element_by_css_selector('.qm_btnIcon')
+    write.click()
+    sendto = browser.find_element_by_id('showto')
+    sendto.send_keys(sys.argv[1])
+    subject = browser.find_element_by_id('subject')
+    subject.send_keys('System mail by python')
+    content = browser.find_element_by_id('content')
+    # Everything after the recipient is treated as the message body.
+    content.send_keys(' '.join(sys.argv[2:]))
+    sleep(2)
+    submit = browser.find_element_by_css_selector('.qm_btn.qm_btn_Blue')
+    submit.click()
+except Exception as err:
+    # `except Exception` instead of a bare `except:` so that Ctrl-C and
+    # sys.exit() are not swallowed; the cause is reported for diagnosis.
+    print('Was not able to find an element with your qq.')
+    print('Reason: %r' % (err,))
--- a/practice_projects/coin_toss.py
+++ b/practice_projects/coin_toss.py
@@ -0,0 +1,22 @@
+#!python3
+# coin_toss.py - a simple coin toss guessing game. The player gets two guesses.
+
+import random
+
+# Keep asking until the player types exactly 'heads' or 'tails'.
+guess = ''
+while guess not in ('heads', 'tails'):
+    print('Guess the coin toss! Enter heads or tails:')
+    guess = input()
+toss = ('heads', 'tails')[random.randint(0, 1)]  # 0 is heads, 1 is tails
+
+# Sanity check on the toss value.
+assert toss in ('heads', 'tails'), 'toss should be heads or tails.'
+
+if toss == guess:
+    print('You got it!')
+else:
+    # Second and last chance; this guess is not validated against the two
+    # allowed words, so any other input simply counts as a miss.
+    print('Nope! Guess again!')
+    guess = input()
+    if toss == guess:
+        print('You got it!')
+    else:
+        print('Nope. You are really bad at this game.')
--- a/practice_projects/deleting_unneeded_files.py
+++ b/practice_projects/deleting_unneeded_files.py
@@ -0,0 +1,21 @@
+#!python3
+# deleting_unneeded_files.py - walks through a folder tree and searches for 
+# exceptionally large files or folders. say, ones that have a file size of more
+# than 100MB. Print these files with their absolute path to the screen.
+
+import os
+import shutil
+
+
+def deleting_large_files(fd, mega):
+    """Print the path of every file under ``fd`` that is larger than ``mega``.
+
+    Despite the name, nothing is deleted: matching paths are only printed.
+
+    fd   -- root folder of the tree to walk.
+    mega -- size threshold, in units of 1024 * 1024 bytes (exclusive).
+    """
+    bytes_per_mega = 1024 * 1024
+    for root, _subdirs, names in os.walk(fd):
+        for name in names:
+            candidate = os.path.join(root, name)
+            if os.path.getsize(candidate) / bytes_per_mega > mega:
+                print(candidate)
+
+
+deleting_large_files("D:\\BaiduNetdiskDownload", 10)
--- a/practice_projects/downloadHuaban.py
+++ b/practice_projects/downloadHuaban.py
@@ -0,0 +1,30 @@
+#!python
+# downloadHuaban.py - download picture from http://huaban.com/favorite/beauty/
+
+import requests, os, re
+
+# Fetch the "beauty" waterfall page, collect every pin id it mentions, then
+# open each pin page to find its image key and download the image into the
+# local `huaban/` folder as <pin_id>.jpg.
+url = "http://huaban.com/favorite/beauty/"
+os.makedirs('huaban', exist_ok=True)
+print('Fetching waterfall...')
+res = requests.get(url)
+res.raise_for_status()
+
+# Raw strings keep \d and \s as regex escapes instead of (invalid) Python
+# string escapes.
+pin_id_match = re.compile(r'"pin_id":(\d+)')
+pin_id_list = pin_id_match.findall(res.text)
+
+for pin_id in pin_id_list:
+    url = 'http://huaban.com/pins/' + pin_id
+    res = requests.get(url)
+    res.raise_for_status()
+
+    image_key_match = re.compile(
+        r'"pin":{"pin_id":' + pin_id + r',\s.+?"key":"(.+?)",')
+    image_key = image_key_match.findall(res.text)
+    if not image_key:
+        # The pin page did not match the expected layout; skip this pin
+        # rather than aborting the whole run with an IndexError.
+        print('no image key found for pin %s, skipping...' % pin_id)
+        continue
+    url = 'http://img.hb.aicdn.com/' + image_key[0]
+    res = requests.get(url)
+    res.raise_for_status()
+
+    print('downloading %s.jpg...' % pin_id)
+    # `with` closes the file even if a chunk fails to download or write.
+    with open('huaban/' + pin_id + '.jpg', 'wb') as image:
+        for chunk in res.iter_content(100000):
+            image.write(chunk)
--- a/practice_projects/fetch_URL.py
+++ b/practice_projects/fetch_URL.py
@@ -0,0 +1,63 @@
+#!python3
+# fetch_URL.py - Fetch all the link started at URL.
+
+import sys
+import os
+import requests
+import bs4
+
+achieve = set()
+
+
+def back_to_parent(url):
+    """Return ``url`` truncated just after its last ``/``.
+
+    A URL that already ends in ``/`` is returned unchanged. If ``url``
+    contains no ``/`` at all (including the empty string) the result is the
+    empty string, rather than an IndexError.
+    """
+    # rfind() returns -1 when there is no slash, which makes the slice empty.
+    return url[:url.rfind('/') + 1]
+
+
+def find_all_url(origin_url):
+    """Crawl ``origin_url`` recursively and download every non-HTML link.
+
+    Links ending in ``.html`` are opened and crawled in turn; every other
+    link is saved into the ``psi/`` folder under its last path component.
+    Names already handled are kept in the module-level ``achieve`` set so
+    they are not processed twice; the start URL (``sys.argv[1]``) itself is
+    never recorded. Pages and files that answer 404 are skipped; any other
+    HTTP error is raised by ``raise_for_status()``.
+    """
+    res = requests.get(origin_url)
+    if res.status_code == 404:
+        print("%s is 404..." % origin_url)
+        return
+    res.raise_for_status()
+
+    prev = back_to_parent(origin_url)
+    # Compare by value: `is not` tests object identity, which is not a
+    # reliable way to tell whether two strings hold the same text.
+    if origin_url != sys.argv[1]:
+        achieve.add(origin_url.split('/')[-1])
+
+    soup = bs4.BeautifulSoup(res.text, 'html.parser')
+    for link in soup.find_all('a', href=True):
+        url = link.get('href')
+        # Relative links are resolved against the folder of the current page.
+        if not url.startswith(('http:', 'https:')):
+            if url == '#':
+                continue
+            elif url.startswith('../'):
+                # Drop the trailing slash so back_to_parent climbs one level.
+                url = back_to_parent(prev[:-1]) + url[3:]
+            else:
+                url = prev + url
+        filename = url.split('/')[-1]
+        if filename in achieve:
+            continue
+        if not url.endswith('.html'):
+            res = requests.get(url)
+            if res.status_code == 404:
+                continue
+            res.raise_for_status()
+            print('downloading %s...' % filename)
+            # `with` closes the file even if a chunk fails to write.
+            with open('psi/' + filename, 'wb') as page:
+                for chunk in res.iter_content(100000):
+                    page.write(chunk)
+            achieve.add(filename)
+        else:
+            print('opening %s...' % url)
+            find_all_url(url)
+
+
+if len(sys.argv) != 2:
+    print('Usage: python fetch_URL.py [URL]')
+    sys.exit()
+
+os.makedirs('psi', exist_ok=True)
+find_all_url(sys.argv[1])
--- a/practice_projects/filling_in_the_gaps.py
+++ b/practice_projects/filling_in_the_gaps.py
@@ -0,0 +1,72 @@
+#!python3
+# filling_in_the_gaps.py - finds all files with a given prefix, such as
+# `spam001.txt`, spam002.txt, and so on, in a single folder and locates any
+# gaps in the numbering. Have the program rename all the later files to close
+# this gap.
+# write another program that can insert gaps into numbered files so that a new
+# file can be added.
+
+import os
+import shutil
+import re
+import random
+import pathlib
+
+
+def gengap(fd, prefix, n):
+    """Fill folder ``fd`` with ``n`` randomly numbered empty test files.
+
+    Changes the working directory to ``fd``, removes every entry found
+    there, then creates ``n`` empty files named ``<prefix>NNN.txt`` whose
+    three-digit numbers are drawn without repetition from 1..99.
+    """
+    os.chdir(fd)
+    for stale in os.listdir('.'):
+        os.unlink(stale)
+
+    for number in random.sample(range(1, 100), n):
+        name = prefix + str(number).zfill(3) + '.txt'
+        pathlib.Path(os.path.join('.', name)).touch()
+
+
+def fillgap(fd, prefix):
+    """Renumber ``<prefix><digits>...`` files in ``fd`` to close any gaps.
+
+    Changes the working directory to ``fd``. The file with the lowest number
+    keeps its name and fixes the zero-padding width; every later file is
+    renamed to the next consecutive number. Whatever follows the digits in a
+    file name (normally the extension) is preserved. Files that do not start
+    with ``prefix`` followed by digits are left alone.
+    """
+    os.chdir(fd)
+    # The prefix is literal text, so escape it before embedding in a regex.
+    reg = re.compile(r'^%s(\d+)' % re.escape(prefix))
+
+    numbered = []
+    for filename in os.listdir('.'):
+        mo = reg.search(filename)
+        if mo is not None:
+            digits = mo.group(1)
+            numbered.append(
+                (int(digits), filename, len(digits), filename[mo.end():]))
+    if not numbered:
+        return
+
+    # os.listdir() returns entries in arbitrary order; process them by number
+    # so each file moves down into a slot that is already free.
+    numbered.sort()
+    start, _, sz, _ = numbered[0]
+    for offset, (_, filename, _, tail) in enumerate(numbered):
+        newname = prefix + str(start + offset).zfill(sz) + tail
+        if newname != filename:
+            shutil.move(filename, newname)
+
+
+def insertgap(fd, prefix):
+    """Create one new empty numbered file in ``fd``.
+
+    Changes the working directory to ``fd`` and takes the first entry
+    returned by ``os.listdir`` whose name starts with ``prefix`` followed by
+    digits. Starting from that file's number, it counts upward until it
+    reaches a name that does not exist yet (same padding width and
+    extension) and creates that file. Nothing happens if no entry matches.
+    """
+    os.chdir(fd)
+    pattern = re.compile(r'^%s(\d+)' % prefix)
+    for entry in os.listdir('.'):
+        hit = pattern.search(entry)
+        if hit is None:
+            continue
+
+        digits = hit.group(1)
+        width = len(str(digits))
+        suffix = os.path.splitext(entry)[1]
+        number = int(digits)
+        while True:
+            candidate = os.path.join(
+                '.', prefix + str(number).zfill(width) + suffix)
+            if not os.path.exists(candidate):
+                break
+            number += 1
+        pathlib.Path(candidate).touch()
+        # Only the first matching entry is used.
+        return
+
+
+curpath = os.getcwd()
+
+# os.chdir(curpath)
+# gengap('test', 'spam', 9)
+
+# os.chdir(curpath)
+# insertgap('test', 'spam')
+
+os.chdir(curpath)
+fillgap('test', 'spam')
--- a/practice_projects/mad_libs.py
+++ b/practice_projects/mad_libs.py
@@ -31,9 +31,15 @@ mad_libs.py - reads in text files and lets the user add their own text anywhere

 import re

-file_name = input("Enter the filename:")
-file_read = open(file_name)
-content = file_read.read()
+in_file_name = input("Enter the filename:\n")
+in_file = open(in_file_name)
+content = in_file.read()

 for to_replace in re.compile(r'ADJECTIVE|NOUN|ADVERB|VERB').findall(content):
-    content.replace(to_replace)
+    str_input = input("Enter %s %s:\n" % ('an' if to_replace[0] in 'AEIOU' else
+                      'a', to_replace.lower()))
+    content = content.replace(to_replace, str_input, 1)
+
+out_file_name = input("Enter the output file name:\n")
+out_file = open(out_file_name, 'w')
+out_file.write(content)
--- a/practice_projects/out_mad_libs.txt
+++ b/practice_projects/out_mad_libs.txt
@@ -0,0 +1 @@
+The silly panda walked to the chandelier and then screamed. A nearby picked truck was unaffected by these events.
--- a/practice_projects/regex_search.py
+++ b/practice_projects/regex_search.py
@@ -0,0 +1,21 @@
+#! python3
+'''
+regex_search.py - opens all .txt files in a folder and searches for any line
+                  that matches a user-supplied regular expression. The results
+                  should be printed to the screen.
+'''
+
+import os
+import re
+
+# Ask for a folder and a regular expression, then print every match found in
+# each line of every .txt file directly inside that folder.
+target_folder = input("Enter the target foulder:\n")
+os.chdir(target_folder)
+# The working directory is now the target folder, so list '.' here: listing
+# `target_folder` again would fail when a relative path was entered.
+target_files = [target for target in os.listdir('.')
+                if target.endswith('.txt')]
+regex = re.compile(input('Enter the regular expression:\n'))
+for file_name in target_files:
+    # `with` closes each file once it has been scanned.
+    with open(file_name) as file:
+        for line in file:
+            words = regex.findall(line)
+            if words:
+                print(file_name + ": " + str(words))
--- a/practice_projects/regex_test.txt
+++ b/practice_projects/regex_test.txt
@@ -0,0 +1,13 @@
+Welcome to RegExr v2.1 by gskinner.com, proudly hosted by Media Temple!
+
+Edit the Expression & Text to see matches. Roll over matches or the expression for details. Undo mistakes with ctrl-z. Save Favorites & Share expressions with friends or the Community. Explore your results with Tools. A full Reference & Help is available in the Library, or watch the video Tutorial.
+
+Sample text for testing:
+abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
+0123456789 _+-.,!@#$%^&*();\/|<>"'
+12345 -98.7 3.141 .6180 9,000 +42
+555.123.4567	+1-(800)-555-2468
+foo@demo.net	bar.ba@test.co.uk
+www.demo.com	http://foo.co.uk/
+http://regexr.com/foo.html?q=bar
+https://mediatemple.net
--- a/practice_projects/selective_copy.py
+++ b/practice_projects/selective_copy.py
@@ -0,0 +1,28 @@
+#!python3
+# selective_copy.py - walks through a folder tree and searches for files with a
+# certain file extension (such as .pdf or .jpg). Copy these files from whatever
+# location they are in to a new folder.
+
+import os
+import shutil
+
+
+def selective_copy(flr, ext, new_flr):
+    """Gather every file under ``flr`` ending in ``ext`` into ``new_flr``.
+
+    Note that files are moved (``shutil.move``), not copied.
+
+    flr     -- root folder of the tree to walk.
+    ext     -- suffix, or tuple of suffixes, passed to ``str.endswith``.
+    new_flr -- destination folder; created if it does not exist.
+
+    Any folder whose base name equals that of ``new_flr`` is skipped, so
+    files already gathered are not moved again when ``new_flr`` lies inside
+    ``flr``.
+    """
+    if not os.path.exists(new_flr):
+        os.mkdir(new_flr)
+    target_name = os.path.basename(new_flr)
+
+    for current, _subdirs, names in os.walk(flr):
+        if os.path.basename(current) == target_name:
+            continue
+
+        for name in (n for n in names if n.endswith(ext)):
+            shutil.move(os.path.join(current, name),
+                        os.path.join(new_flr, name))
+
+
+selective_copy("C:\\Users\\pezy\\Downloads", ('.exe', '.msi'),
+               "C:\\Users\\pezy\\Downloads\\Software")
--- a/practice_projects/test_mad_libs.txt
+++ b/practice_projects/test_mad_libs.txt
--- a/practice_questions/README.md
+++ b/practice_questions/README.md
@@ -7,3 +7,7 @@
 - [Chapter 5 – Dictionaries and Structuring Data](ch05/README.md)
 - [Chapter 6 – Manipulating Strings](ch06/README.md)
 - [Chapter 7 – Pattern Matching with Regular Expressions](ch07/README.md)
+- [Chapter 8 – Reading and Writing Files](ch08/README.md)
+- [Chapter 9 – Organizing Files](ch09/README.md)
+- [Chapter 10 – Debugging](ch10/README.md)
+- [Chapter 11 – Web Scraping](ch11/README.md)
--- a/practice_questions/ch08/README.md
+++ b/practice_questions/ch08/README.md
@@ -0,0 +1,37 @@
+# Chapter 8 – Reading and Writing Files
+
+> Q: 1. What is a relative path relative to?
+
+The current working directory.
+
+> Q: 2. What does an absolute path start with?
+
+The root folder, such as `/` or `C:\`.
+
+> Q: 3. What do the os.getcwd() and os.chdir() functions do?
+
+`os.getcwd()` returns the current working directory, and `os.chdir()` changes the current working directory.
+
+> Q: 4. What are the . and .. folders?
+
+`.` is current folder, `..` is the parent folder.
+
+> Q: 5. In C:\bacon\eggs\spam.txt, which part is the dir name, and which part is the base name?
+
+`C:\bacon\eggs` is the dir name, `spam.txt` is the base name.
+
+> Q: 6. What are the three “mode” arguments that can be passed to the open() function?
+
+`r`, `w`, `a`.
+
+> Q: 7. What happens if an existing file is opened in write mode?
+
+Existing file will be erased and completely overwritten.
+
+> Q: 8. What is the difference between the read() and readlines() methods?
+
+`read()` returns the file's entire contents as a single string. `readlines()` returns a list of strings, one for each line.
+
+> Q: 9. What data structure does a shelf value resemble?
+
+dictionary.
--- a/practice_questions/ch09/README.md
+++ b/practice_questions/ch09/README.md
@@ -0,0 +1,17 @@
+# Chapter 9 – Organizing Files
+
+> Q: 1. What is the difference between `shutil.copy()` and `shutil.copytree()`?
+
+`shutil.copy()` copies the file, `shutil.copytree()` copies the directory.
+
+> Q: 2. What function is used to rename files?
+
+`shutil.move()`
+
+> Q: 3. What is the difference between the delete functions in the `send2trash` and `shutil` modules?
+
+`send2trash` will move file to recycle bin, while `shutil` functions will permanently delete files and folders.
+
+> Q: 4. `ZipFile` objects have a `close()` method just like `File` objects’ `close()` method. What `ZipFile` method is equivalent to `File` objects’ `open()` method?
+
+`zipfile.ZipFile()`.
--- a/practice_questions/ch10/README.md
+++ b/practice_questions/ch10/README.md
@@ -0,0 +1,57 @@
+# Chapter 10 – Debugging
+
+> Q: 1. Write an assert statement that triggers an AssertionError if the variable spam is an integer less than 10.
+
+`assert spam >= 10, 'The spam variable is less than 10.'`
+
+> Q: 2. Write an assert statement that triggers an AssertionError if the variables eggs and bacon contain strings that are the same as each other, even if their cases are different (that is, 'hello' and 'hello' are considered the same, and 'goodbye' and 'GOODbye' are also considered the same).
+
+`assert eggs.lower() != bacon.lower(), 'The eggs and bacon variables should contain different strings.'`
+
+> Q: 3. Write an assert statement that always triggers an AssertionError.
+
+`assert False, 'Always triggers an AssertionError'`
+
+> Q: 4. What are the two lines that your program must have in order to be able to call logging.debug()?
+
+```py
+import logging
+logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
+```
+
+> Q: 5. What are the two lines that your program must have in order to have logging.debug() send a logging message to a file named programLog.txt?
+
+```py
+import logging
+logging.basicConfig(filename='programLog.txt', level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
+```
+
+> Q: 6. What are the five logging levels?
+
+DEBUG, INFO, WARNING, ERROR, CRITICAL
+
+> Q: 7. What line of code can you add to disable all logging messages in your program?
+
+`logging.disable(logging.CRITICAL)`
+
+> Q: 8. Why is using logging messages better than using print() to display the same message?
+
+You can disable logging message without removing the logging function calls. You can selectively disable lower-level logging messages.
+
+> Q: 9. What are the differences between the Step, Over, and Out buttons in the Debug Control window?
+
+Step: step into the function call on the current line.
+Over: execute the function call without stepping into it.
+Out: execute the rest of the code until it steps out of the function it is currently in.
+
+> Q: 10. After you click Go in the Debug Control window, when will the debugger stop?
+
+When it reaches the end of the program or a line with a breakpoint.
+
+> Q: 11. What is a breakpoint?
+
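+A breakpoint is a setting on a line of code that causes the debugger to pause when the program execution reaches that line.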
+
+> Q: 12. How do you set a breakpoint on a line of code in IDLE?
+
+Right-click the line in the file editor and select "Set Breakpoint" from the menu.
--- a/practice_questions/ch11/README.md
+++ b/practice_questions/ch11/README.md
@@ -0,0 +1,81 @@
+# Chapter 11 – Web Scraping
+
+> Q: 1. Briefly describe the differences between the webbrowser, requests, BeautifulSoup, and selenium modules.
+
+`webbrowser` launches a web browser to a specific URL with `open()`.
+`requests` downloads files and pages from the Web.
+`BeautifulSoup` parses HTML.
+`selenium` launches and controls a browser.
+
+> Q: 2. What type of object is returned by requests.get()? How can you access the downloaded content as a string value?
+
+A `Response` object. The downloaded content is available as a string in its `text` attribute.
+
+> Q: 3. What Requests method checks that the download worked?
+
+`raise_for_status()`
+
+> Q: 4. How can you get the HTTP status code of a Requests response?
+
+`status_code`
+
+> Q: 5. How do you save a Requests response to a file?
+
+```py
+saveFile = open('SaveFile', 'wb')
+for chunk in res.iter_content(100000):
+    saveFile.write(chunk)
+saveFile.close()
+```
+
+> Q: 6. What is the keyboard shortcut for opening a browser’s developer tools?
+
+F12
+
+> Q: 7. How can you view (in the developer tools) the HTML of a specific element on a web page?
+
+Inspect Element
+
+> Q: 8. What is the CSS selector string that would find the element with an id attribute of main?
+
+`#main`
+
+> Q: 9. What is the CSS selector string that would find the elements with a CSS class of highlight?
+
+`.highlight`
+
+> Q: 10. What is the CSS selector string that would find all the `<div>` elements inside another `<div>` element?
+
+`div div`
+
+> Q: 11. What is the CSS selector string that would find the `<button>` element with a value attribute set to favorite?
+
+`button[value='favorite']`
+
+> Q: 12. Say you have a Beautiful Soup Tag object stored in the variable spam for the element `<div>`Hello world!`</div>`. How could you get a string 'Hello world!' from the Tag object?
+
+spam.getText()
+
+> Q: 13. How would you store all the attributes of a Beautiful Soup Tag object in a variable named linkElem?
+
+linkElem.attrs
+
+> Q: 14. Running import selenium doesn’t work. How do you properly import the selenium module?
+
+`from selenium import webdriver`
+
+> Q: 15. What’s the difference between the find_element_* and find_elements_* methods?
+
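+`find_element_*` returns the first matching element as a single `WebElement` object; `find_elements_*` returns a list of all matching `WebElement` objects.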
+
+> Q: 16. What methods do Selenium’s WebElement objects have for simulating mouse clicks and keyboard keys?
+
+`click()` and `send_keys()`
+
+> Q: 17. You could call send_keys(Keys.ENTER) on the Submit button’s WebElement object, but what is an easier way to submit a form with Selenium?
+
+`submit()`
+
+> Q: 18. How can you simulate clicking a browser’s Forward, Back, and Refresh buttons with Selenium?
+
+`forward()`, `back()`, `refresh()`
Author	SHA1	Message	Date
pezy	55c56c864c	Merge branch 'master' of github.com:pezy/AutomateTheBoringStuffWithPython	2017-11-16 17:00:32 +08:00
pezy	7c317d0010	added fetch from URL	2017-11-16 16:59:58 +08:00
pezy	864bc0cc22	finished download image from huaban.	2017-11-16 00:25:39 +08:00
pezy	ffe50b01bb	improving	2017-11-14 18:01:55 +08:00
pezy	db264a0b93	try to finished ch11	2017-11-10 18:19:24 +08:00
pezy	4c7cd73626	finished ch10	2017-11-09 11:40:54 +08:00
pezy	da6de632e8	finished ch9	2017-11-08 18:21:39 +08:00
kerogen-pezy	5eeca42044	Finished Ch08	2017-06-20 11:34:01 +08:00
kerogen-pezy	ad950d46c1	finished project of chapter 8	2017-06-20 11:14:34 +08:00
pezy_mbp	1fb40a6540	added regex_search	2017-06-20 08:29:48 +08:00
				`@@ -0,0 +1 @@`
				`The silly panda walked to the chandelier and then screamed. A nearby picked truck was unaffected by these events.`