Files
python3-cookbook/basic/samples/excel/excel_mysql.py

230 lines
7.7 KiB
Python
Raw Normal View History

2015-05-04 17:58:26 +08:00
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Topic: 将航信excel表数据转换为MySQL中的数据
Desc :
"""
import sys
import copy
from openpyxl import Workbook
from openpyxl import load_workbook
import logging
import datetime
import mysql.connector
from mysql.connector import errorcode
sql_create1 = """
CREATE TABLE t_enterprise (
id BIGINT PRIMARY KEY AUTO_INCREMENT COMMENT '主键ID',
2015-05-05 11:23:13 +08:00
name VARCHAR(100) COMMENT '企业名称',
tax_code VARCHAR(30) COMMENT '税号',
2015-05-04 17:58:26 +08:00
region_id BIGINT COMMENT '区域ID',
customer_type INTEGER COMMENT '客户类型',
enterprise_type INTEGER COMMENT '企业类型',
address VARCHAR(200) COMMENT '详细地址',
postcode VARCHAR(10) COMMENT '邮编',
2015-05-05 11:23:13 +08:00
tel VARCHAR(50) COMMENT '联系电话',
2015-05-05 16:11:22 +08:00
contact VARCHAR(60) COMMENT '联系人',
2015-05-04 17:58:26 +08:00
fax VARCHAR(30) COMMENT '传真',
2015-05-05 16:11:22 +08:00
mobile VARCHAR(80) COMMENT '手机号',
2015-05-04 17:58:26 +08:00
created_time DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT '企业表';
"""
sql_create2 = """
CREATE TABLE t_region (
id BIGINT PRIMARY KEY AUTO_INCREMENT COMMENT '主键ID',
region_code VARCHAR(16) COMMENT '邮编',
regian_name VARCHAR(20) COMMENT '区域名',
2015-05-05 11:23:13 +08:00
note VARCHAR(200) COMMENT '备注',
2015-05-04 17:58:26 +08:00
parent_id BIGINT COMMENT '父级ID',
created_time DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT '区域表';
"""
sql_insert_enterprise = """
INSERT INTO t_enterprise
(id,name,tax_code,region_id,customer_type,enterprise_type)
VALUES (%s, %s, %s, %s, %s, %s);
"""
sql_update_enterprise = """
UPDATE t_enterprise
SET
address=%s,
postcode=%s,
tel=%s,
contact=%s,
fax=%s,
mobile=%s
WHERE id=%s
"""
sql_insert_region = """
INSERT INTO t_region
(id,region_code,regian_name,note,parent_id)
VALUES (%s, %s, %s, %s, %s);
"""
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
2015-05-05 11:23:13 +08:00
datefmt='%Y-%m-%d %H:%M:%S',
2015-05-05 16:11:22 +08:00
handlers=[logging.FileHandler('d:/logs/excel.log', 'a', 'utf-8')])
2015-05-04 17:58:26 +08:00
_log = logging.getLogger('app.' + __name__)
def _connect():
config = {
'user': 'root',
'password': 'mysql',
'host': '192.168.203.95',
'database': 'hangxin',
'raise_on_warnings': True,
}
cnx = None
try:
cnx = mysql.connector.connect(**config)
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
if cnx:
cnx.close()
return cnx
def _init_table():
conn_ = _connect()
cursor = conn_.cursor()
cursor.execute(sql_create1)
cursor.execute(sql_create2)
cursor.close()
conn_.commit()
conn_.close()
def parse_sheet(wb, sheet_name, column_num, log_msg):
ws = wb[sheet_name] # ws is now an IterableWorksheet
result_list = []
for row in ws.rows:
row_data = []
for i, cell in enumerate(row):
if i >= column_num:
break
row_data.append(cell.value)
result_list.append(row_data[:])
2015-05-05 11:23:13 +08:00
_log.info(log_msg)
2015-05-04 17:58:26 +08:00
return result_list
2015-05-05 16:11:22 +08:00
def handle_wrong_line(conn_, cursor_, wrong_line, ty=1):
# 处理企业资料创建时的错误
if ty == 1:
id1 = wrong_line[0][0]
many_data = wrong_line[0][1]
next1 = wrong_line[1][0]
next2 = wrong_line[1][1]
next3 = wrong_line[1][2]
many_data = many_data.replace('"', '')
many_data = many_data.replace('_x000D_', '')
many_list = [s.split() for s in many_data.split('\n') if s]
many_list[0].insert(0, str(id1))
many_list[-1][-1] = "{}{}".format(many_list[-1][-1], next1)
many_list[-1].append(next2)
many_list[-1].append(next3)
# 扩充至指定的长度
for many_item in many_list:
if len(many_item) < 6:
for i in range(6 - len(many_item)):
many_item.append(None)
for i, v in enumerate([tuple(li) for li in many_list]):
try:
cursor_.execute(sql_insert_enterprise, v)
except:
conn_.rollback()
logging.exception('handle_wrong_line哈企业资料Exception,line={}'.format(i))
continue
if i % 50 == 0:
conn_.commit()
conn_.commit()
2015-05-05 11:23:13 +08:00
2015-05-04 17:58:26 +08:00
def xlsx_to_table(xlsx_name):
conn_ = _connect()
2015-05-05 11:23:13 +08:00
_log.info('Excel文件解析start')
2015-05-04 17:58:26 +08:00
wb = load_workbook(xlsx_name, read_only=True)
# 先收集企业资料表
list1 = parse_sheet(wb, 'customer', 6, 'customer表解析end')
data1 = [(v[0], v[1], v[2], v[3], v[4], v[5]) for v in list1[1:]]
# 收集地址和联系人表
list2 = parse_sheet(wb, 'addr', 10, 'addr表解析end')
data2 = [(v[2], v[4], v[5], v[6], v[7], v[8], v[0]) for v in list2[1:]]
# 收集区域表
list3 = parse_sheet(wb, 'region', 5, 'region表解析end')
data3 = [(v[0], v[1], v[2], v[3], v[4]) for v in list3[1:]]
2015-05-05 11:23:13 +08:00
_log.info('Excel文件解析end')
2015-05-05 16:11:22 +08:00
_log.info('---------------------------分割线-----------------------------')
2015-05-05 11:23:13 +08:00
_log.info('数据库更新start')
2015-05-04 17:58:26 +08:00
cursor = conn_.cursor()
2015-05-05 16:11:22 +08:00
_log.info('插入企业资料start')
wrong1 = []
find_large_name = False
for i, d1 in enumerate(data1):
if find_large_name:
wrong1.append(d1)
handle_wrong_line(conn_, cursor, wrong1)
wrong1.clear()
find_large_name = False
try:
2015-05-05 11:23:13 +08:00
cursor.execute(sql_insert_enterprise, d1)
2015-05-05 16:11:22 +08:00
except:
conn_.rollback()
if len(str(d1[1])) > 600:
logging.exception('-------插入企业资料Exception,line={}--------'.format(i))
wrong1.append(d1)
find_large_name = True
continue
if i % 50 == 0:
conn_.commit()
conn_.commit()
_log.info('插入企业资料end')
2015-05-05 11:23:13 +08:00
2015-05-05 16:11:22 +08:00
_log.info('更新企业联系信息start')
for i, d2 in enumerate(data2):
try:
2015-05-05 11:23:13 +08:00
cursor.execute(sql_update_enterprise, d2)
2015-05-05 16:11:22 +08:00
except:
conn_.rollback()
logging.exception('-------更新企业联系信息Exception,line={}-------'.format(i))
handle_wrong_line(conn_, cursor,d2, ty=2)
if i % 50 == 0:
conn_.commit()
conn_.commit()
_log.info('插入企业资料表end')
2015-05-05 11:23:13 +08:00
2015-05-05 16:11:22 +08:00
_log.info('插入区域信息start')
for i, d3 in enumerate(data3):
try:
2015-05-05 11:23:13 +08:00
cursor.execute(sql_insert_region, d3)
2015-05-05 16:11:22 +08:00
except:
conn_.rollback()
logging.exception('-------插入区域信息Exception,line={}-------'.format(i))
handle_wrong_line(conn_, cursor, d3, ty=3)
if i % 50 == 0:
conn_.commit()
conn_.commit()
_log.info('插入区域信息end')
2015-05-05 11:23:13 +08:00
_log.info('数据库更新end')
2015-05-04 17:58:26 +08:00
cursor.close()
conn_.close()
if __name__ == '__main__':
2015-05-07 12:02:58 +08:00
excel = r'D:\download\20150505\gdc.xlsx'
2015-05-04 17:58:26 +08:00
_init_table()
conn = _connect()
xlsx_to_table(excel)
pass