2015-05-04 17:58:26 +08:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
# -*- encoding: utf-8 -*-
|
|
|
|
|
"""
|
|
|
|
|
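Topic: Convert the data of the Hangxin Excel workbook into MySQL tables
Desc : Reads the `customer`, `addr` and `region` sheets of the workbook and
       loads them into the `t_enterprise` and `t_region` tables.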
|
|
|
|
|
"""
|
|
|
|
|
import sys
|
|
|
|
|
import copy
|
|
|
|
|
from openpyxl import Workbook
|
|
|
|
|
from openpyxl import load_workbook
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
import datetime
|
|
|
|
|
import mysql.connector
|
|
|
|
|
from mysql.connector import errorcode
|
|
|
|
|
|
|
|
|
|
# DDL for the enterprise table (t_enterprise); executed by _init_table().
# The COMMENT texts are part of the SQL sent to the server and are kept as-is.
sql_create1 = """
CREATE TABLE t_enterprise (
    id BIGINT PRIMARY KEY AUTO_INCREMENT COMMENT '主键ID',
    name VARCHAR(100) COMMENT '企业名称',
    tax_code VARCHAR(30) COMMENT '税号',
    region_id BIGINT COMMENT '区域ID',
    customer_type INTEGER COMMENT '客户类型',
    enterprise_type INTEGER COMMENT '企业类型',
    address VARCHAR(200) COMMENT '详细地址',
    postcode VARCHAR(10) COMMENT '邮编',
    tel VARCHAR(50) COMMENT '联系电话',
    contact VARCHAR(10) COMMENT '联系人',
    fax VARCHAR(30) COMMENT '传真',
    mobile VARCHAR(16) COMMENT '手机号',
    created_time DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT '企业表';
"""
|
|
|
|
|
# DDL for the region table (t_region); executed by _init_table().
# NOTE(review): the column is spelled `regian_name` (sic). sql_insert_region
# uses the same spelling, so rename both together or not at all.
# NOTE(review): the COMMENT on region_code reads '邮编' (postcode), which looks
# copied from the enterprise table -- confirm the intended description.
sql_create2 = """
CREATE TABLE t_region (
    id BIGINT PRIMARY KEY AUTO_INCREMENT COMMENT '主键ID',
    region_code VARCHAR(16) COMMENT '邮编',
    regian_name VARCHAR(20) COMMENT '区域名',
    note VARCHAR(200) COMMENT '备注',
    parent_id BIGINT COMMENT '父级ID',
    created_time DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT '区域表';
"""
|
|
|
|
|
# Parameterized INSERT for the basic enterprise columns; takes six values in
# the order (id, name, tax_code, region_id, customer_type, enterprise_type).
sql_insert_enterprise = """
INSERT INTO t_enterprise
    (id,name,tax_code,region_id,customer_type,enterprise_type)
VALUES (%s, %s, %s, %s, %s, %s);
"""
|
|
|
|
|
# Parameterized UPDATE that fills in the contact columns of an existing
# enterprise row; takes seven values in the order
# (address, postcode, tel, contact, fax, mobile, id).
sql_update_enterprise = """
UPDATE t_enterprise
SET
    address=%s,
    postcode=%s,
    tel=%s,
    contact=%s,
    fax=%s,
    mobile=%s
WHERE id=%s
"""
|
|
|
|
|
# Parameterized INSERT for the region table; takes five values in the order
# (id, region_code, regian_name, note, parent_id). The `regian_name` spelling
# matches the column created by sql_create2.
sql_insert_region = """
INSERT INTO t_region
    (id,region_code,regian_name,note,parent_id)
VALUES (%s, %s, %s, %s, %s);
"""
|
|
|
|
|
# Configure the root logger: INFO and above, appended to excel.log (UTF-8),
# each record prefixed with timestamp, source file, line number and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=[logging.FileHandler('excel.log', 'a', 'utf-8')],
)
# Module logger used by the functions in this file.
_log = logging.getLogger('app.' + __name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _connect():
    """Open a connection to the ``hangxin`` MySQL database.

    Returns:
        The connection returned by ``mysql.connector.connect``, or ``None``
        when connecting fails. On failure the reason is printed to stdout
        and no exception is raised, so callers must check for ``None``.
    """
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration before this is used outside a test environment.
    config = {
        'user': 'root',
        'password': 'mysql',
        'host': '192.168.203.95',
        'database': 'hangxin',
        'raise_on_warnings': True,
    }
    try:
        return mysql.connector.connect(**config)
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    # connect() raised, so there is no connection object to close.
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _init_table():
    """Create the ``t_enterprise`` and ``t_region`` tables.

    Executes ``sql_create1`` and ``sql_create2`` on a fresh connection and
    commits. The cursor and the connection are closed even when one of the
    statements fails.

    Raises:
        RuntimeError: if ``_connect()`` could not open a connection.
        mysql.connector.Error: propagated from executing the DDL statements.
    """
    conn_ = _connect()
    if conn_ is None:
        # _connect() reports failures by returning None; fail with a clear
        # message instead of an AttributeError on None.
        raise RuntimeError('cannot create tables: database connection failed')
    try:
        cursor = conn_.cursor()
        try:
            cursor.execute(sql_create1)
            cursor.execute(sql_create2)
        finally:
            cursor.close()
        conn_.commit()
    finally:
        conn_.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_sheet(wb, sheet_name, column_num, log_msg):
    """Read the leading columns of every row of one worksheet.

    Args:
        wb: Workbook object supporting ``wb[sheet_name]``.
        sheet_name: Name of the worksheet to read.
        column_num: Maximum number of leading cells taken from each row.
        log_msg: Message logged at INFO level once the sheet has been read.

    Returns:
        A list with one inner list per worksheet row (all rows, including
        the first), holding the ``value`` of at most ``column_num`` leading
        cells. Rows with fewer cells produce shorter inner lists.
    """
    ws = wb[sheet_name]
    # zip() stops as soon as range(column_num) is exhausted, so no more than
    # column_num cells are taken from a row.
    result_list = [
        [cell.value for _, cell in zip(range(column_num), row)]
        for row in ws.rows
    ]
    _log.info(log_msg)
    return result_list
|
|
|
|
|
|
|
|
|
|
|
2015-05-05 11:23:13 +08:00
|
|
|
def handle_wrong_line(wrong_line):
    """Hook for rows rejected by ``xlsx_to_table``; currently does nothing.

    Args:
        wrong_line: The row tuple that failed validation.
    """
    # Placeholder: rejected rows are skipped by the caller after this returns.
    pass
|
|
|
|
|
|
2015-05-04 17:58:26 +08:00
|
|
|
def _execute_batched(conn_, cursor, sql, rows, batch_size=50):
    """Execute *sql* once per row, committing every *batch_size* rows and at the end."""
    for count, row in enumerate(rows, 1):
        cursor.execute(sql, row)
        if count % batch_size == 0:
            conn_.commit()
    conn_.commit()


def _valid_enterprise_rows(rows):
    """Yield enterprise rows, logging and skipping those that fail validation."""
    for row in rows:
        name, tax_code = row[1], row[2]
        # An empty cell arrives as None; only string names can be over-long.
        too_long = isinstance(name, str) and len(name) > 300
        if too_long and not tax_code:
            _log.error('这一行有问题')
            handle_wrong_line(row)
            continue
        yield row


def xlsx_to_table(xlsx_name):
    """Load the ``customer``, ``addr`` and ``region`` sheets into MySQL.

    Steps:
      1. ``customer`` sheet, first 6 columns -> ``sql_insert_enterprise``.
      2. ``addr`` sheet, columns 2, 4, 5, 6, 7, 8 plus column 0 as the row
         id -> ``sql_update_enterprise``.
      3. ``region`` sheet, first 5 columns -> ``sql_insert_region``.

    The first row of every sheet is skipped (presumably a header row --
    confirm against the workbook). Work is committed in batches of 50 rows.

    Args:
        xlsx_name: Path of the ``.xlsx`` workbook to import.

    Raises:
        RuntimeError: if ``_connect()`` could not open a connection.
        Exception: any error from the database calls is logged and re-raised.
    """
    conn_ = _connect()
    if conn_ is None:
        # _connect() reports failures by returning None.
        raise RuntimeError('cannot import workbook: database connection failed')
    try:
        _log.info('Excel文件解析start')
        wb = load_workbook(xlsx_name, read_only=True)
        try:
            # Collect the enterprise master data first.
            list1 = parse_sheet(wb, 'customer', 6, 'customer表解析end')
            # Collect the address / contact sheet.
            list2 = parse_sheet(wb, 'addr', 10, 'addr表解析end')
            # Collect the region sheet.
            list3 = parse_sheet(wb, 'region', 5, 'region表解析end')
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            wb.close()

        data1 = [(v[0], v[1], v[2], v[3], v[4], v[5]) for v in list1[1:]]
        # Parameter order follows sql_update_enterprise; the id goes last.
        data2 = [(v[2], v[4], v[5], v[6], v[7], v[8], v[0]) for v in list2[1:]]
        data3 = [(v[0], v[1], v[2], v[3], v[4]) for v in list3[1:]]
        _log.info('Excel文件解析end')
        _log.info('---------------------------分割线--------------------------------')

        _log.info('数据库更新start')
        cursor = conn_.cursor()
        try:
            _log.info('插入企业资料start')
            _execute_batched(conn_, cursor, sql_insert_enterprise,
                             _valid_enterprise_rows(data1))
            _log.info('插入企业资料end')

            _log.info('更新企业联系信息start')
            _execute_batched(conn_, cursor, sql_update_enterprise, data2)
            _log.info('更新企业联系信息end')

            _log.info('插入区域信息start')
            _execute_batched(conn_, cursor, sql_insert_region, data3)
            _log.info('插入区域信息end')
        except Exception:
            _log.exception('Got exception on db handler')
            raise
        finally:
            cursor.close()
        _log.info('数据库更新end')
    finally:
        conn_.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Workbook path: the first command-line argument when given, otherwise
    # the original hard-coded default.
    default_excel = r'D:\download\20150505\gdc.xlsx'
    excel = sys.argv[1] if len(sys.argv) > 1 else default_excel
    _init_table()
    # xlsx_to_table() opens and closes its own connection, so none is
    # created here.
    xlsx_to_table(excel)
|