92 lines
2.7 KiB
Python
92 lines
2.7 KiB
Python
#!/usr/bin/env python
|
||
# -*- encoding: utf-8 -*-
|
||
"""
|
||
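Topic: A simple log parsing tool
Desc :
Each log line has the following fields:
* Client IP address
* Client identity: usually unreliable, so it may be left unrecorded
* Authenticated user name: absent when no authentication is required
* Time the request was received: includes the date, time, and time zone
* Request content: further divided into method, resource, request parameters, and protocol
* Status code: the HTTP status code
* Size of the returned object: in bytes
* Referrer: usually the URI or URL of the web page or resource that linked to this one
* User Agent: the client program, such as Mozilla or Chrome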
|
||
"""
|
||
import re
|
||
import inspect
|
||
from pymongo import Connection
|
||
|
||
# Pattern for one access-log line. Adjacent raw-string fragments are
# concatenated by the parser, one fragment per captured field; the fields
# are separated by single spaces.
LINE_REGEX = re.compile(
    r'(\d+\.\d+\.\d+\.\d+) '  # 1: client IPv4 address
    r'([^ ]*) '               # 2: client identity
    r'([^ ]*) '               # 3: authenticated user name
    r'\[([^\]]*)\] '          # 4: request time, enclosed in [...]
    r'"([^"]*)" '             # 5: quoted request line
    r'(\d+) '                 # 6: HTTP status code
    r'([^ ]*) '               # 7: response size
    r'"([^"]*)" '             # 8: quoted referrer
    r'"([^"]*)"'              # 9: quoted user agent
)
|
||
|
||
|
||
class ApacheLogRecord:
    """One parsed access-log entry.

    Every field is stored exactly as it was passed in; no type conversion
    is performed.
    """

    def __init__(self, *rgroups):
        """Populate the record from nine positional field values.

        Expected order: ip, ident, http_user, time, request_line,
        http_response_code, http_response_size, referrer, user_agent.

        Raises:
            ValueError: if ``rgroups`` does not hold exactly nine values, or
                if the request line does not split on whitespace into
                exactly three parts (method, URL, protocol version).
        """
        (ip, ident, user, when, request,
         status, size, referrer, agent) = rgroups

        self.ip = ip
        self.ident = ident
        self.http_user = user
        self.time = when
        self.request_line = request
        self.http_response_code = status
        self.http_response_size = size
        self.referrer = referrer
        self.user_agent = agent

        # The request line is expected to look like "<method> <url> <version>".
        method, url, version = request.split()
        self.http_method = method
        self.url = url
        self.http_vers = version

    def __str__(self):
        """Return the main fields joined by single spaces.

        ``http_user`` is not part of the output.
        """
        parts = (
            self.ip,
            self.ident,
            self.time,
            self.request_line,
            self.http_response_code,
            self.http_response_size,
            self.referrer,
            self.user_agent,
        )
        return ' '.join(parts)
|
||
|
||
|
||
class ApacheLogFile():
    """Reader that turns the lines of one access-log file into records."""

    def __init__(self, logfile):
        """Remember the path of the log file; the file is not opened here.

        Args:
            logfile: path of the log file to parse.
        """
        self.filename = logfile

    def my_generator(self):
        """Yield an ``ApacheLogRecord`` for each line matching ``LINE_REGEX``.

        The file is opened as UTF-8 text and read line by line. Lines that
        do not match the pattern are skipped silently. Lines that match but
        cannot be turned into a record are reported on stdout with a
        ``NON_COMPLIANT_FORMAT`` message and skipped. The file name and
        every matching line are also printed to stdout.

        The generator can be closed or abandoned early: ``GeneratorExit``
        is allowed to propagate, so the file is closed promptly and
        ``close()`` does not fail with "generator ignored GeneratorExit".

        Yields:
            ApacheLogRecord: one record per well-formed matching line.
        """
        _match = LINE_REGEX.match
        print(self.filename)
        with open(self.filename, encoding='utf-8') as f:
            for line in f:
                m = _match(line)
                if not m:
                    continue
                print(line)
                # Only record construction is guarded. The yield stays
                # outside the try block so that GeneratorExit, and anything
                # the consumer throws in, is never mistaken for a bad line.
                try:
                    log_line = ApacheLogRecord(*m.groups())
                except Exception as e:
                    # Best-effort parsing: report the bad line and move on.
                    print('NON_COMPLIANT_FORMAT: ', line, 'Exception: ', e)
                    continue
                yield log_line
|
||
|
||
|
||
def props(ob):
    """Collect the data attributes of ``ob`` into a dictionary.

    Every name reported by ``dir(ob)`` is looked up with ``getattr``. A name
    is kept unless it starts with a double underscore or its value is a
    bound method. Attributes defined on the class are therefore included
    alongside instance attributes.

    Args:
        ob: any object.

    Returns:
        dict: attribute name -> attribute value, in ``dir(ob)`` order.
    """
    # Pair each name with its value lazily, in dir() order.
    resolved = ((attr, getattr(ob, attr)) for attr in dir(ob))
    return {
        attr: value
        for attr, value in resolved
        if not (attr.startswith('__') or inspect.ismethod(value))
    }
|
||
|
||
|
||
def insert_log(logfile=r'D:\work\gitproject\python3-cookbook\configs\app.log'):
    """Parse a log file and store each record in MongoDB.

    Connects to the MongoDB server at localhost:27017 and inserts one
    document per parsed record into the ``logdata`` collection of the
    ``mydb`` database. Each document is the attribute dictionary that
    ``props`` builds from the record.

    Args:
        logfile: path of the log file to load. Defaults to the path that
            used to be hard-coded here, so existing no-argument calls
            behave as before.
    """
    connection = Connection('localhost', 27017)
    try:
        collection = connection.mydb.logdata
        for lg_line in ApacheLogFile(logfile).my_generator():
            collection.insert(props(lg_line))
    finally:
        # Release the connection even if parsing or an insert fails.
        connection.close()
|
||
|
||
|
||
def query_log():
    """Print every stored log document to stdout.

    Connects to the MongoDB server at localhost:27017 and prints each
    document found in the ``logdata`` collection of the ``mydb`` database.
    The connection is closed afterwards, including when the query or the
    iteration raises.
    """
    connection = Connection('localhost', 27017)
    try:
        collection = connection.mydb.logdata
        for doc in collection.find():
            print(doc)
    finally:
        connection.close()
|
||
|
||
|
||
if __name__ == '__main__':
    # Script entry point: print the log documents currently stored in MongoDB.
    query_log()
|