Files
python3-cookbook/basic/samples/mongodb/logparser.py
XiongNeng 76b4ddc829 chang all
2015-05-07 12:02:58 +08:00

92 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Topic: 一个简单的日志解析工具
Desc :
日志格式如下:
* 客户端IP地址
* 客户标识:通常不可靠,可以不记录
* 认证用户名:如果无需认证也没有
* 请求接受时间:包括日期、时间、地区
* 请求内容:进一步划分为:方法、资源、请求参数、协议
* 状态码HTTP状态码
* 返回对象大小byte为单位
* 提交方Referrer通常是连接到Web页面或资源的URI或URL
* 用户代理User Agent客户端程序比如Mozilla、Chrome等
"""
import re
import inspect
from pymongo import Connection
LINE_REGEX = re.compile(r'(\d+\.\d+\.\d+\.\d+) ([^ ]*) ([^ ]*) '
r'\[([^\]]*)\] "([^"]*)" (\d+) ([^ ]*) '
r'"([^"]*)" "([^"]*)"')
class ApacheLogRecord():
def __init__(self, *rgroups):
self.ip, self.ident, \
self.http_user, self.time, \
self.request_line, self.http_response_code, \
self.http_response_size, self.referrer, \
self.user_agent = rgroups
self.http_method, self.url, self.http_vers = self.request_line.split()
def __str__(self):
return ' '.join([self.ip, self.ident, self.time, self.request_line,
self.http_response_code, self.http_response_size,
self.referrer, self.user_agent])
class ApacheLogFile():
def __init__(self, logfile):
self.filename = logfile
def my_generator(self):
_match = LINE_REGEX.match
print(self.filename)
with open(self.filename, encoding='utf-8') as f:
for line in f:
m = _match(line)
if m:
print(line)
try:
log_line = ApacheLogRecord(*m.groups())
yield log_line
except GeneratorExit:
pass
except Exception as e:
print('NON_COMPLIANT_FORMAT: ', line, 'Exception: ', e)
def props(ob):
pr = {}
for name in dir(ob):
val = getattr(ob, name)
if not name.startswith('__') and not inspect.ismethod(val):
pr[name] = val
return pr
def insert_log():
connection = Connection('localhost', 27017)
db = connection.mydb
collection = db.logdata
alf = ApacheLogFile(r'D:\work\gitproject\python3-cookbook\configs\app.log')
for lg_line in alf.my_generator():
collection.insert(props(lg_line))
def query_log():
connection = Connection('localhost', 27017)
db = connection.mydb
collection = db.logdata
for doc in collection.find():
print(doc)
connection.close()
if __name__ == '__main__':
""""""
query_log()