统计nginx日志里最近五分钟的访问量(注:下面的脚本实际读取的是 Tomcat 的 localhost_access_log 访问日志)

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Random_lee
import time
import os
import re


class StatusPV(object):
    """Count the requests logged during the most recent observation window.

    The counter reads today's ``localhost_access_log.<YYYY-MM-DD>.txt`` file
    under ``log_path`` and counts the entries whose timestamp lies within
    ``WINDOW_SECONDS`` of the current time.

    Only today's file is scanned, so immediately after midnight the entries
    written to the previous day's file are not included.
    """

    # Length of the observation window, in seconds.
    WINDOW_SECONDS = 300
    # Files larger than this many bytes are not scanned from the beginning.
    LARGE_FILE_BYTES = 1000000000
    # Entries carrying any other UTC offset are reported and skipped.
    # NOTE(review): timestamps are converted with time.mktime(), i.e. they are
    # interpreted in the host's local time zone -- presumably the host runs
    # at UTC+8; confirm before deploying elsewhere.
    EXPECTED_TZ = '+0800'
    # Layout of the date/time part inside the square brackets.
    TIME_FORMAT = '%d/%b/%Y:%H:%M:%S'
    # Example entry:
    # 10.116.201.71 - - [02/Sep/2018:09:44:13 +0800] "POST /servlet/UpdateJavaFXServlet HTTP/1.0" 200 268
    # The size field may be "-" when no body bytes were sent.
    LOG_PATTERN = re.compile(
        r'^(?P<RemoteIP>.*) - - (?P<datatime>\[[^\]]*\]) (?P<request>".+") '
        r'(?P<status>\d{3}) (?P<web_size>\d{1,10}|-)'
    )

    def __init__(self):
        self.log_path = '/opt/apache-tomcat-7.0.69/logs/'
        self.log_time = time.strftime("%Y-%m-%d")
        self.log_name = 'localhost_access_log.%s.txt' % (self.log_time)
        self.logfile = os.path.join(self.log_path, self.log_name)

    def get_filesize(self):
        """Pick the byte offset where scanning starts and store it in ``self.seek``.

        For files larger than ``LARGE_FILE_BYTES`` the first fifth of the file
        is skipped; smaller files are scanned from the beginning.

        Raises:
            OSError: if the size of the log file cannot be determined.
        """
        file_size = os.path.getsize(self.logfile)
        if file_size > self.LARGE_FILE_BYTES:
            # Floor division keeps the offset an int; seek() rejects floats.
            self.seek = file_size // 5
        else:
            self.seek = 0

    def count_pv(self):
        """Print and return the number of requests inside the window.

        Returns:
            The number of counted entries, or 0 (after printing an error
            message) when the log file does not exist.
        """
        # Check for the file first: get_filesize() raises on a missing file.
        if not os.path.exists(self.logfile):
            print('error:' + self.logfile + ' not existed.')
            return 0

        self.get_filesize()
        # One reference time for the whole scan keeps the cutoff consistent.
        now = time.time()
        prefixes = self._hour_prefixes(now)
        num = 0
        # Binary mode: seeking to an arbitrary byte offset is well defined,
        # and a stray undecodable byte cannot abort the scan.
        with open(self.logfile, 'rb') as f:
            f.seek(self.seek, 0)
            for raw in f:
                line = raw.decode('utf-8', 'replace')
                # Cheap substring test before the more expensive regex.
                if not any(prefix in line for prefix in prefixes):
                    continue
                if self._in_window(line, now):
                    num += 1
        print(num)
        return num

    def _hour_prefixes(self, now):
        """Return the ``dd/Mon/yyyy:HH:`` prefixes of every hour the window touches."""
        prefix_format = '%d/%b/%Y:%H:'
        prefixes = set()
        moment = now - self.WINDOW_SECONDS
        # Step through the window hour by hour so that a window spanning an
        # hour boundary also covers the earlier hour(s).
        while moment < now:
            prefixes.add(time.strftime(prefix_format, time.localtime(moment)))
            moment += 3600
        prefixes.add(time.strftime(prefix_format, time.localtime(now)))
        return tuple(prefixes)

    def _in_window(self, line, now):
        """Return True when ``line`` is a well-formed entry inside the window."""
        match = self.LOG_PATTERN.search(line)
        if match is None:
            # Not an access-log entry in the expected layout; skip it.
            return False

        # "[02/Sep/2018:09:44:13 +0800]" -> ["02/Sep/2018:09:44:13", "+0800"]
        parts = match.group('datatime').strip('[]').split(' ')
        if len(parts) < 2:
            return False
        data_time, time_zone = parts[0], parts[1]
        if time_zone != self.EXPECTED_TZ:
            print("log format error")
            return False

        try:
            time_stamp = time.mktime(time.strptime(data_time, self.TIME_FORMAT))
        except (ValueError, OverflowError):
            # Unparseable or out-of-range timestamp; treat as malformed.
            return False
        return now - time_stamp <= self.WINDOW_SECONDS


if __name__ == '__main__':
    # Script entry point: build the counter and report the request count.
    StatusPV().count_pv()

  

原文地址:https://www.cnblogs.com/randomlee/p/9490466.html