python pandas学习记录 二

api_access_20200821.log

3.16.25.4 - - [21/Aug/2020:00:00:02 +0800] "GET /erp/scp/connect/health HTTP/1.1" 200 54 0.060 "-" "-"
1.4.134.24 - - [21/Aug/2020:00:00:02 +0800] "GET /erp/scp/connect/health HTTP/1.1" 200 54 0.733 "-" "-"
1.4.134.24 - - [21/Aug/2020:00:00:02 +0800] "GET /api/index/home HTTP/1.1" 200 65 0.003 "-" "-"
1.6.227.186 - - [21/Aug/2020:00:00:08 +0800] "GET /erp/android/query/patch?versionName=2.2.7 HTTP/1.1" 200 50 0.026 "device:android22;model:vivovivo V3M A;version:2.2.7;imei:862350031383868" "0e04d119bb"
# Split each log line on single spaces (run in a Git Bash window) and keep
# field 7 (the request path) and field 11 (the numeric value after the
# response size -- presumably the request time; confirm against the log format).
# After the file is generated, open the CSV in a text editor and add a header
# line; the pandas script below reads the first column by the name `url`.
cut -d' ' -f7,11 api_access_20200821.log > api_access_20200821.csv
# Drop rows whose path contains ".png", ".js" or "/app/" and save the rest as
# 21ok.csv. -F makes the patterns literal strings: as unquoted regexes, the "."
# in ".js" matched any character, so paths such as "/jsapi" were dropped too.
grep -vF -e '.png' -e '.js' -e '/app/' api_access_20200821.csv > 21ok.csv

开始解析

import pandas as pd

# grep -v '.html' t21.log | grep -v '.js' | grep -v '.css' | grep -v '.png' | grep -v '.txt' | grep -v '.jpg' | grep -v '.woff' | grep -v '.ttf' | grep -v '.ico'| grep -v '/check/image' > t21_api.log
# grep -v '.html' t25.log | grep -v '.js' | grep -v '.css' | grep -v '.png' | grep -v '.txt' | grep -v '.jpg' | grep -v '.woff' | grep -v '.ttf' | grep -v '.ico'| grep -v '/check/image' > t25_api.log
def a(f):
    """Summarise per-URI statistics from a space-separated log extract.

    Reads ``f`` as a space-delimited table that must contain a ``url``
    column, copies the part of each URL before the first ``?`` into a new
    ``uri`` column, then computes count/min/max/mean of every numeric
    column grouped by ``uri``. The loaded table, the aggregated table and
    the aggregated table's type are printed, and the aggregate is written
    to ``agg_<f>.xlsx``.

    Args:
        f: Path of the input file; it is also embedded in the output
            file name.
    """
    data = pd.read_csv(f, sep=' ')
    # Keep only the text before the first '?' so that requests differing
    # only in their query string fall into the same group.
    data['uri'] = data['url'].str.split('?').str[0]
    print(data)
    # Aggregate numeric columns only: 'mean' is undefined for the string
    # 'url' column, and pandas 2.x raises TypeError for it instead of
    # silently skipping the column as older releases did.
    numeric_cols = data.select_dtypes(include='number').columns.tolist()
    agg = data.groupby('uri')[numeric_cols].agg(['count', 'min', 'max', 'mean'])
    print(agg)
    print(type(agg))
    agg.to_excel(f'agg_{f}.xlsx')
    

#a('t21_api.log')
#a('t25_api.log')

# Aggregate the filtered extract written by the grep step (21ok.csv).
a('21ok.csv')

原文地址:https://www.cnblogs.com/lanliying/p/13571137.html