shell脚本,日志提取

5715问答卡展现query获取:

处理脚本run.sh

#!/bin/bash
#
# run.sh - for one day of logs, split the srcid 5715 (Q&A card) records into
# "shown" and "not shown" and extract the queries from each group.
#
# Usage:  ./run.sh <date>        e.g. ./run.sh 20210409
# Needs:  the hadoop CLI, and gawk installed as `awk` (the 3-argument form of
#         match() used below is a gawk extension).
# Outputs (written to the current directory):
#   5715-<date>.log                        lines containing "srcid[5715"
#   5715unshow-<date>.log                  of those, lines without "res_answerId"
#   5715unshow-query-<date>.log            orgquery captured from each such line
#   5715unshow-query-sort-uniq-<date>.log  query counts, most frequent first
#   5715show-<date>.log                    lines containing "res_answerId"
#   5715show-query-answerid-<date>.log     "<orgquery> <answerId>" per line
set -euo pipefail

if [[ $# -lt 1 || -z "$1" ]]; then
  echo "usage: $0 <date>  (e.g. $0 20210409)" >&2
  exit 2
fi

date="$1"
echo "跑日期为:${date}的5715文章卡展和未展现query数据"
file="medbeauty-${date}.log"
# The merged log is only an intermediate; make sure it is removed even when a
# later step fails.
trap 'rm -f -- "$file"' EXIT
echo "全量日志保存至${file}中"
# Fetch the whole day's logs. The glob is quoted so that it reaches hadoop
# untouched instead of being subject to local pathname expansion.
hadoop fs -getmerge "/app/dt/minos/3/70061577/${date}/*/*/" "./${file}"
echo "全天AE日志拉取成功"
# Keep only the 5715 Q&A lines.
wendalog="5715-${date}.log"
echo "5715日志保存至${wendalog}中"
# "[" has to be escaped: a bare "[" starts a bracket expression and the
# pattern would be rejected as unterminated.
awk '/srcid\[5715/' "$file" > "$wendalog"
echo "5715日志抽取成功"
rm -f -- "$file"
# 5715 lines that were NOT shown (no res_answerId).
unshowlog="5715unshow-${date}.log"
echo "5715未展现日志保存至${unshowlog}中..."
# grep exits 1 when it selects no lines; that is a valid empty result here,
# so only a status above 1 is treated as a failure.
grep -v 'res_answerId' "$wendalog" > "$unshowlog" || [[ $? -eq 1 ]]
echo "5715未展现日志抽取成功"
# Query of every not-shown line.
unshowquerylog="5715unshow-query-${date}.log"
echo "5715日志的query保存至${unshowquerylog}中..."
# One output line per input line; a line that does not match yields an empty
# line. awk regexes have no lazy quantifiers, so the capture is greedy and
# runs up to the last "] from" on the line.
awk '{ match($0, /.+orgquery\[(.*)\] from/, a); print a[1] }' \
  "$unshowlog" > "$unshowquerylog"
echo "5715未展现日志的query抽取成功"
# Count identical queries and order them by descending count.
sortlog="5715unshow-query-sort-uniq-${date}.log"
echo "query合并&排序保存至${sortlog}中..."
sort -- "$unshowquerylog" | uniq -c | sort -n -r > "$sortlog"
echo "5715query合并&排序成功"
echo "未展现部分完成"
echo "---------------------"
# 5715 lines that WERE shown (res_answerId present).
showlog="5715show-${date}.log"
echo "5715展现日志保存至${showlog}中..."
grep 'res_answerId' "$wendalog" > "$showlog" || [[ $? -eq 1 ]]
echo "5715展现日志抽取成功"
# Query and answerId of every shown line, separated by a single space.
showquerylog="5715show-query-answerid-${date}.log"
echo "5715展现日志query&answerid保存至${showquerylog}中..."
awk '{
  match($0, /.+orgquery\[(.*)\] from.+answerId\[(.*)\] resultCode/, a)
  print a[1], a[2]
}' "$showlog" > "$showquerylog"
echo "5715展现日志query&answerid保存至完成"
echo "done!!!!!!!!!!!"

批量运行batch.sh

# Launch one background run.sh per date. Each job runs under nohup and sends
# its stdout and stderr to its own myout-<date>.file.
for day in 20210409 20210410 20210411; do
  nohup ./run.sh "$day" > "myout-${day}.file" 2>&1 &
done

命中反爬duedge的ip排序提取:

处理脚本run.sh

#!/bin/bash
#
# run.sh - for one day of logs, extract the lines carrying a duedge
# anti-crawl (duedge_spider) score and rank the (ipv4, ipv6) pairs whose
# score is above 9 by number of occurrences.
#
# Usage:  ./run.sh <date>        e.g. ./run.sh 20210409
# Needs:  the hadoop CLI, and gawk installed as `awk` (the 3-argument form of
#         match() used below is a gawk extension).
# Outputs (written to the current directory):
#   duedge-<date>.log       "<riskscore> <ipv4> <ipv6>" per matching log line
#   duedge-<date>-sort.log  "<count> <ipv4> <ipv6>", highest count first
set -euo pipefail

if [[ $# -lt 1 || -z "$1" ]]; then
  echo "usage: $0 <date>  (e.g. $0 20210409)" >&2
  exit 2
fi

date="$1"
echo "跑日期为:${date}的duedgef爬ip排序数据"
file="medbeauty-${date}.log"
# The merged log is only an intermediate; make sure it is removed even when a
# later step fails.
trap 'rm -f -- "$file"' EXIT
echo "全量日志保存至${file}中"
# Fetch the whole day's logs. The glob is quoted so that it reaches hadoop
# untouched instead of being subject to local pathname expansion.
hadoop fs -getmerge "/app/dt/minos/3/70061577/${date}/*/*/" "./${file}"
echo "全天AE日志拉取成功"
# Extract risk score, IPv4 and IPv6 from every line that has a duedge_spider
# value starting with a non-zero digit.
duedgelog="duedge-${date}.log"
echo "反爬日志保存至${duedgelog}中,格式riskscore,ipv4,ipv6"
# The literal "[" and "]" around each value must be escaped, otherwise they
# are parsed as bracket expressions. Printing only on a successful match
# replaces the former temp file plus blank-line filter: a non-matching line
# used to produce nothing but separators, which that filter then dropped.
awk '{
  if (match($0, /user_ip\[([0-9.]*)\].+user_ip_v6\[([0-9a-zA-Z:]*)\].+duedge_spider\[([1-9].*)\] upModule/, a))
    print a[3], a[1], a[2]
}' "$file" > "$duedgelog"
echo "反爬日志抽取成功"
rm -f -- "$file"
# Keep records whose score is above 9, then count identical IP pairs and
# order them by descending count.
duedgesortlog="duedge-${date}-sort.log"
awk '$1 > 9 { print $2, $3 }' "$duedgelog" \
  | sort -n | uniq -c | sort -n -r > "$duedgesortlog"
echo "保存完成"
echo "done!!!!!!!!!!!"
原文地址:https://www.cnblogs.com/erbing/p/14817339.html