shell 处理kettle日志

#!/bin/bash

# Names of the CSV reports that get_number and get_times append to.
output_number_file=tqxs_week_number.csv
output_times_file=tqxs_week_times.csv

# Today's date as YYYYMMDD; it selects the log files to process and is the
# suffix of the brief log file name.
date=$(date "+%Y%m%d")

# Collect the entries of the current directory whose names contain "JOB",
# "log" and today's date. A shell glob is used instead of parsing `ls` output.
# The result stays a newline-separated string because the rest of the script
# iterates over an unquoted $files.
files=
for entry in *; do
  if [[ $entry == *JOB* && $entry == *log* && $entry == *"$date"* ]]; then
    files+=${files:+$'\n'}$entry
  fi
done
unset entry

# Start every run with a fresh brief log for today.
rm -rf -- "./tqxs_week_brief_log.${date}"
# Write a timestamped message to stdout and append it to today's brief log.
# Globals:
#   date (read) - suffix of the brief log file name
# Arguments:
#   $@ - message parts; they are joined with single spaces
# Outputs:
#   "[YYYY-mm-dd HH:MM:SS] => message" on stdout, also appended to
#   ./tqxs_week_brief_log.$date
function LOG() {
  # "$*" joins the arguments with the first character of IFS; pin it to a
  # space so the output does not depend on the caller's IFS.
  local IFS=' '
  local stamp
  stamp=$(date "+%Y-%m-%d %H:%M:%S")
  # Quoting keeps the message literal: characters such as '*' are not expanded
  # to file names and embedded whitespace is not collapsed.
  printf '%s\n' "[${stamp}] =>${*:+ $*}" \
    | tee -a "./tqxs_week_brief_log.${date}"
}
# Shorten a log file name to the part in front of its date suffix.
# The name is cut at its first ".log", then the 9 characters directly before
# that point are dropped (presumably a separator plus the YYYYMMDD date -
# confirm against the real file names).
# Arguments:
#   $1 - log file name, e.g. JOB_TQXS_WEEK_20201225.log
# Outputs:
#   the shortened name on stdout, e.g. JOB_TQXS_WEEK. A name without ".log"
#   is printed unchanged; a stem shorter than 9 characters is printed as is.
function fun_filename_pro() {
    local name=$1
    local stem

    if [[ $name != *.log* ]]; then
        # Nothing to cut at: hand the name back instead of guessing.
        printf '%s\n' "$name"
        return 0
    fi

    # Match the literal substring ".log". A per-character search would stop at
    # the first '.', 'l', 'o' or 'g' anywhere in the job name.
    stem=${name%%.log*}
    if (( ${#stem} >= 9 )); then
        stem=${stem:0:${#stem}-9}
    fi
    printf '%s\n' "$stem"
}

# Extract one whitespace-separated field from a log line.
# The text up to and including the first "(" is dropped, one trailing ")" is
# dropped and every ", " becomes a space; what remains is split on whitespace.
# From the selected field everything up to and including the first "=" is
# removed, so "R=300" yields "300".
# Arguments:
#   $1 - log line, e.g. "... Finished processing (I=1, O=2, R=300)"
#   $2 - 1-based field number; any value other than 1-9 selects field 10
# Outputs:
#   the field value on stdout (an empty line if the field does not exist)
function fun_str_pro() {
    local text=$1
    local field_no=$2
    # Split on blanks, tabs and newlines regardless of the caller's IFS.
    local IFS=$' \t\n'
    local -a fields=()
    local value

    # Keep what follows the first "(".
    text=${text#*(}
    # Drop a single closing ")" if the text ends with one.
    text=${text%)}
    # Turn the ", " separators into plain spaces.
    text=${text//, / }

    case $field_no in
        1|2|3|4|5|6|7|8|9) ;;
        *) field_no=10 ;;
    esac

    # read splits the text into words without pathname expansion, so a "*" in
    # the log line stays literal. With -d '' it consumes the whole (possibly
    # multi-line) text and returns non-zero at end of input even though the
    # array has been filled; that status is expected and ignored.
    read -r -d '' -a fields <<< "$text" || true

    value=${fields[field_no - 1]}
    printf '%s\n' "${value#*=}"
}

# Sum the counters of every "Finished processing" line, per log file, and
# append one CSV record per file to $output_number_file.
# Globals:
#   files (read)              - whitespace-separated list of log files
#   date (read)               - value of the first CSV column
#   output_number_file (read) - CSV file that is appended to
# Arguments:
#   none
# Outputs:
#   progress messages through LOG; one record "date,name,I,O,R,W,U,E" per
#   log file appended to $output_number_file; a warning on stderr for every
#   counter value that is not a plain decimal integer
function get_number() {
  local file short_name line value idx
  # Labels of the six counters, in the order of fields 1-6 of the log line.
  local -a names=(I O R W U E)
  local -a sums

  # $files is a plain whitespace-separated string, so it is split on purpose.
  for file in $files; do
    short_name=$(fun_filename_pro "$file")
    sums=(0 0 0 0 0 0)
    LOG "正在计算日志文件:$file"

    # -r keeps backslashes intact; the extra test also processes a last line
    # that has no trailing newline.
    while IFS= read -r line || [[ -n $line ]]; do
      [[ $line == *"Finished processing"* ]] || continue
      for idx in 0 1 2 3 4 5; do
        value=$(fun_str_pro "$line" "$((idx + 1))")
        # Only plain decimal integers reach the arithmetic below; anything
        # else would corrupt the running sum.
        if [[ $value =~ ^[0-9]+$ ]]; then
          # 10# forces base 10 so that a value such as "08" is not read as octal.
          sums[idx]=$(( sums[idx] + 10#$value ))
        else
          printf 'get_number: ignoring non-numeric %s value "%s" in %s\n' \
            "${names[idx]}" "$value" "$file" >&2
        fi
      done
    done < "$file"

    LOG "I:" "${sums[0]}" "O:" "${sums[1]}" "R:" "${sums[2]}" \
      "W:" "${sums[3]}" "U:" "${sums[4]}" "E:" "${sums[5]}"
    printf '%s,%s,%s,%s,%s,%s,%s,%s\n' "$date" "$short_name" \
      "${sums[0]}" "${sums[1]}" "${sums[2]}" \
      "${sums[3]}" "${sums[4]}" "${sums[5]}" >> "$output_number_file"
  done
}

# Extract the run time reported by the "Processing ended after" line(s) of
# every log file and append it to $output_times_file.
# Globals:
#   files (read)             - whitespace-separated list of log files
#   date (read)              - value of the first CSV column
#   output_times_file (read) - CSV file that is appended to
# Arguments:
#   none
# Outputs:
#   progress messages through LOG; one record "date,name,time" per log file
#   appended to $output_times_file; "<file> log err" through LOG when no
#   usable "Processing ended after" line is found
function get_times() {
  local file short_name ended elapsed

  # $files is a plain whitespace-separated string, so it is split on purpose.
  for file in $files; do
    LOG "正在处理获取时间的日志文件:$file"
    # All lines that report the end of processing (empty if there are none).
    ended=$(grep -- "Processing ended after" "$file")
    short_name=$(fun_filename_pro "$file")

    if [[ $ended == *"seconds total"* ]]; then
      # The original notes describe this form as a run longer than one minute;
      # the value is the first field after the first "(".
      elapsed=$(fun_str_pro "$ended" 1)
    elif [[ $ended == *seconds* ]]; then
      # The original notes describe this form as a run shorter than one
      # minute; the value is the 9th whitespace-separated field of the line.
      elapsed=$(fun_str_pro "$ended" 9)
    else
      # Name the offending file so the error can be traced.
      LOG "$file" "log err"
      continue
    fi

    LOG "$elapsed"
    printf '%s,%s,%s\n' "$date" "$short_name" "$elapsed" >> "$output_times_file"
  done
}
# Run the time-extraction function (currently disabled: the call is commented out).
#get_times

# Run the counter-summing function.
get_number

 
原文地址:https://www.cnblogs.com/aongao/p/14185121.html