Writing nginx logs to HDFS on a daily schedule

#!/bin/bash
# Hadoop installation and the local nginx access logs to upload
hadoop_home=/opt/hadoop-2.4.0
tw_nginx_log_file=/home/chiline.com.all/access_com_tw.log
cn_nginx_log_file=/home/chiline.com.all/access_com_cn.log
# Today's date (YYYYMMDD), HDFS NameNode address, and directory holding the analysis jars
current_date=$(date +%Y%m%d)
hdfs_url=hdfs://xx.xx.xx.xx:9100
analyse_jar_path=$hadoop_home/ianc


echo "hadoop_home = $hadoop_home"
echo "tw_nginx_log_file = $tw_nginx_log_file"
echo "cn_nginx_log_file = $cn_nginx_log_file"
echo "hdfs_url = $hdfs_url" 
echo "analyse_jar_path = $hadoop_home/ianc"


function putTodayLogToHdfs(){
  # HDFS directory named after the previous day (addDate subtracts days from current_date)
  target_dir=$hdfs_url/user/day-$(addDate $current_date 1)
  $hadoop_home/bin/hdfs dfs -rm -r $target_dir
  $hadoop_home/bin/hdfs dfs -mkdir $target_dir
  $hadoop_home/bin/hdfs dfs -mkdir $target_dir/tw-log
  $hadoop_home/bin/hdfs dfs -mkdir $target_dir/cn-log
  $hadoop_home/bin/hdfs dfs -put $tw_nginx_log_file $target_dir/tw-log
  $hadoop_home/bin/hdfs dfs -put $cn_nginx_log_file $target_dir/cn-log
}
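# Resulting HDFS layout (illustrative, with current_date=20160215):
#   $hdfs_url/user/day-20160214/tw-log/access_com_tw.log
#   $hdfs_url/user/day-20160214/cn-log/access_com_cn.log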


# Despite the name, addDate prints the date $2 days BEFORE $1 (format YYYYMMDD).
# The result is written to stdout so callers can capture it with $(...).
function addDate(){
  str=$1
  days=$2
  yy=`echo $str|cut -c 1-4`
  mm=`echo $str|cut -c 5-6`
  dd=`echo $str|cut -c 7-8`
  days=`expr $days - $dd`
  # Step back month by month until the remaining offset fits inside one month
  while [ $days -ge 0 ]
  do
        mm=`expr $mm - 1`
        [ $mm -eq 0 ] && mm=12 && yy=`expr $yy - 1`
        # Flatten the `cal` output to one line; its last field is the length of that month
        cal_output=`cal $mm $yy`
        days_in_month=`echo $cal_output|awk '{print $NF}'`
        days=`expr $days - $days_in_month`
  done
  dd=`expr 0 - $days`
  # Zero-pad single-digit day and month
  expr $dd : "^.$" > /dev/null && dd=0$dd
  expr $mm : "^.$" > /dev/null && mm=0$mm
  echo $yy$mm$dd
}
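# Example: addDate 20160301 1 prints 20160229 (2016 is a leap year), correctly
# crossing the month boundary; day-$(addDate $current_date 1) therefore names yesterday.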


# Extract the file name from a path (the script below uses `basename` instead;
# kept here as the original alternative). The result is printed to stdout.
function getLogFileName(){
   logFileName=$1
   # Prefix IFS only for `read` so the global IFS is not modified
   IFS='/' read -ra arr <<< "$logFileName"
   arr_length=${#arr[@]}
   echo ${arr[$arr_length-1]}
}
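# Example: getLogFileName /home/chiline.com.all/access_com_tw.log prints
# access_com_tw.log, equivalent to `basename /home/chiline.com.all/access_com_tw.log`.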


function removeLastWeekLog(){
  # addDate subtracts 7 days, so this deletes the directory from one week ago
  remove_date=$(addDate $current_date 7)
  echo "start removing history log files, remove_date is $remove_date"
  $hadoop_home/bin/hdfs dfs -rm -r $hdfs_url/user/day-$remove_date
}
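# Example: with current_date=20160215 this removes $hdfs_url/user/day-20160208.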


function analyseTodayLog(){
   #tw_log_file_name=$(getLogFileName $tw_nginx_log_file)
   tw_log_file_name=`basename $tw_nginx_log_file`
   #cn_log_file_name=$(getLogFileName $cn_nginx_log_file)
   cn_log_file_name=`basename $cn_nginx_log_file`
   # All input and output paths live under the day-... directory created earlier
   log_day=$(addDate $current_date 1)
   tw_log_file=$hdfs_url/user/day-$log_day/tw-log/$tw_log_file_name
   cn_log_file=$hdfs_url/user/day-$log_day/cn-log/$cn_log_file_name
   analyse_path=$hdfs_url/user/day-$log_day/analyse

   #pv analyse
   $hadoop_home/bin/hadoop jar $analyse_jar_path/pvanalyse-1.0.jar $tw_log_file $analyse_path/tw-pv
   $hadoop_home/bin/hadoop jar $analyse_jar_path/pvanalyse-1.0.jar $cn_log_file $analyse_path/cn-pv
   #time analyse
   $hadoop_home/bin/hadoop jar $analyse_jar_path/timeanalyse-1.0.jar $tw_log_file $analyse_path/tw-time
   $hadoop_home/bin/hadoop jar $analyse_jar_path/timeanalyse-1.0.jar $cn_log_file $analyse_path/cn-time
   #area analyse
   $hadoop_home/bin/hadoop jar $analyse_jar_path/locationanalyse-1.0.jar $tw_log_file $analyse_path/tw-location
   $hadoop_home/bin/hadoop jar $analyse_jar_path/locationanalyse-1.0.jar $cn_log_file $analyse_path/cn-location
}
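# Note: a standard MapReduce job fails if its output directory already exists; the
# `-rm -r` on the whole day-... directory in putTodayLogToHdfs keeps these output
# paths (tw-pv, cn-pv, tw-time, cn-time, tw-location, cn-location) fresh each run.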


echo "start put local log to hdfs"
putTodayLogToHdfs;
echo "start analyse today log"
analyseTodayLog;
echo "remove last week log"
removeLastWeekLog;
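# Scheduling: a minimal crontab sketch, assuming the script is saved to a
# hypothetical path such as /opt/scripts/nginx_log_to_hdfs.sh and made executable:
#   5 0 * * * /opt/scripts/nginx_log_to_hdfs.sh >> /var/log/nginx_to_hdfs.log 2>&1
# Running shortly after midnight uploads the previous day's rotated log;
# adjust the path, time, and log destination to your environment.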
Original source: https://www.cnblogs.com/mengfanrong/p/5196055.html