shell脚本常规技巧

邮件相关

发送邮件：

#!/usr/bin/python
"""Send an HTML e-mail through an authenticated SMTP server.

Usage:
    send_mail.py MAIL_HOST MAIL_USER MAIL_PASS MAIL_FROM MAIL_TO SUBJECT CONTENT

MAIL_TO may hold several addresses separated by ';'.
Written to run unchanged on Python 2.6+ and Python 3.
"""

import sys
import smtplib
from email.header import Header
from email.mime.text import MIMEText


def split_recipients(mail_to):
    """Split a ';'-separated address string into a list, dropping blanks."""
    return [addr.strip() for addr in mail_to.split(';') if addr.strip()]


def build_message(mail_from, mail_to, mail_subject, mail_content):
    """Build the HTML message.

    MIMEText already emits exactly one Content-Type (text/html, utf-8) and
    one MIME-Version header, so they are not assigned again here: assigning
    through msg[...] appends a second header instead of replacing the first.
    """
    msg = MIMEText(mail_content, _subtype='html', _charset='utf-8')
    msg['From'] = mail_from
    # Header() RFC 2047-encodes a non-ASCII subject instead of sending raw bytes.
    msg['Subject'] = Header(mail_subject, 'utf-8')
    # The header wants a comma-separated list; ';' is only this script's CLI syntax.
    msg['To'] = ', '.join(split_recipients(mail_to))
    return msg


def send(mail_host, mail_user, mail_pass, mail_from, mail_to_list, msg):
    """Log in to mail_host and deliver msg; the socket is always released."""
    client = smtplib.SMTP()
    try:
        client.connect(mail_host)
        client.login(mail_user, mail_pass)
        client.sendmail(mail_from, mail_to_list, msg.as_string())
        client.quit()
    finally:
        # close() is a no-op after a successful quit().
        client.close()


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 8:
        sys.stderr.write(
            "Usage: %s MAIL_HOST MAIL_USER MAIL_PASS MAIL_FROM MAIL_TO "
            "SUBJECT CONTENT\n" % argv[0])
        return 2

    (mail_host, mail_user, mail_pass, mail_from,
     mail_to, mail_subject, mail_content) = argv[1:8]

    msg = build_message(mail_from, mail_to, mail_subject, mail_content)
    try:
        send(mail_host, mail_user, mail_pass, mail_from,
             split_recipients(mail_to), msg)
    except Exception as err:
        # Report what actually failed instead of a generic message.
        sys.stderr.write("send mail failed: %s\n" % err)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

用法：python ./send_mail.py $MAIL_HOST $MAIL_USER $MAIL_PASS $MAIL_FROM $MAIL_TO "$MAIL_SUBJECT" "$MAIL_CONTENT"

带附件版本：

#!/usr/bin/python
"""Send an HTML e-mail, optionally with one file attachment, via SMTP.

Usage:
    send_mail.py MAIL_HOST MAIL_USER MAIL_PASS MAIL_FROM MAIL_TO SUBJECT CONTENT [ATTACH]

MAIL_TO may hold several addresses separated by ';'.
ATTACH is attached only when it names an existing regular file.
Written to run unchanged on Python 2.6+ and Python 3.
"""

import os
import sys
import smtplib
from email import encoders
from email.header import Header
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def split_recipients(mail_to):
    """Split a ';'-separated address string into a list, dropping blanks."""
    return [addr.strip() for addr in mail_to.split(';') if addr.strip()]


def build_attachment(path):
    """Wrap the file at path as a base64 application/octet-stream part."""
    with open(path, 'rb') as handle:
        payload = handle.read()
    part = MIMEBase('application', 'octet-stream')
    part.set_payload(payload)
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', 'attachment',
                    filename=os.path.basename(path))
    return part


def build_message(mail_from, mail_to, mail_subject, mail_content, mail_attach=''):
    """Build a multipart/mixed message: HTML body plus optional attachment.

    The container keeps its own multipart Content-Type; forcing
    "text/html" onto it would add a second, contradictory header.
    """
    msg = MIMEMultipart()
    msg['From'] = mail_from
    # Header() RFC 2047-encodes a non-ASCII subject instead of sending raw bytes.
    msg['Subject'] = Header(mail_subject, 'utf-8')
    # The header wants a comma-separated list; ';' is only this script's CLI syntax.
    msg['To'] = ', '.join(split_recipients(mail_to))

    msg.attach(MIMEText(mail_content, _subtype='html', _charset='utf-8'))
    if mail_attach and os.path.isfile(mail_attach):
        msg.attach(build_attachment(mail_attach))
    return msg


def send(mail_host, mail_user, mail_pass, mail_from, mail_to_list, msg):
    """Log in to mail_host and deliver msg; the socket is always released."""
    client = smtplib.SMTP()
    try:
        client.connect(mail_host)
        client.login(mail_user, mail_pass)
        client.sendmail(mail_from, mail_to_list, msg.as_string())
        client.quit()
    finally:
        # close() is a no-op after a successful quit().
        client.close()


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 8:
        sys.stderr.write(
            "Usage: %s MAIL_HOST MAIL_USER MAIL_PASS MAIL_FROM MAIL_TO "
            "SUBJECT CONTENT [ATTACH]\n" % argv[0])
        return 2

    (mail_host, mail_user, mail_pass, mail_from,
     mail_to, mail_subject, mail_content) = argv[1:8]
    mail_attach = argv[8] if len(argv) > 8 else ''

    if mail_attach and not os.path.isfile(mail_attach):
        # Same outcome as before (mail goes out without the file), but no longer silent.
        sys.stderr.write("attachment not found, sending without it: %s\n"
                         % mail_attach)

    msg = build_message(mail_from, mail_to, mail_subject, mail_content,
                        mail_attach)
    try:
        send(mail_host, mail_user, mail_pass, mail_from,
             split_recipients(mail_to), msg)
    except Exception as err:
        # Report what actually failed instead of a generic message.
        sys.stderr.write("send mail failed: %s\n" % err)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

发送脚本：

#! /bin/bash
#
# Mail a one-line status notification about a process by calling
# send_mail.py, which is looked up next to this script.
#
# Usage: <this script> process_name
#
# Subject: "<hostname>[<eth0 address>] <process_name> <date time>".
# The mail body is left empty.

export LC_ALL="en_US.UTF-8"

if [[ $# -lt 1 ]]; then
        echo "Usage: $0 process_name" >&2
        exit 1
fi

# send_mail.py is called by relative path below, so a failed cd must abort.
cd "$(dirname "$0")" || exit 1

# Depends on the legacy net-tools output format ("inet addr:a.b.c.d").
LOCALIP=$(/sbin/ifconfig eth0 | grep "inet addr" | cut -f 2 -d ":" | cut -f 1 -d " ")
HOSTNAME=$(hostname)
gday=$(date +"%x %X")

# NOTE(review): credentials are hard-coded and handed over on the command
# line, where other local users can read them from the process list.
MAIL_HOST="smtp.exmail.qq.com"
MAIL_USER="op@test.com"
MAIL_PASS="optest"
MAIL_FROM="op@test.com"
MAIL_TO="chenzhj@test.com"
MAIL_SUBJECT="$HOSTNAME[$LOCALIP] $1 $gday"

MAIL_CONTENT=""

echo "$MAIL_SUBJECT"

# Every argument is quoted so an empty or space-containing value keeps its position.
python ./send_mail.py "$MAIL_HOST" "$MAIL_USER" "$MAIL_PASS" "$MAIL_FROM" "$MAIL_TO" "$MAIL_SUBJECT" "$MAIL_CONTENT"

生成表格邮件：

#!/bin/sh
#
# Print an HTML page on stdout whose table has one row per line of
# ../data/static.<yesterday>: yesterday's date followed by the first six
# whitespace-separated fields of that line.

YESTERDAY=$(date +%Y-%m-%d -d "1 days ago")

# render_rows FILE DAY
# Emit one <tr> per line of FILE: a DAY cell, then fields 1..6.
# A single awk pass replaces the former "one awk process per line" loop, and
# DAY is passed with -v instead of being spliced into the awk source.
render_rows() {
    awk -v day="$2" '
    {
        print "\t\t\t<tr>"
        print "\t\t\t\t<td>" day "</td>"
        for (i = 1; i <= 6; i++) {
            print "\t\t\t\t<td>" $i "</td>"
        }
        print "\t\t\t</tr>"
    }' "$1"
}

# A quoted here-document keeps the attribute quotes and the ';' literal.
# With echo "..."..."" the inner quotes were eaten by the shell and the ';'
# in the meta tag terminated the command.
# NOTE(review): the page declares gb2312; confirm it matches the encoding the
# mail is actually sent in.
cat <<'EOF'
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=gb2312">
</head>
 <body>
      <h2>今日半价mysql数据统计</h2>
      <table border="1">
          <tr>
              <th>DateTime</th>
              <th>统计站点</th>
              <th>总商品数</th>
              <th>在架商品数</th>
              <th>下架商品数</th>
              <th>增量总数</th>
              <th>在架的增量数</th>
          </tr>
EOF

render_rows "../data/static.$YESTERDAY" "$YESTERDAY"

cat <<'EOF'
      </table>
      </body>
      </html>
EOF

awk

1、传递外部shell参数(-v)、同时处理多文件（getline）示例：

#!/bin/bash
#
# Example: pass a shell value into awk with -v and read a second file with getline.
#
# Usage: <this script> local_log bc_log
#
# For every record of local_log, bc_log is rescanned from its first line.
# The record's first field is printed when the first bc_log line that is
# not smaller than it is strictly greater.
# NOTE(review): this presumably expects both files sorted ascending, so a
# printed key is one missing from bc_log -- confirm against real data.

if [[ $# -ne 2 ]]; then
        echo "Usage: $0  local_log  bc_log" >&2
        exit 1
fi

awk -v bc_log="$2" '
BEGIN {
    SRC = 0
    TAR = 0
    # print "bc_log:", bc_log    (awk comments start with "#"; /* ... */ is parsed as a regex)
}
{
    SRC = $1

    # Rewind bc_log so the scan below starts from its first line again.
    close(bc_log)
    # getline returns -1 on a read error; without "> 0" that would loop forever.
    while ((getline TAR < bc_log) > 0) {
        if (TAR > SRC) {
            print SRC
            break
        }
        if (TAR == SRC) {
            break
        }
    }
}' "$1"

字典过滤

#! /bin/gawk -f
#
# Print the first field of every input record whose first field is NOT a
# key of the dictionary.
#
# The dictionary keys are the first fields of the lines of dict_file.
# dict_file defaults to the path this script always used; override it with
#     -v dict_file=/path/to/file

BEGIN {
        if (dict_file == "") {
                dict_file = "/home/hadoop/zhenjing/solr/2.data"
        }

        # Parenthesised so the comparison applies to getline's return value;
        # "> 0" also stops the loop on a read error (-1).
        while ((getline < dict_file) > 0) {
                dict[$1] = 1
        }
        close(dict_file)
}

!($1 in dict) {
        print $1
}

分析日志，计算平均时间

#! /bin/gawk -f
#
# Average the "time:" value per method over a log read from the input.
#
# For each line: the text from "method" onward is split on double quotes and
# the third piece is the method name; the text from "time:" onward is split
# on ":" and the second piece is the time. Lines lacking either marker, or
# whose time is not greater than 0, are skipped.
# Output: one "<method> = <average>" line per method with at least one sample.

# BEGIN is the awk start-up pattern. The former "START" was just an unset
# variable used as a pattern, so its block never ran.
BEGIN {
    sum_time["item.get"] = 0;
    sum_count["item.get"] = 0;
}

{
    pos = index($0, "method");
    if (pos == 0) {
        # Without this guard substr($0, 0) would return the whole line and
        # an unrelated quoted string would be taken as the method.
        next;
    }

    split(substr($0, pos), arr, "\"");
    method = arr[3];

    if (method == "") {
        next;
    }

    pos = index($0, "time:");
    if (pos == 0) {
        next;
    }

    split(substr($0, pos), arr, ":");
    # "+ 0" forces a numeric value so the test below is never a string comparison.
    time = arr[2] + 0;

    # print method, time;

    if (time > 0) {
        sum_time[method] = sum_time[method] + time;
        sum_count[method] = sum_count[method] + 1;
    }
}

END {
    for (method in sum_time) {
        if (sum_count[method] > 0) {
            print method " = " sum_time[method] / sum_count[method];
        }
    }
}

awk统计tomcat android机型：

#! /bin/gawk -f
#
# Count occurrences of field 1 and of field 3 in "; "-separated input and
# print "<count> <value>" for every distinct value of each.
#
# Input can be prepared with, for example:
# tail  /tmp/temai.txt | grep Android | cut -d" " -f 18-23 | cut -d')' -f1

BEGIN {
        FS = "; ";
}

{
        # Incrementing a missing element creates it at 1, so no "in" test is needed.
        dict_os[$1]++;
        dict_jixing[$3]++;
}

END {
        for (os in dict_os) {
                print dict_os[os] " " os;
        }

        for (jx in dict_jixing) {
                print dict_jixing[jx] " " jx;
        }
}

awk平均分拆文件：

# Append each line of weiboid.txt to outfile_tmp_<NR % 10>, spreading the lines over 10 files.
# The redirection target is parenthesised: an unparenthesised concatenation after ">>" is ambiguous in awk.
awk '{ hash = NR % 10; print $0 >> ("outfile_tmp_" hash) }' weiboid.txt

时间相关

基于时间的循环：date命令

# Normalise the first argument to YYYYMMDD, then freeze it.
# "typeset readonly VAR=..." does not do that: it declares a variable that is
# literally named "readonly" and leaves VAR writable.
START_DATE=$(date --date="$1" +%Y%m%d)
typeset -r START_DATE

# NOTE(review): the original snippet used END_DATE without defining it;
# taking it from the second argument is an assumption -- adjust as needed.
END_DATE=$(date --date="$2" +%Y%m%d)
typeset -r END_DATE

# The step expression asks date(1) for "CUR_DATE plus one day", so month and
# year boundaries are handled by date rather than by integer arithmetic.
for (( CUR_DATE = START_DATE; CUR_DATE <= END_DATE; CUR_DATE = $(date --date="${CUR_DATE} 1 days" +%Y%m%d) )); do
  echo "${CUR_DATE}"
done

Linux时钟分为系统时钟（System Clock）和硬件时钟（Real Time Clock，简称RTC）。系统时钟是指当前Linux Kernel中的时钟，而硬件时钟则是主板上由电池供电的时钟，这个硬件时钟可以在BIOS中进行设置。当Linux启动时，系统时钟会去读取硬件时钟的设置，然后系统时钟就会独立于硬件时钟运作。Linux中的所有命令（包括函数）都是采用的系统时钟设置。在Linux中，用于时钟查看和设置的命令主要有date、hwclock和clock。其中，clock和hwclock用法相近。

date 010318242008.30(月日时分年.秒)

date 010318242008(月日时分年)

date -s 20080103

date -s 18:24

clock -w

hwclock --set --date="07/07/06 10:19" （月/日/年 时:分）

用硬件时钟设置系统时钟（硬件时钟 → 系统时钟）：# hwclock --hctosys（hc代表硬件时间，sys代表系统时间）或者# clock --hctosys

用系统时钟设置硬件时钟（系统时钟 → 硬件时钟）：# hwclock --systohc或者# clock --systohc

xargs

# For each non-empty name on stdin: look it up as a regular file in the
# current directory (not below it), sort the hits, strip the leading "./"
# and print the md5sum of each.
# A continuation line cannot begin with "|" unless the previous line ends
# with a backslash. "-L 1" is dropped before "-I" because xargs -I already
# reads one item per line and the two options conflict.
echo "FILE.txt" | grep -v '^$' \
  | xargs -I {} find . -maxdepth 1 -type f -name "{}" \
  | sort | xargs -L 1 basename | xargs md5sum

# Two chained xargs stages ("somecommand" is a placeholder for any producer):
#   1) xargs -s 50000 echo   - re-emits the input items through echo, under a
#                              50000-character command-line limit (-s)
#   2) xargs -I '{}' ... rm  - runs rm with '{}' replaced by what it reads
# NOTE(review): per xargs(1), -I substitutes each whole input LINE as one
# argument, so a line carrying several names reaches rm as a single name --
# confirm this is what was intended.
somecommand | xargs -s 50000 echo | xargs -I '{}' -s 100000 rm '{}'

# Three ways to pull the IPv4 address out of legacy net-tools ifconfig
# output, i.e. lines of the form "inet addr:a.b.c.d ...".

# 1) grep + cut
LOCALIP=$(/sbin/ifconfig eth1 | grep "inet addr" | cut -f 2 -d ":" | cut -f 1 -d " ")

# 2) sed, one interface. -E switches to extended regexes so ( ) and {m,n}
#    are operators; the dots are escaped and \1 is the captured address.
LOCALIP="$(/sbin/ifconfig eth1 | sed -n -E '/inet addr/s/.*inet addr:([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}).*/\1/p')"

# 3) sed, first address of any interface. A dotted quad is 7 to 15
#    characters long; an upper bound of 14 would truncate e.g. 192.168.100.200.
LOCAL_IP="$(/sbin/ifconfig | sed -n -E 's/^.*inet addr:([[:digit:].]{7,15}).*$/\1/p' | head -1)"

array 相关

#! /bin/bash
#
# Demonstrates two ways of iterating over a bash array.

colors=(red green blue)

# ${#array[@]} is the number of elements (${#array[*]} works as well).
# Inside double quotes "${array[@]}" expands to one word per element, the
# same way "$@" does for the positional parameters.
element_count=${#colors[@]}

# Way 1: walk the indices.
index=0
while [[ "$index" -lt "$element_count" ]]; do
  # Quoted so an element containing spaces or glob characters prints intact.
  # ${colors[index]} also works: the subscript is an arithmetic context.
  echo "${colors[$index]}"
  index=$((index + 1))
done

# Way 2: iterate over the elements directly.
# Each element is printed on its own line; use
#   echo -n "$i "
# to keep them on one line instead.
for i in "${colors[@]}"; do
  echo "$i"
done


echo
echo "For test:"

#######################################
# Test whether a value is one of the given array elements.
# Usage:     ArrayCheckExist "$value" "${arr[@]}"
# Arguments: $1   - value to look for
#            $2.. - elements to compare against (string equality)
# Returns:   1 when the value IS present, 0 when it is not.
#            NOTE: this is the reverse of the usual shell convention; the
#            existing callers test "$? -gt 0" for "exists", so it is kept.
#######################################
ArrayCheckExist() {
    # All working variables are local; as globals, "i" overwrote the loop
    # variable of any caller that also used "i".
    local value=${1-}
    local str

    # Drop the searched value so "$@" holds only the elements.
    if [[ $# -gt 0 ]]; then
        shift
    fi

    for str in "$@"; do
        if [[ "$value" == "$str" ]]; then
            return 1   #  exist
        fi
    done
    return 0
}


# Collect the distinct values of the 4th column ("tomcat") of ./di.conf,
# in order of first appearance, then print them one per line.
dirs=()  # empty array

# -r keeps backslashes in the config file literal.
while read -r ip dir info tomcat port
do
    # Skip lines whose first word is "#" and lines with fewer than 5 columns.
    if [[ $ip == "#" || -z $port ]]
    then
        continue
    fi

    # index=`echo -n $tomcat |md5sum|cut -d ' ' -f1`   # get string md5sum
    # ArrayCheckExist returns 0 when the value is NOT in the array yet,
    # so a "true" result here means "new entry".
    if ArrayCheckExist "$tomcat" "${dirs[@]}"; then
        dirs+=("$tomcat")
    fi
done < ./di.conf

for i in "${dirs[@]}"
do
    echo "$i"
done


# Exercise ArrayCheckExist, which returns non-zero when the value is found.
echo "Array function test:"

# First element of dirs is looked up among the remaining elements.
return_val=0
ArrayCheckExist "${dirs[@]}" || return_val=$?
if (( return_val > 0 )); then
     echo "${dirs[0]} Exist."
fi

# A value looked up in the whole array.
if ! ArrayCheckExist "test" "${dirs[@]}"; then
     echo "test Exist."
fi

# A value looked up in an empty element list.
if ! ArrayCheckExist "test"; then
     echo "test Exist."
fi

# A concrete directory looked up in the whole array.
if ! ArrayCheckExist "/usr/local/webserver/di/tomcat-di2"  "${dirs[@]}"; then
     echo "Exist."
fi

统计脚本

solr：
# Prints: request count, slow count (QTime > 500), mean QTime, slow ratio.
# "print slow++" used to bump the counter before the ratio was computed,
# skewing it by one; the ratios are skipped when there is no input, which
# avoids a division by zero.
awk -F "QTime=" '{ sum += $2; cnt++; if ($2 + 0 > 500) slow++ } END { print cnt + 0; print slow + 0; if (cnt > 0) { print sum / cnt; print slow / cnt } }' shards

dc：
# Prints: request count, slow count (time > 500), mean time, slow ratio.
# -E is required: in grep's default basic syntax "+" is a literal character,
# so "[0-9]+" never matched a plain number.
grep -oE "spending time:[0-9]+" dc.log | awk -F: '{ sum += $2; cnt++; if ($2 + 0 > 500) slow++ } END { print cnt + 0; print slow + 0; if (cnt > 0) { print sum / cnt; print slow / cnt } }'

rts：
# Prints: request count, slow count (time > 20), mean time, slow ratio.
# -E is required: in grep's default basic syntax "+" is a literal character,
# so "[0-9]+" never matched a plain number.
grep -oE "search rts used: [0-9]+" dc.log | awk -F: '{ sum += $2; cnt++; if ($2 + 0 > 20) slow++ } END { print cnt + 0; print slow + 0; if (cnt > 0) { print sum / cnt; print slow / cnt } }'

searchserver：
# Prints: request count, slow count (totalTime > 500), mean totalTime, slow ratio.
# -E is required: in grep's default basic syntax "+" is a literal character,
# so "[0-9]+" never matched a plain number.
grep -oE "totalTime=[0-9]+" searchserver.log.2013-07-08.log | awk -F= '{ sum += $2; cnt++; if ($2 + 0 > 500) slow++ } END { print cnt + 0; print slow + 0; if (cnt > 0) { print sum / cnt; print slow / cnt } }'

appserver:
# Prints: request count, slow count (5th column > 500), mean of the 5th column, slow ratio.
# awk reads the files directly; the ratios are skipped when there is no
# input, which avoids a division by zero.
awk '{ sum += $5; cnt++; if ($5 + 0 > 500) slow++ } END { print cnt + 0; print slow + 0; if (cnt > 0) { print sum / cnt; print slow / cnt } }' userTime-appserver-2013-07-25_*

solr相关

solr core数据对比

#! /bin/bash
#
# Replay every query of query_file against two solr hosts and print the
# queries whose JSON responses differ once the QTime field is removed.
#
# Usage: <this script> query_file solr1 solr2 [out_file]
#
# The two responses are stored in <out_file>_0 and <out_file>_1
# (out_file defaults to /tmp/query) and overwritten for each query.

if [[ $# -lt 3 ]]; then
    echo "Usage $0 query_file solr1 solr2 [out_file]" >&2
    exit 1
fi

OUT_FILE=${4:-/tmp/query}

# -r keeps backslashes in the query file literal.
while read -r query
do
    # KEY: set wt=json for solr request.
    query=${query//wt=javabin/wt=json}

    wget "http://$2/solr/C2C_TAOBAO_0/select/?$query" -q  -O "${OUT_FILE}_0"
    wget "http://$3/solr/C2C_TAOBAO_0/select/?$query" -q  -O "${OUT_FILE}_1"

    # del QTime from solr "json" response.
    # sed uses basic regexes by default, where "+" is a literal character;
    # "[0-9][0-9]*" is the portable way to say "one or more digits".
    sed  -i 's/"QTime":[0-9][0-9]*,//g'  "${OUT_FILE}_0"  "${OUT_FILE}_1"

    if ! cmp -s "${OUT_FILE}_0" "${OUT_FILE}_1"
    then
        echo "$query"
    fi

done < "$1"

solr查询参数提取

#! /bin/gawk -f
#
# Inside each range that starts at a line containing "select params={" and
# ends at a line containing "}", print the text between the first "{" and
# the next "}" of every line that has such a pair.

/select params={/,/}/ {
        if (!match($0, /{[^}]+}/)) {
                next
        }
        # RSTART/RLENGTH delimit the match including both braces; drop them.
        print substr($0, RSTART + 1, RLENGTH - 2)
}

获取淘宝真实链接：

#! /bin/bash
#
# Follow a s.click.taobao.com redirect chain and print the final Location.
#
# Usage: <this script> url
#
# wget's log is written to ./log and the downloaded body to ./tmp.

if [[ $# -lt 1 ]]; then
        echo "Usage: $0 url" >&2
        exit 1
fi

export LC_ALL=en_US.UTF-8

OUT=log

# Quoted: a URL normally contains "?" and "&", which must not be globbed or split.
wget "$1" -o "$OUT" -O tmp

# Second space-separated field of every "Location:" log line that mentions
# s.click.taobao.com, one per line.
req_arr=$(grep "Location:" "$OUT" | grep "s.click.taobao.com" | cut -d" " -f 2)

# $req_arr is deliberately left unquoted below: word splitting joins the
# lines into one space-separated line so that cut can pick the 2nd and 3rd.
# shellcheck disable=SC2086
taobao_req=$(echo $req_arr | cut -d" " -f 2)
# shellcheck disable=SC2086
refer_req=$(echo $req_arr | cut -d" " -f 3)

# Text after "et%3D" in the referer, with "%25" turned back into "%".
ext_key=$(printf '%s\n' "$refer_req" | awk -F"et%3D" '{print $2}' | sed -e 's/%25/%/g')

#echo "$taobao_req&ref=&et=$ext_key"
#echo "curl  "$taobao_req&ref=&et=$ext_key" -I -e "$refer_req""
#curl -s  "$taobao_req&ref=&et=$ext_key" -I -e "$refer_req"

curl -s "$taobao_req&ref=&et=$ext_key" -I -e "$refer_req" | grep "^Location" | cut -d" " -f 2

日常数据备份脚本

#! /bin/bash
#
# Daily JIRA backup. Dumps the "jira" MySQL database and archives
# /data/server/jira-home into the directory that holds this script, then
# deletes the files dated 15 days ago -- but only when both of today's
# files are larger than 1 MB.

export LC_ALL=en_US.UTF-8

# Print the size of file $1 in whole KB (0 when it is not a regular file).
# Replaces parsing the output of "ls -l".
file_size_kb() {
  local bytes=0
  if [[ -f "$1" ]]; then
    bytes=$(wc -c < "$1")
  fi
  echo $(( bytes / 1024 ))
}

date
# Backups are written to, and later pruned from, the script's own directory.
# If the cd fails, stop: otherwise the rm below would run somewhere else.
cd "$(dirname "$0")" || exit 1
pwd

date +%s
# back up today's data
DATE=$(date +%Y-%m-%d)

SQL_FILE="jira_$DATE.sql"
# NOTE(review): the database password is hard-coded and passed on the
# command line, where other local users can read it from the process list.
/opt/lampp/bin/mysqldump --default-character-set=utf8 -h 10.1.22.40 -u jira -pkoudai123  jira > "$SQL_FILE"

r1=$(file_size_kb "$SQL_FILE")

DATA_FILE="jira_home_$DATE.tar.gz"
tar czf  "$DATA_FILE"  /data/server/jira-home

r2=$(file_size_kb "$DATA_FILE")

echo "$r1"
echo "$r2"

date +%s

# Prune only when both new backups look plausible (more than 1024 KB each).
if [[ $r1 -gt 1024 && $r2 -gt 1024 ]]; then
  # del old data
  DELDAY=$(date --date='-15 day' +%Y-%m-%d)

  echo "del data : $DELDAY"

  # ${DELDAY:?} aborts if the date is empty, so the glob can never become
  # "./*"; the "./" prefix and "--" keep odd file names from acting as options.
  rm -f -- ./*"${DELDAY:?}"*
fi

echo "$(date +%s) FINISH !"

操作文件

#! /bin/bash
#
# For i = 0..num, append to out_file every line of in_file that has more
# than 10 "^"-separated fields, prefixed with i.
#
# Usage: <this script> in_file out_file [num]
#   num defaults to 100. The old check demanded three arguments, which made
#   that default unreachable.

if [[ $# -lt 2 ]]; then
    echo "Usage $0 in_file out_file [num]" >&2
    exit 1
fi

INPUT=$1
OUTPUT=$2
num=${3:-100}

for (( i = 0; i <= num; i++ ))
do
    echo "$i"

    awk -F '^' -v prefix="$i" '{ if (NF > 10) print prefix $0 }' "${INPUT}" >> "${OUTPUT}"
done