AWK Demo

　　AWK is so powerful. AWK功能太强大了:

　　　　@1:处理格式化文本(感觉主要还是以这个为主)
　　　　@2:对文本逐行进行处理(过滤某些行,保留符合条件的)
　　　　@3:将文件根据不同的条件分成多个文件
　　　　@4:统计文件中的符合某些条件的文本行数

　　　　......

Demo1:

#!/bin/bash
#File: awkScript1.sh
#Author: lxw
#Time: 2014-08-08
#Usage: 'awk' demo.

main(){
    # Run the awk demos DEMO1..DEMO14 against a '|'-separated data file.
    # Arguments: $1 - data file to read (optional, defaults to ./demo.txt)
    # Outputs:   demo results on stdout. DEMO9 additionally writes files into
    #            the current directory: one per value of column 2, one per
    #            value of column 2 with a "-1" suffix, and cn.txt / th.txt /
    #            others.txt as far as matching records exist.
    local demo_file=${1:-./demo.txt}

    # $0: the whole line.    $1: the first column.    $2: the second column.    ...
    printf '\n%s\n' 'DEMO1:-----------------------------------------------------'
    head -n 5 "$demo_file" | awk 'BEGIN{FS="|"}{print $0}'

    printf '\n%s\n' 'DEMO2: PRINTF FORMAT---------------------------------------'
    head -n 20 "$demo_file" | tail -n 10 \
        | awk 'BEGIN{FS="|"}{printf "%-8s%-5s%-10s\n", $1, $2, $4}'

    printf '\n%s\n' 'DEMO3: FILTER-----------------------------------------------'
    head -n 30 "$demo_file" | tail -n 10 \
        | awk 'BEGIN{FS="|"} $2=="CN" && $3=="ipv4" {printf "%-8s%-5s%-10s\n", $1, $2, $4}'

    # FILENAME carries no useful name here because awk reads from a pipe.
    printf '\n%s\n' 'DEMO4: FS-NR-NF-FILENAME------------------------------------'
    head -n 40 "$demo_file" | tail -n 10 \
        | awk 'BEGIN{FS="|"} $2=="CN" || NR==1 {printf "%-10s%-3d%-3d%-8s%-5s%-10s\n", FILENAME, NR, NF, $1, $2, $4}'

    # A var=value operand is an awk assignment; escapes such as \t are
    # interpreted by awk, so OFS becomes a tab.
    printf '\n%s\n' 'DEMO5: OFS--------------------------------------------------'
    head -n 50 "$demo_file" | tail -n 10 \
        | awk 'BEGIN{FS="|"} $2=="CN" || NR==1 {print FILENAME, NR, NF, $1, $2, $4}' OFS="\t"

    # String match
    # @1: '~' means "matches".    @2: the text between // is the regular expression.
    printf '\n%s\n' 'DEMO6: String Match------------------------------------------'
    head -n 50 "$demo_file" | tail -n 10 \
        | awk 'BEGIN{FS="|"} $3 ~ /ip.4/ || NR==1 {print NR, $1, $2, $3, $4}' OFS="\t"

    # A regex matches anywhere in the text, so no leading/trailing ".*" is needed.
    printf '\n%s\n' 'DEMO7: The line which contains the specific string.----------'
    head -n 60 "$demo_file" | tail -n 10 | awk '/12|14/'

    printf '\n%s\n' 'DEMO8: The field which DOES NOT contain the specific string.--'
    head -n 60 "$demo_file" | tail -n 10 | awk 'BEGIN{FS="|"} $6 !~ /0412|0414/'

    # Split the file.
    printf '\n%s\n' 'DEMO9: Split the file.----------------------------------------'
    head -n 70 "$demo_file" | tail -n 10 | awk 'BEGIN{FS="|"} $2 != "" {print > $2}'
    # A concatenation used as redirection target must be parenthesised to be portable.
    head -n 70 "$demo_file" | tail -n 10 \
        | awk 'BEGIN{FS="|"} $2 != "" {print NR, $4 > ($2 "-1")}'
    head -n 70 "$demo_file" | tail -n 10 | awk 'BEGIN{FS="|"} {
        if($2 ~ /CN/) print > "cn.txt"
        else if($2 ~ /TH/) print > "th.txt"
        else print > "others.txt"
    }'

    # Count.
    printf '\n%s\n' 'DEMO10: Count.------------------------------------------------'
    awk 'BEGIN{FS="|"} $2 != "" {arr[$2]++}; END{for(i in arr) print i, ", ", arr[i];}' "$demo_file"

    # DON'T MIND TOO MUCH about the following 4 cases.
    # An assignment made WITH -v is visible in BEGIN & PROCESS & END.
    printf '\n%s\n' 'DEMO11: "-v" ----------------------------------------'
    awk -v a=1 'BEGIN{print "BEGIN:" a} NR==1 {print "PROCESS:" a}END{print "END:" a}' "$demo_file"
    # An assignment WITHOUT -v placed before the file is visible in PROCESS & END only.
    printf '\n%s\n' 'DEMO12 ------------------------------------------------'
    awk 'BEGIN{print "BEGIN:" a} NR==1 {print "PROCESS:" a}END{print "END:" a}' a=1 "$demo_file"
    # An assignment WITHOUT -v placed after the file is visible in END only.
    printf '\n%s\n' 'DEMO13 ------------------------------------------------'
    awk 'BEGIN{print "BEGIN:" a} NR==1 {print "PROCESS:" a}END{print "END:" a}' "$demo_file" a=1
    # The assigned value can be changed by the program: a counts from 10 up to 15.
    printf '\n%s\n' 'DEMO14 ------------------------------------------------'
    awk 'BEGIN{print "BEGIN:" a}a < 15{print "PROCESS:" a; ++a}END{print "END:" a}' a=10 "$demo_file"
}

main

　　关于赋值操作有以下几点说明:

　　@1:awk中变量值只有两种形式：字符串和数值

　　@2:如果变量赋值在第一个文件参数之前，在BEGIN动作之后执行，影响到正常处理和END动作；

　　@3:如果变量赋值在最后一个文件参数之后，在END动作之前执行，仅影响END动作；

　　@4:如果文件参数不存在，情况同@2所述；

　　@5:如果变量赋值位于多个文件参数之间，在变量赋值前面的文件被处理后执行，影响到后续文件的处理和END动作；

Demo2:

　　几点说明:

　　@1:函数的参数默认是局部变量，无法在函数之外访问，而在函数中定义的变量为全局变量，可以在函数之外访问

　　@2:next语句会读取下一条记录，并重新回到脚本的最开始处执行(read next input record and process)

#!/bin/bash
#File: awkScript2.sh
#Author: lxw
#Time: 2014-08-23
#Usage: More about awk.

#Function
#Function parameters are local to the function. Any other variable assigned
#inside the function (b below) is global and stays visible after the call.
#awk reads ./demo.txt itself, so no "cat |" is needed.
awk '
function func(a){
    b = a
    print a
}
NR == 1 {
    print b         #nothing: b has not been assigned yet
    func("lxw")     #"lxw"
    print b         #"lxw": func() assigned the global b
}' ./demo.txt

#Pattern
#A bare /regex/ pattern is matched against $0 and the default action prints
#the record, so the long form
#    seq 1 20|awk '$0 ~ /3/{print $0}'
#can be shortened to:
seq 1 20 | awk '/3/'

#Negated pattern: records that do NOT contain a 3.
seq 1 5 | awk '!/3/'
#Combined patterns: records that contain both a 1 and a 3.
seq 1 50 | awk '/1/ && /3/'

#Array
#The loop stores the keys 0..NF-1, so for the input "1 2 3" the key 3 does
#not exist: "3 in a" is false (prints "no", then 0).
echo "1 2 3" | awk '
{
    for(i=0; i<NF; ++i)
        a[i]=i
}
END{
    if(3 in a)
        print "yes"
    else
        print "no"
    print (3 in a)
    #for-in visits the keys in an unspecified order
    for(i in a)
        printf "%s: %s\n", i, a[i]
}'

#next
#<echo -e "..."> is equivalent to <echo $'...'>, i.e. the pipeline below could
#also start with: echo -e "line1\nline2"
#"next" stops processing the current record and continues with the next one,
#so "After next.." is never printed.
echo $'line1\nline2' | awk '{
    print "Before next.."
    print $0
    next
    print "After next.."
}'

#comma in print.
#The two commands below print DIFFERENT results.
#With commas the fields are joined with OFS                        -> 1!2!3
awk 'BEGIN{FS="_"; OFS="!"} {print $1, $2, $3}' <<< '1_2_3'
#Without commas the fields are just concatenated; OFS is not used  -> 123
awk 'BEGIN{FS="_"; OFS="!"} {print $1 $2 $3}' <<< '1_2_3'

#Command
#print can be piped into a shell command. The command is an awk string, so
#the double quotes around "sort -n;date" are required.
printf '%s\n' 1 3 2 9 5 | awk '{ print | "sort -n;date" }'

#String Operations.
#sub(ere, repl[, target]): ere->pattern; repl->new string; target->defaults to $0.
#The return value of sub() is the number of replacements made.
echo "hello, world!" | awk '{print sub(/ello/, "i"); print $0}'
#The following awk has no input to process. That is fine as long as the
#whole program lives in BEGIN.
#NOTE: & in the replacement stands for the matched text.
awk 'BEGIN{var="lxw"; sub(/lxw/, "hello, &", var); print var}'
#gsub: like sub, but replaces every match ("g" means "global").
#index: the return value is counted from 1, not 0 (0 means "not found").
awk 'BEGIN{print index("lxw", "l")}'
awk 'BEGIN{print index("lxw", "o")}'
#length:
awk 'BEGIN{print length("lxw")}'
#The following 2 lines are equivalent.
echo "lxw" | awk '{print length()}'
echo "lxw" | awk '{print length($0)}'
#match: returns the index where the match starts (counted from 1, not 0)
#and sets RSTART and RLENGTH.
awk 'BEGIN{print match("lxw", /xw/); printf "Matched at: %d, Matched substr length: %d\n", RSTART, RLENGTH}'
#split: split(s, a, fs) splits s by fs into a. The return value is the number of pieces.
#for-in visits the elements in an unspecified order.
awk 'BEGIN{
    split("1;2;3;4;5", arr, ";")
    for(i in arr)
        printf "arr[%d]=%d\n", i, arr[i]
}'

#Looping from 1 to the return value of split() keeps the original order.
awk 'BEGIN{
    n=split("1;2;3;4;5", arr, ";")
    for(i=1; i<=n; ++i)
        printf "arr[%d]=%d\n", i, arr[i]
}'
#sprintf: like printf, but returns the result instead of printing it.
awk 'BEGIN{var=sprintf("%s=%s", "hello", "lxw"); print var}'
#substr(s, m[, n]): the index is counted from 1, not 0.
awk 'BEGIN{print substr("hello", 2, 3)}'
#tolower(s), toupper(s)
awk 'BEGIN{print toupper("lxw"); print tolower("LXW")}'

#IO.
#getline:
#NOTE: the command is an awk string, so the double quotes around
#"head -n 10 ./demo.txt" are required.
#getline returns 1 on success, 0 at end of input and -1 on error. Comparing
#with "> 0" keeps an error from turning the loop into an endless one.
awk 'BEGIN{while(("head -n 10 ./demo.txt" | getline var) > 0) print var}'
#The following 2 lines are equivalent. The main rule consumes the first
#record, then the loop reads and prints the remaining ones.
head -n 10 ./demo.txt | awk '{while((getline) > 0) print NF, NR, FNR, $0}'
head -n 10 ./demo.txt | awk '{while((getline $0) > 0) print NF, NR, FNR, $0}'
#system: run an external command.
awk 'BEGIN{system("tail -n 10 /etc/passwd")}'

demo.txt内容如下(非apnic原始文件,其中的数据已经进行了随机的修改):

apnic|CN|ipv4|233.223.176.0|4096|20140813|allocated|A8294605
apnic|AD|ipv4|233.223.192.0|4096|20100806|allocated|A9237638
apnic|JP|ipv4|233.223.208.0|2048|20120803|allocated|A12B1B09
apnic|AK|ipv4|233.223.216.0|1024|20100730|allocated|A92AEBC2
apnic|BG|ipv4|233.223.220.0|1024|20110411|allocated|A9498FAD
apnic|CP|ipv4|233.223.224.0|8192|20100730|allocated|A914F5F3
apnic|IN|ipv4|233.224.0.0|1048576|20100914|allocated|A1199197
apnic|AD|ipv4|233.240.0.0|524288|20100803|allocated|A2248097
apnic|US|ipv4|233.248.0.0|262144|20100713|allocated|A32869B9
apnic|AU|ipv4|223.252.0.0|32768|20100727|allocated|A91376C8
apnic|AB|ipv4|223.252.128.0|32768|20110131|allocated|A92A14F7
apnic|KR|ipv4|223.253.0.0|65536|20100728|allocated|A925D732
apnic|CT|ipv4|223.254.0.0|65536|20100723|allocated|A92FE265
apnic|CE|ipv4|223.255.0.0|32768|20100810|allocated|A91FEB6F
apnic|CK|ipv4|223.255.128.0|16384|20100810|allocated|A2132B1E
apnic|AR|ipv4|223.255.192.0|8192|20100802|allocated|A9239F52
apnic|ID|ipv4|223.255.224.0|2048|20100809|allocated|A12ED542
apnic|EU|ipv4|223.255.232.0|1024|20100812|allocated|A9290386
apnic|AN|ipv4|223.255.236.0|1024|20110311|allocated|A329E12C
apnic|HK|ipv4|223.255.240.0|1024|20100803|allocated|A95846F6
apnic|IN|ipv4|223.255.244.0|1024|20100804|allocated|A22523BF
apnic||ipv4|223.255.248.0|1024||reserved|
apnic|DN|ipv4|223.255.252.0|512|20110414|allocated|A42E1062
apnic|SG|ipv4|223.255.254.0|256|20110408|assigned|A93B8C6C
apnic|EU|ipv4|223.255.255.0|256|20110811|assigned|A3173591
apnic|CP|ipv6|2001:200::|35|19990813|allocated|A917B6AA
apnic|DP|ipv6|2001:200:2000::|35|20030423|allocated|A716B6AA
apnic|AF|ipv6|2001:200:4000::|34|20030423|allocated|A816B6AA
apnic|AP|ipv6|2001:200:8000::|33|20030423|allocated|A913B6AA
apnic||ipv6|2001:201::|32||available|
apnic||ipv6|2001:202::|31||available|
apnic||ipv6|2001:204::|30||available|
apnic|BG|ipv6|2001:208::|35|19990827|allocated|A519DB08
apnic|SG|ipv6|2001:208:2000::|35|20030306|allocated|A919DB58
apnic|BG|ipv6|2001:208:4000::|34|20030306|allocated|A919DB32
apnic|UN|ipv6|2001:208:8000::|33|20030306|allocated|A919DB38
apnic||ipv6|2001:209::|32||available|
apnic||ipv6|2001:20a::|31||available|
apnic||ipv6|2001:20c::|30||available|
apnic||ipv6|2001:210::|35||reserved|
apnic||ipv6|2001:210:2000::|35||available|
apnic||ipv6|2001:210:4000::|34||available|
apnic||ipv6|2001:210:8000::|33||available|
apnic||ipv6|2001:211::|32||available|
apnic||ipv6|2001:212::|31||available|
apnic||ipv6|2001:214::|30||available|
apnic|DE|ipv6|2001:218::|35|19990922|allocated|A91D8D8A
apnic|BP|ipv6|2001:218:2000::|35|20020724|allocated|A91D8D8A
apnic|SC|ipv6|2001:218:4000::|34|20020724|allocated|A91D8D8A
apnic|UP|ipv6|2001:218:8000::|33|20020724|allocated|A91D8D8A
apnic||ipv6|2001:219::|32||available|
apnic||ipv6|2001:21a::|31||available|
apnic||ipv6|2001:21c::|30||available|
apnic|DR|ipv6|2001:220::|35|19991006|allocated|A92B4E0E
apnic|CR|ipv6|2001:220:2000::|35|20031126|allocated|A92B4E0E
apnic|KE|ipv6|2001:220:4000::|34|20031126|allocated|A92B4E0E
apnic|AR|ipv6|2001:220:8000::|33|20031126|allocated|A92B4E0E
apnic||ipv6|2001:221::|32||available|
apnic||ipv6|2001:222::|31||available|
apnic||ipv6|2001:224::|30||available|
apnic||ipv6|2001:228::|35||reserved|
apnic||ipv6|2001:228:2000::|35||available|
apnic||ipv6|2001:228:4000::|34||available|
apnic||ipv6|2001:228:8000::|33||available|
apnic||ipv6|2001:229::|32||available|
apnic||ipv6|2001:22a::|31||available|
apnic||ipv6|2001:22c::|30||available|
apnic|AR|ipv6|2001:230::|35|19991124|allocated|A9267E82
apnic|KB|ipv6|2001:230:2000::|35|20020802|allocated|A9267E82
apnic|DR|ipv6|2001:230:4000::|34|20020802|allocated|A9267E82
apnic|KF|ipv6|2001:230:8000::|33|20020802|allocated|A9267E82
apnic||ipv6|2001:231::|32||available|
apnic||ipv6|2001:232::|31||available|
apnic||ipv6|2001:234::|30||available|
apnic|CE|ipv6|2001:238::|35|20000208|allocated|A912C007
apnic|SD|ipv6|2001:238:2000::|35|20020711|allocated|A912C007
apnic|TA|ipv6|2001:238:4000::|34|20020711|allocated|A912C007
apnic|CI|ipv6|2001:238:8000::|33|20020711|allocated|A912C007
apnic||ipv6|2001:239::|12||available|
apnic||ipv6|2001:23a::|35||available|
apnic||ipv6|2001:23c::|20||available|

View Code

Reference:

AWK简明教程: http://coolshell.cn/articles/9070.html

AWK简明教程: http://xu020408.blog.163.com/blog/static/26548920132704942773/

《sed & awk》读书笔记之 awk 篇: http://blog.jobbole.com/31817/