用 Linux shell 实现 plink 的 --recode A 命令

1、shell 脚本实现

# Shell/awk re-implementation of `plink --file <prefix> --recode A`.
#
# Usage:   bash <this script> <file.ped> <file.map>
# Output:  result.paw in the current directory, one header line followed by
#          one line per individual:
#            FID IID PAT MAT SEX PHENOTYPE <snp>_<counted allele> ...
#          Each genotype column holds how many copies (0/1/2) of the counted
#          allele the individual carries.
#
# Counted allele: the least frequent allele of the variant across the whole
# ped file. Ties are broken by taking the allele that sorts first in byte
# order. A variant with a single observed allele uses that allele, so every
# complete genotype scores 2.
#
# Missing data: the allele code "0" is never tallied as an allele, and a
# genotype containing it is written as NA.
#
# No scratch files with fixed names are created; the result is assembled in a
# mktemp file and renamed to result.paw only after awk succeeded, so a failed
# run never leaves a truncated result.paw behind.

#######################################
# Convert a ped/map pair into additive (0/1/2) coding.
# Arguments: $1 - path to the .ped file (whitespace-delimited)
#            $2 - path to the .map file (variant ID in the 2nd column)
# Outputs:   writes ./result.paw; diagnostics go to stderr
# Returns:   0 on success, non-zero on unreadable or inconsistent input
#######################################
recode_a() (
  # The body runs in a subshell so the EXIT trap and the variables below do
  # not leak into the caller.
  ped=$1
  map=$2
  out=result.paw

  for f in "$ped" "$map"; do
    # Regular files are required because the ped file is read twice.
    if [[ ! -f $f || ! -r $f ]]; then
      printf 'error: not a readable regular file: %s\n' "$f" >&2
      exit 1
    fi
  done

  # awk treats an operand containing "=" as a variable assignment and "-" as
  # stdin; a leading "./" forces both to be taken as file names.
  [[ $ped == /* ]] || ped=./$ped
  [[ $map == /* ]] || map=./$map

  tmp=$(mktemp "${out}.XXXXXX") || exit 1
  trap 'rm -f -- "$tmp"' EXIT

  # pass=1 reads the map, pass=2 tallies alleles, pass=3 writes the output.
  # LC_ALL=C keeps the tie-breaking string comparison byte-wise.
  LC_ALL=C awk '
    function fail(msg) {
      printf "error: %s\n", msg > "/dev/stderr"
      failed = 1
      exit 1
    }

    # Choose the counted allele of every variant and print the header line.
    function emit_header(   s, k, a, line) {
      if (n_map != n_snp)
        fail("map lists " n_map " variants but ped rows hold " n_snp)
      line = "FID IID PAT MAT SEX PHENOTYPE"
      for (s = 1; s <= n_snp; s++) {
        counted[s] = "0"          # only kept when every call is missing
        for (k = 1; k <= n_alleles[s]; k++) {
          a = name[s, k]
          if (k == 1 ||
              tally[s, a] < tally[s, counted[s]] ||
              (tally[s, a] == tally[s, counted[s]] && a < counted[s]))
            counted[s] = a
        }
        line = line " " id[s] "_" counted[s]
      }
      print line
      header_done = 1
    }

    # Pass 1, map file: remember the variant ID of every non-blank row.
    pass == 1 {
      if (NF > 0) id[++n_map] = $2 ""
      next
    }

    # Pass 2, ped file: tally each non-missing allele per variant.
    pass == 2 {
      if (NF == 0) next
      if (NF < 6 || (NF - 6) % 2 != 0)
        fail(FILENAME ": line " FNR ": expected 6 columns plus allele pairs")
      n = (NF - 6) / 2
      if (rows++ == 0)
        n_snp = n
      else if (n != n_snp)
        fail(FILENAME ": line " FNR ": " n " genotypes, expected " n_snp)
      for (s = 1; s <= n_snp; s++) {
        for (h = 0; h < 2; h++) {
          # Appending "" forces string semantics, so that "1" and "1.0"
          # stay distinct alleles.
          allele = $(5 + 2 * s + h) ""
          if (allele == "0") continue
          if (!((s, allele) in tally)) name[s, ++n_alleles[s]] = allele
          tally[s, allele]++
        }
      }
      next
    }

    # Pass 3, ped file again: header first, then one dosage row per person.
    pass == 3 {
      if (!header_done) emit_header()
      if (NF == 0) next
      line = $1 " " $2 " " $3 " " $4 " " $5 " " $6
      for (s = 1; s <= n_snp; s++) {
        a1 = $(5 + 2 * s) ""
        a2 = $(6 + 2 * s) ""
        if (a1 == "0" || a2 == "0")
          dose = "NA"
        else
          dose = (a1 == counted[s]) + (a2 == counted[s])
        line = line " " dose
      }
      print line
    }

    END {
      if (failed) exit 1
      # A ped file without any row never reaches the pass 3 rule.
      if (!header_done) emit_header()
    }
  ' pass=1 "$map" pass=2 "$ped" pass=3 "$ped" > "$tmp" || exit 1

  # mktemp creates the file with mode 0600; "=rw" without a who-letter
  # resets it to what a plain redirection would give under the umask.
  chmod =rw -- "$tmp" && mv -- "$tmp" "$out"
)

# Entry point: validates the argument count, then runs the conversion.
# Uses return instead of exit so the status simply becomes the status of
# the script when this is its last command.
main() {
  if (( $# != 2 )); then
    printf 'usage: bash %s <file.ped> <file.map>\n' "${0##*/}" >&2
    return 2
  fi
  recode_a "$1" "$2"
}

main "$@"

用法:

root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r
root@PC1:/home/test/test/test2# bash record.r outcome.ped outcome.map
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r  result.paw
root@PC1:/home/test/test/test2# cat result.paw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# cat outcome.ped
DOR 1 0 0 0 -9 C C C C T T G G A G G G
DOR 2 0 0 0 -9 C G G C G G G G G G G G
DOR 3 0 0 0 -9 G G C C G G G G G G A A
DOR 4 0 0 0 -9 G G C C G G G G G G A A
DOR 5 0 0 0 -9 G G C C G G G G G G A A
DOR 6 0 0 0 -9 G G C C G G G G G G A A
DOR 7 0 0 0 -9 G G C C G G A G A A A A
DOR 9 0 0 0 -9 G G C C G G A G A A A A

2、plink软件验证

root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r
root@PC1:/home/test/test/test2# bash record.r outcome.ped outcome.map
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r  result.paw
root@PC1:/home/test/test/test2# plink --file outcome --recode A --out temp > /dev/null; rm *log *.nosex
root@PC1:/home/test/test/test2# ls
outcome.map  outcome.ped  record.r  result.paw  temp.raw
root@PC1:/home/test/test/test2# cat temp.raw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# cat result.paw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# md5sum result.paw temp.raw
563fbde796e2d64dfc9c4570e71a925f  result.paw
563fbde796e2d64dfc9c4570e71a925f  temp.raw
原文地址:https://www.cnblogs.com/liujiaxin2018/p/15501776.html