交互或批量计算区间内基因数目(while read)

1.交互状态

#!/bin/bash
#count genes in region,need three para
#author lee

if [ $# -ne 3 ]
then
        echo "Usage chr start end"
else
        num=$(awk -v chr=$1 -v start=$2 -v end=$3 '{if($3=="gene" && $1==chr && $4>=start && $4<=end)sum++}END{print sum}' /public/home/caisl/lee/genome/msu.gff3)
        echo -e "33[31mThere are $num genes of $1:$2-$333[0m"
        echo -e "33[32mDetail info is in the file of ${1}_${2}_${3}33[0m"
        awk -v chr=$1 -v start=$2 -v end=$3 '{if($3=="gene" && $1==chr && $4>=start && $4<=end)print $0}' /public/home/caisl/lee/genome/msu.gff3 >${1}_${2}_${3}.txt
fi

2.批量提取

chr start end
1 100 100
2 200 2000
#!/bin/bash

#author lee

if [ -f detail_of_$1 ]
then
        rm detail_of_$1
fi
if [ -f number_of_$1 ]
then
        rm number_of_$1
fi

while read line
do
        chr=$(echo $line |awk '{print $1}');
        start=$(echo $line |awk '{print $2}');
        end=$(echo $line |awk '{print $3}');
        awk -v chr=$chr -v start=$start -v end=$end '{if($3=="gene" && $1==chr && $4>=start && $4<=end)print $0}' /public/home/caisl/lee/genome/msu.gff3 >>detail_of_$1;
        num=$(awk -v chr=$chr -v start=$start -v end=$end '{if($3=="gene" && $1==chr && $4>=start && $4<=end)sum++}END{print sum}' /public/home/caisl/lee/genome/msu.gff3);
        echo "There are $num genes of $chr:$start-$end"
        echo "There are $num genes of $chr:$start-$end" >>number_of_$1;
done <$1


后面继续尝试下面这种方法

#!/bin/bash
cat $1 |while read gene chr from to
do
    #echo $chr $from $to
    if echo $2 |grep -q '.*.vcf.gz$';then
        vcftools --gzvcf $2 --chr $chr --from-bp $from --to-bp $to  --recode --recode-INFO-all --out $gene.$chr.$from-$to 
    elif echo $2 |grep -q '.*.vcf$';then
        vcftools --vcf $2 --chr $chr --from-bp $from --to-bp $to  --recode --recode-INFO-all --out $gene.$chr.$from-$to
    fi
done
原文地址:https://www.cnblogs.com/xiaosagege/p/15215119.html