plink 软件中 updatemap 命令

plink软件中 --update-map 用于更新snp的物理位置。

1、测试数据

root@PC1:/home/test# ls
new.pos  outcome.map  outcome.ped
root@PC1:/home/test# cat outcome.map    ## 一共10个snp
1       snp1    0       55910
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1       snp5    0       312707
1       snp6    0       356863
1       snp7    0       400518
1       snp8    0       487423
1       snp9    0       578716
1       snp10   0       639876
root@PC1:/home/test# cat outcome.ped
DOR     1       0       0       0       -9      G G     C C     G G     G G     A G     A A     G G     G C     A G     A A
DOR     2       0       0       0       -9      G G     0 0     G G     G G     G G     A A     A G     C C     A G     0 0
DOR     3       0       0       0       -9      G G     0 0     G G     G G     G G     A A     A G     G C     G G     0 0
DOR     4       0       0       0       -9      G G     0 0     G G     G G     G G     A A     G G     G G     0 0     0 0
DOR     5       0       0       0       -9      G G     0 0     G G     G G     G G     A A     A G     G C     G G     A A
DOR     6       0       0       0       -9      G G     C C     G G     G G     G G     A A     A A     C C     G G     A A

2、利用--update-map 更新snp物理位置

root@PC1:/home/test# ls
new.pos  outcome.map  outcome.ped
root@PC1:/home/test# cat new.pos  ## 需要准备一个配置文件,指定更新后snp的位置
snp1 55555
snp5 333333
snp7 444444
root@PC1:/home/test# plink --file outcome --update-map new.pos --recode tab --out result &> /dev/null  ## &>的意思是不要在屏幕上显示过程
root@PC1:/home/test# ls
new.pos  outcome.map  outcome.ped  result.log  result.map  result.nosex  result.ped
root@PC1:/home/test# cat outcome.map
1       snp1    0       55910
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1       snp5    0       312707
1       snp6    0       356863
1       snp7    0       400518
1       snp8    0       487423
1       snp9    0       578716
1       snp10   0       639876
root@PC1:/home/test# cat result.map  ##可以发现snp1、snp5、snp7的物理位置已经更新为new.pos中的物理位置
1       snp1    0       55555
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1       snp5    0       333333
1       snp6    0       356863
1       snp7    0       444444
1       snp8    0       487423
1       snp9    0       578716
1       snp10   0       639876

3、shell实现

root@PC1:/home/test# ls
new.pos  outcome.map  outcome.ped
root@PC1:/home/test# cp outcome.map outcome.map_bak    ## 备份文件
root@PC1:/home/test# ls
new.pos  outcome.map  outcome.map_bak  outcome.ped
root@PC1:/home/test# cat new.pos
snp1 55555
snp5 333333
snp7 444444
root@PC1:/home/test# cat new.pos | while read {i,j}; do awk -v a=$i -v b=$j '{OFS = "\t"}{if($2 == a ) {print $0; $4 = b; print $0 }}' outcome.map >> temp; done
##利用while循环打印出需要替换的行 root@PC1:
/home/test# ls new.pos outcome.map outcome.map_bak outcome.ped temp root@PC1:/home/test# cat temp 1 snp1 0 55910 1 snp1 0 55555 1 snp5 0 312707 1 snp5 0 333333 1 snp7 0 400518 1 snp7 0 444444 root@PC1:/home/test# sed 's/\t/_/g' temp -i ## 将制表符替换为_ root@PC1:/home/test# cat temp 1_snp1_0_55910 1_snp1_0_55555 1_snp5_0_312707 1_snp5_0_333333 1_snp7_0_400518 1_snp7_0_444444 root@PC1:/home/test# sed "N; s/\n/\t/" temp -i ## 两行转换为一行 root@PC1:/home/test# cat temp 1_snp1_0_55910 1_snp1_0_55555 1_snp5_0_312707 1_snp5_0_333333 1_snp7_0_400518 1_snp7_0_444444 root@PC1:/home/test# cat outcome.map 1 snp1 0 55910 1 snp2 0 85204 1 snp3 0 122948 1 snp4 0 203750 1 snp5 0 312707 1 snp6 0 356863 1 snp7 0 400518 1 snp8 0 487423 1 snp9 0 578716 1 snp10 0 639876 root@PC1:/home/test# sed 's/\t/_/g' outcome.map -i ## 将map文件的制表符替换为_ root@PC1:/home/test# cat outcome.map 1_snp1_0_55910 1_snp2_0_85204 1_snp3_0_122948 1_snp4_0_203750 1_snp5_0_312707 1_snp6_0_356863 1_snp7_0_400518 1_snp8_0_487423 1_snp9_0_578716 1_snp10_0_639876 root@PC1:/home/test# cat temp | while read {i,j}; do sed "s/$i/$j/" outcome.map -i; done ## 利用while循环将需要替换的行替换掉 root@PC1:/home/test# cat outcome.map 1_snp1_0_55555 1_snp2_0_85204 1_snp3_0_122948 1_snp4_0_203750 1_snp5_0_333333 1_snp6_0_356863 1_snp7_0_444444 1_snp8_0_487423 1_snp9_0_578716 1_snp10_0_639876 root@PC1:/home/test# sed 's/_/\t/g' outcome.map -i root@PC1:/home/test# cat outcome.map ## map中snp位置已经更新 1 snp1 0 55555 1 snp2 0 85204 1 snp3 0 122948 1 snp4 0 203750 1 snp5 0 333333 1 snp6 0 356863 1 snp7 0 444444 1 snp8 0 487423 1 snp9 0 578716 1 snp10 0 639876

4、R实现

dat <- read.table("outcome.map", header = F)
dat
newpos <- read.table("new.pos", header = F)
newpos

idx <- match(newpos$V1, dat$V2)  ## 利用snpID进行匹配,获取索引
sum(is.na(idx))   ## 查看是否有空值
dat[idx,][4] = newpos$V2  ## 将匹配行的第四列替换为newpos的第2列
dat

原文地址:https://www.cnblogs.com/liujiaxin2018/p/15690734.html