R语言中实现数据的匹配与合并

1、merge函数实现

## Join two keyed tables with merge(), with and without sorting on the key.

## List the files in the current working directory.
dir()

## Read both whitespace-delimited tables. The files have no header row,
## so read.table() assigns the default column names V1, V2.
test1 <- read.table("test1.txt", header = FALSE)
test1
test2 <- read.table("test2.txt", header = FALSE)
test2

## Join on the shared key column V1. By default the result is sorted on the
## key; the clashing non-key columns are suffixed as V2.x and V2.y.
result <- merge(test1, test2, by = "V1")
result

## sort = FALSE skips the sort on the key column. merge() does not guarantee
## any particular row order in this case.
result2 <- merge(test1, test2, by = "V1", sort = FALSE)
result2

> dir()     ## 列出当前工作路径下的文件
[1] "test1.txt" "test2.txt"
> test1 <- read.table("test1.txt", header = F)  ## 读取测试数据test1
> test1
   V1 V2
1 159  0
2 430  0
3 350  0
4 410  0
5 108  0
6 630  0
7 424  0
8 265  0
9 428  0
> test2 <- read.table("test2.txt", header = F)  ## 读取测试数据test2
> test2
   V1 V2
1 430 66
2 630 44
3 265 88
4 108 22
5 350 77
6 424 99
7 410 33
8 159 11
9 428 55
> result <- merge(test1, test2, by.x = "V1", by.y = "V1")  ## 利用merge进行合并，指定合并依据的列名；默认会按该列对合并结果排序
> result
   V1 V2.x V2.y
1 108    0   22
2 159    0   11
3 265    0   88
4 350    0   77
5 410    0   33
6 424    0   99
7 428    0   55
8 430    0   66
9 630    0   44
> result2 <- merge(test1, test2, by.x = "V1", by.y = "V1", sort = F) ## 利用sort = F选项设定合并后不按合并列排序（此时行顺序不作保证，本例中恰好与test1一致）
> result2
   V1 V2.x V2.y
1 159    0   11
2 430    0   66
3 350    0   77
4 410    0   33
5 108    0   22
6 630    0   44
7 424    0   99
8 265    0   88
9 428    0   55

2、match函数实现

## Reorder the rows of test2 to follow the key order of test1 using match().

## List the files in the current working directory.
dir()

## Read both whitespace-delimited tables. The files have no header row,
## so read.table() assigns the default column names V1, V2.
test1 <- read.table("test1.txt", header = FALSE)
test1
test2 <- read.table("test2.txt", header = FALSE)
test2

## For each key in test1$V1, match() gives the position of its first
## occurrence in test2$V1. A key missing from test2 yields NA, which would
## produce an all-NA row in the result.
row_index <- match(test1$V1, test2$V1)

## Index test2 by those positions so its rows come out in test1's key order.
result <- test2[row_index, ]
result

> dir()
[1] "test1.txt" "test2.txt"
> test1 <- read.table("test1.txt", header = F)  ## 读取测试数据
> test1
   V1 V2
1 159  0
2 430  0
3 350  0
4 410  0
5 108  0
6 630  0
7 424  0
8 265  0
9 428  0
> test2 <- read.table("test2.txt", header = F)
> test2
   V1 V2
1 430 66
2 630 44
3 265 88
4 108 22
5 350 77
6 424 99
7 410 33
8 159 11
9 428 55
> result <- test2[match(test1$V1, test2$V1),]   ## 利用match函数匹配，返回索引，利用索引提取指定数据
> result
   V1 V2
8 159 11
1 430 66
5 350 77
7 410 33
4 108 22
2 630 44
6 424 99
3 265 88
9 428 55

3、shell实现

root@PC1:/home/test2# ls
test1.txt  test2.txt
root@PC1:/home/test2# cat test1.txt
159     0
430     0
350     0
410     0
108     0
630     0
424     0
265     0
428     0
root@PC1:/home/test2# cat test2.txt
430     66
630     44
265     88
108     22
350     77
424     99
410     33
159     11
428     55
root@PC1:/home/test2# cat test1.txt | while read {i,j}; do grep -w "$i" test2.txt >> result.txt; done  ## 利用while循环 + grep命令实现；read {i,j} 经花括号展开后等价于 read i j，grep -w 在整行内按整词匹配（不限于第一列）
root@PC1:/home/test2# ls
result.txt  test1.txt  test2.txt
root@PC1:/home/test2# cat result.txt
159     11
430     66
350     77
410     33
108     22
630     44
424     99
265     88
428     55