pandas之join、merge

 

join按行索引进行拼接,merge按列所以进行拼接

In [1]:
import pandas as pd 
import numpy as np
In [2]:
d1 = pd.DataFrame(np.arange(12).reshape(3,4),index=list("ABC"),columns=list("HIJK"))
d2 = pd.DataFrame(np.arange(12).reshape(4,3),index=list("ABCD"),columns=list("KLM"))
print(d1,"
*******************
",d2)
 
   H  I   J   K
A  0  1   2   3
B  4  5   6   7
C  8  9  10  11 
*******************
    K   L   M
A  0   1   2
B  3   4   5
C  6   7   8
D  9  10  11
In [3]:
#有相同列名时,需要至少给其中一列添加后缀
print(d1.join(d2,lsuffix="_left",rsuffix="_right"))
 
   H  I   J  K_left  K_right  L  M
A  0  1   2       3        0  1  2
B  4  5   6       7        3  4  5
C  8  9  10      11        6  7  8
In [4]:
print(d2.join(d1,lsuffix="_left",rsuffix="_right"))
 
   K_left   L   M    H    I     J  K_right
A       0   1   2  0.0  1.0   2.0      3.0
B       3   4   5  4.0  5.0   6.0      7.0
C       6   7   8  8.0  9.0  10.0     11.0
D       9  10  11  NaN  NaN   NaN      NaN
In [5]:
d3 = pd.DataFrame(np.zeros(shape=(3,3)),columns=list("HOP"))
d3.loc[2,"H"] = 1
print(d1,"
*******************
",d3)
 
   H  I   J   K
A  0  1   2   3
B  4  5   6   7
C  8  9  10  11 
*******************
      H    O    P
0  0.0  0.0  0.0
1  0.0  0.0  0.0
2  1.0  0.0  0.0
In [6]:
print(d1.merge(d3,on="H"))
#print(d1.merge(d3))
 
   H  I  J  K    O    P
0  0  1  2  3  0.0  0.0
1  0  1  2  3  0.0  0.0
In [7]:
print(d1.merge(d3,on="H",how="outer"))
 
   H    I     J     K    O    P
0  0  1.0   2.0   3.0  0.0  0.0
1  0  1.0   2.0   3.0  0.0  0.0
2  4  5.0   6.0   7.0  NaN  NaN
3  8  9.0  10.0  11.0  NaN  NaN
4  1  NaN   NaN   NaN  0.0  0.0
In [8]:
print(d1.merge(d3,on="H",how="left"))
 
   H  I   J   K    O    P
0  0  1   2   3  0.0  0.0
1  0  1   2   3  0.0  0.0
2  4  5   6   7  NaN  NaN
3  8  9  10  11  NaN  NaN
In [9]:
print(d1.merge(d3,on="H",how="right"))
 
   H    I    J    K    O    P
0  0  1.0  2.0  3.0  0.0  0.0
1  0  1.0  2.0  3.0  0.0  0.0
2  1  NaN  NaN  NaN  0.0  0.0
In [10]:
print(d2,"
********************
",d3)
print("*"*20)
print(d2.merge(d3,left_on="K",right_on="H"))#没有相同列名时的合并
 
   K   L   M
A  0   1   2
B  3   4   5
C  6   7   8
D  9  10  11 
********************
      H    O    P
0  0.0  0.0  0.0
1  0.0  0.0  0.0
2  1.0  0.0  0.0
********************
   K  L  M    H    O    P
0  0  1  2  0.0  0.0  0.0
1  0  1  2  0.0  0.0  0.0
In [11]:
#有多个列名相同时
d3["I"] = [1.,0.,0.]
print(d1,"
"+"*"*20+"
",d3,"
"+"*"*20)
print(d1.merge(d3),"
"+"*"*20)
print(d1.merge(d3,on="H"))
 
   H  I   J   K
A  0  1   2   3
B  4  5   6   7
C  8  9  10  11 
********************
      H    O    P    I
0  0.0  0.0  0.0  1.0
1  0.0  0.0  0.0  0.0
2  1.0  0.0  0.0  0.0 
********************
   H  I  J  K    O    P
0  0  1  2  3  0.0  0.0 
********************
   H  I_x  J  K    O    P  I_y
0  0    1  2  3  0.0  0.0  1.0
1  0    1  2  3  0.0  0.0  0.0
原文地址:https://www.cnblogs.com/FinnChan/p/11604907.html