pandas 6 合并数据 concat, append 垂直合并,数据会变高/长

from __future__ import print_function
import pandas as pd
import numpy as np

concatenating

# ignore index
# Three 3x4 float frames sharing columns a-d and the default 0..2 row labels,
# filled with 0.0, 1.0 and 2.0 respectively.
df1, df2, df3 = (
    pd.DataFrame(np.full((3, 4), float(fill)), columns=['a', 'b', 'c', 'd'])
    for fill in range(3)
)

# One call, one frame per line -- same text as three separate prints.
print(df1, df2, df3, sep='\n')

>      a    b    c    d
> 0  0.0  0.0  0.0  0.0
> 1  0.0  0.0  0.0  0.0
> 2  0.0  0.0  0.0  0.0

>      a    b    c    d
> 0  1.0  1.0  1.0  1.0
> 1  1.0  1.0  1.0  1.0
> 2  1.0  1.0  1.0  1.0

>      a    b    c    d
> 0  2.0  2.0  2.0  2.0
> 1  2.0  2.0  2.0  2.0
> 2  2.0  2.0  2.0  2.0
# Stack the three frames vertically. ignore_index=True discards the original
# row labels (0,1,2 repeated three times) and renumbers the rows 0..8.
res = pd.concat(
    [df1, df2, df3],
    axis='index',
    ignore_index=True,
)
print(res)

>      a    b    c    d
> 0  0.0  0.0  0.0  0.0
> 1  0.0  0.0  0.0  0.0
> 2  0.0  0.0  0.0  0.0
> 3  1.0  1.0  1.0  1.0
> 4  1.0  1.0  1.0  1.0
> 5  1.0  1.0  1.0  1.0
> 6  2.0  2.0  2.0  2.0
> 7  2.0  2.0  2.0  2.0
> 8  2.0  2.0  2.0  2.0
join, ('inner', 'outer')
# join, ('inner', 'outer')
# Two frames that only partly overlap: row labels 1-3 vs 2-4,
# columns a-d vs b-e.
df1 = pd.DataFrame(np.zeros((3, 4)), index=[1, 2, 3], columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=list('bcde'))
print(df1, df2, sep='\n')

>      a    b    c    d
> 1  0.0  0.0  0.0  0.0
> 2  0.0  0.0  0.0  0.0
> 3  0.0  0.0  0.0  0.0

>      b    c    d    e
> 2  1.0  1.0  1.0  1.0
> 3  1.0  1.0  1.0  1.0
> 4  1.0  1.0  1.0  1.0
# Side-by-side concatenation. 'outer' is the default: it keeps the union of
# the row labels and fills cells a frame has no data for with NaN.
res = pd.concat([df1, df2], join='outer', axis='columns')
print(res)

>      a    b    c    d    b    c    d    e
> 1  0.0  0.0  0.0  0.0  NaN  NaN  NaN  NaN
> 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
> 3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
> 4  NaN  NaN  NaN  NaN  1.0  1.0  1.0  1.0
# 'inner' keeps only the row labels present in both frames (the
# intersection); every other row is dropped.
res = pd.concat([df1, df2], join='inner', axis='columns')
print(res)

>      a    b    c    d    b    c    d    e
> 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
> 3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
join_axes(注意:该参数已在 pandas 1.0 中移除,可先对输入做 reindex 再 concat 达到同样效果)
# join_axes
# The ``join_axes`` keyword of pd.concat was deprecated in pandas 0.25 and
# removed in 1.0, so passing it now raises TypeError. The documented
# replacement is to reindex the inputs before concatenating.
# Aligning df2 to df1's row labels keeps exactly df1's rows; rows that df2
# does not have are filled with NaN.
res = pd.concat([df1, df2.reindex(df1.index)], axis=1)
print(res)

>      a    b    c    d    b    c    d    e
> 1  0.0  0.0  0.0  0.0  NaN  NaN  NaN  NaN
> 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
> 3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0

append(注意:DataFrame.append 已在 pandas 2.0 中移除,推荐改用 pd.concat)

# append
# df1 and df2 share columns a-d and the default 0..2 row labels;
# df3 has columns b-e and row labels 2..4, so it only partly lines up.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list('abcd'))
df3 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=list('bcde'))
print(df1, df2, sep='\n')

>      a    b    c    d
> 0  0.0  0.0  0.0  0.0
> 1  0.0  0.0  0.0  0.0
> 2  0.0  0.0  0.0  0.0

>      a    b    c    d
> 0  1.0  1.0  1.0  1.0
> 1  1.0  1.0  1.0  1.0
> 2  1.0  1.0  1.0  1.0
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement.
# Put df2 below df1 and renumber the rows 0..5.
res = pd.concat([df1, df2], ignore_index=True)
print(res)

>      a    b    c    d
> 0  0.0  0.0  0.0  0.0
> 1  0.0  0.0  0.0  0.0
> 2  0.0  0.0  0.0  0.0
> 3  1.0  1.0  1.0  1.0
> 4  1.0  1.0  1.0  1.0
> 5  1.0  1.0  1.0  1.0
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement.
# Stack several frames at once. The original row labels are kept, and columns
# missing from a frame ('e' for df1/df2, 'a' for df3) are filled with NaN.
res = pd.concat([df1, df2, df3])
print(res)

>      a    b    c    d    e
> 0  0.0  0.0  0.0  0.0  NaN
> 1  0.0  0.0  0.0  0.0  NaN
> 2  0.0  0.0  0.0  0.0  NaN
> 0  1.0  1.0  1.0  1.0  NaN
> 1  1.0  1.0  1.0  1.0  NaN
> 2  1.0  1.0  1.0  1.0  NaN
> 2  NaN  1.0  1.0  1.0  1.0
> 3  NaN  1.0  1.0  1.0  1.0
> 4  NaN  1.0  1.0  1.0  1.0
# A single record: integer values 1..4 labelled a..d.
s1 = pd.Series(range(1, 5), index=list('abcd'))
print(s1)

> a    1
> b    2
> c    3
> d    4
> dtype: int64
# Add one specific row.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# Series is turned into a one-row frame (its labels become the columns) and
# concatenated below df1; ignore_index=True renumbers the rows 0..3.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res)

>      a    b    c    d
> 0  0.0  0.0  0.0  0.0
> 1  0.0  0.0  0.0  0.0
> 2  0.0  0.0  0.0  0.0
> 3  1.0  2.0  3.0  4.0

END

原文地址:https://www.cnblogs.com/yangzhaonan/p/10436031.html