DataFrame常用方法

#Pandas数据结构DataFrame:基本技巧

#数据查看、转置 / 添加、修改、删除值 / 对齐 / 排序
import numpy as np
import pandas as pd

# Inspecting and transposing a DataFrame
# Build a 5x2 frame of uniform random values scaled by 100, with columns 'a' and 'b'.
df = pd.DataFrame(100 * np.random.rand(5, 2), columns=['a', 'b'])
print(df)

# head(n) shows the first n rows; when n is omitted it defaults to 5.
print(df.head(n=2))

# tail(n) shows the last n rows.
print(df.tail(n=3))

# transpose() (also spelled df.T) swaps rows and columns.
print(df.transpose())
           a          b
0  13.001170  52.302971
1  45.443235  17.136341
2  87.749437  61.681361
3  27.203306  54.923447
4  27.661213  28.096315
           a          b
0  13.001170  52.302971
1  45.443235  17.136341
           a          b
2  87.749437  61.681361
3  27.203306  54.923447
4  27.661213  28.096315
           0          1          2          3          4
a  13.001170  45.443235  87.749437  27.203306  27.661213
b  52.302971  17.136341  61.681361  54.923447  28.096315
# Adding, modifying and deleting values
# Adding and modifying work much like assigning into a dict.
df = pd.DataFrame(100 * np.random.rand(5, 2), columns=['a', 'b'])
df['c'] = 10                  # add column 'c', every row set to 10
df.loc[5] = 20                # add a row labelled 5, every column set to 20
df.loc[1:3, 'a':'c'] = 30     # set rows 1-3, columns a-c to 30
print(df)

# Deleting is normally done with drop().
df1 = pd.DataFrame(100 * np.random.rand(5, 2), columns=['a', 'b'])
# Dropping a column: columns=[...] is the keyword form of (labels, axis=1).
# inplace=True modifies df1 itself instead of returning a new frame.
df1.drop(columns=['a'], inplace=True)
# Dropping a row: index=[...] is the keyword form of (labels, axis=0), the default axis.
# inplace defaults to False, so a new frame is produced and df is left unchanged.
df2 = df.drop(index=[0])
print(df1)
print(df2)

# A column can also be removed with `del df['a']`, but that form is rarely used.
           a          b   c
0   1.402976   7.213545  10
1  30.000000  30.000000  30
2  30.000000  30.000000  30
3  30.000000  30.000000  30
4  16.940650  71.386239  10
5  20.000000  20.000000  20
           b
0  20.961412
1  86.214446
2  35.080610
3   9.396529
4   9.427302
          a          b   c
1  30.00000  30.000000  30
2  30.00000  30.000000  30
3  30.00000  30.000000  30
4  16.94065  71.386239  10
5  20.00000  20.000000  20
# Alignment

# Two frames of standard-normal samples with different shapes:
# df1 is 10 rows x columns A-D, df2 is 7 rows x columns A-C.
df1 = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
df2 = pd.DataFrame(np.random.randn(7, 3), columns=list('ABC'))
print(df1)
print(df2)
# Arithmetic between DataFrames aligns automatically on column labels and on the
# index (row labels). Positions present in only one frame come out as NaN,
# because any value plus NaN is still NaN. add() is the method form of `+`.
print(df1.add(df2))
          A         B         C         D
0 -0.186700  0.654873 -0.675748  1.274324
1 -0.203601  0.522645  1.327030 -1.211796
2  1.210807 -0.333704 -0.068803  0.626071
3  0.381998  1.352354 -1.122596 -0.039185
4 -1.794919 -0.636484 -1.248661  0.595253
5 -0.724729  0.845360 -0.318300  0.154419
6 -1.363716  0.539871 -0.466797 -0.991755
7 -1.746204 -0.211044  0.265923  1.479545
8  0.964734  1.702910 -1.231199 -0.095801
9  0.027144  0.565912  0.494676  0.297138
          A         B         C
0 -0.266384 -0.697483 -0.787006
1 -1.247451  1.418789 -0.728944
2 -1.750087  0.108112  0.721652
3  0.131859 -1.157439  0.169533
4  1.051047 -0.395463  1.161415
5 -0.232839 -0.203111 -0.135780
6 -1.933584  0.280714 -1.493124
          A         B         C   D
0 -0.453084 -0.042610 -1.462754 NaN
1 -1.451051  1.941434  0.598086 NaN
2 -0.539280 -0.225593  0.652849 NaN
3  0.513857  0.194915 -0.953063 NaN
4 -0.743872 -1.031947 -0.087246 NaN
5 -0.957568  0.642249 -0.454080 NaN
6 -3.297300  0.820585 -1.959921 NaN
7       NaN       NaN       NaN NaN
8       NaN       NaN       NaN NaN
9       NaN       NaN       NaN NaN
# Sorting
# 1. Sort by values with sort_values() -- single key
df1 = pd.DataFrame(100 * np.random.rand(4, 4), columns=list('abcd'))
print(df1)
# The `ascending` parameter picks the direction; it defaults to True (ascending).
print(df1.sort_values(by='a'))                     # ascending
print(df1.sort_values(by='a', ascending=False))    # descending
print('------')


# Sort by values -- several keys; each later key breaks ties left by the earlier ones.
df2 = pd.DataFrame({
    'a': [1] * 4 + [2] * 4,
    'b': list(range(8)),
    'c': list(range(8, 0, -1)),
})
print(df2)
print(df2.sort_values(by=['a', 'c']))


# 2. Sort by row index with sort_index()
# df3 has an integer index, df4 a string index.
df3 = pd.DataFrame(100 * np.random.rand(4, 4),
                   index=[5, 4, 3, 2],
                   columns=list('abcd'))
df4 = pd.DataFrame(100 * np.random.rand(4, 4),
                   index=list('hsxg'),
                   columns=list('abcd'))
print(df3)
print(df3.sort_index())
print(df4)
print(df4.sort_index())
           a          b          c          d
0  14.363296  24.468750   0.862332  78.414560
1  50.376623  70.058587  12.016014  43.824154
2   9.164948   6.009718  59.899663  34.608598
3  32.776729   5.877378  19.845187  18.427510
           a          b          c          d
2   9.164948   6.009718  59.899663  34.608598
0  14.363296  24.468750   0.862332  78.414560
3  32.776729   5.877378  19.845187  18.427510
1  50.376623  70.058587  12.016014  43.824154
           a          b          c          d
1  50.376623  70.058587  12.016014  43.824154
3  32.776729   5.877378  19.845187  18.427510
0  14.363296  24.468750   0.862332  78.414560
2   9.164948   6.009718  59.899663  34.608598
------
   a  b  c
0  1  0  8
1  1  1  7
2  1  2  6
3  1  3  5
4  2  4  4
5  2  5  3
6  2  6  2
7  2  7  1
   a  b  c
3  1  3  5
2  1  2  6
1  1  1  7
0  1  0  8
7  2  7  1
6  2  6  2
5  2  5  3
4  2  4  4
           a          b          c          d
5  31.649529  84.868273  35.960909  17.991508
4  70.713399   3.196341   1.838718   2.587589
3  22.504265   3.529035  29.175653  61.559803
2  10.746665  74.852949  49.412317  28.020466
           a          b          c          d
2  10.746665  74.852949  49.412317  28.020466
3  22.504265   3.529035  29.175653  61.559803
4  70.713399   3.196341   1.838718   2.587589
5  31.649529  84.868273  35.960909  17.991508
           a          b          c          d
h  77.066414   3.475510  64.363116  51.800866
s  40.764677  42.263241   9.385392  44.899110
x  12.105920  29.185573  59.567246  79.056572
g  67.852492  11.714584  84.943183  75.268707
           a          b          c          d
g  67.852492  11.714584  84.943183  75.268707
h  77.066414   3.475510  64.363116  51.800866
s  40.764677  42.263241   9.385392  44.899110
x  12.105920  29.185573  59.567246  79.056572
原文地址:https://www.cnblogs.com/Franciszw/p/13888659.html