DataFrame

个人见解：DataFrame 很像 Excel 表格——一张带行索引和列名的二维表。

import numpy as np
import pandas as pd
print(np.array([1,2,3,4,5]))
[1 2 3 4 5]
print(np.arange(1,10,1))
[1 2 3 4 5 6 7 8 9]
print(np.array(np.arange(10)))
[0 1 2 3 4 5 6 7 8 9]


# A 3x2 nested list; np.asarray turns it into a 2-D array for display.
myList = [
    [0, 1],
    [1, 2],
    [2, 3],
]
print(np.asarray(myList))
[[0 1]
 [1 2]
 [2 3]]
# A second 3x2 nested list with the same shape as myList.
myList1 = [
    [0, 5],
    [1, 6],
    [2, 7],
]
print(np.asarray(myList1))
[[0 5]
 [1 6]
 [2 7]]
# Addition: convert both nested lists to arrays, then add them element-wise.
List1, List2 = np.array(myList), np.array(myList1)
print(np.add(List1, List2))
[[ 0  6]
 [ 2  8]
 [ 4 10]]
# Merge: join the two arrays side by side (axis=1 appends columns).
print(np.concatenate([List1, List2], axis=1))
[[0 1 0 5]
 [1 2 1 6]
 [2 3 2 7]]
print(np.hstack((List1,List2)))
[[0 1 0 5]
 [1 2 1 6]
 [2 3 2 7]]


# Index: wrap a NumPy array in a Series with an explicit integer index 0..4.
ser1 = np.arange(1, 6)
pd1 = pd.Series(data=ser1, index=np.arange(len(ser1)))
print(pd1)
0    1
1    2
2    3
3    4
4    5
dtype: int32
# Second Series: the values 6..10 on the same 0..4 integer index.
ser2 = np.arange(6, 11)
pd2 = pd.Series(data=ser2, index=np.arange(len(ser2)))
print(pd2)
0     6
1     7
2     8
3     9
4    10
dtype: int32

print(pd.DataFrame(ser2,index=np.arange(5),columns=['apple']))
   apple
0      6
1      7
2      8
3      9
4     10

print(pd.Series([3,2,0,1],index=np.arange(4)))
print(pd.Series([0,3,7,2],index=np.arange(4)))
0    3
1    2
2    0
3    1
dtype: int64
0    0
1    3
2    7
3    2
dtype: int64
# Each inner list is one row: [apples, oranges].
myList2 = [
    [3, 0],
    [2, 3],
    [0, 7],
    [1, 2],
]
print(
    pd.DataFrame(
        data=myList2,
        index=np.arange(len(myList2)),
        columns=['apples', 'oranges'],
    )
)
   apples  oranges
0       3        0
1       2        3
2       0        7
3       1        2


import pandas as pd
from pandas import Series,DataFrame
# Three ways to build a Series holding the values 1..4.
x1 = Series([1, 2, 3, 4])  # no index given: labels default to 0..3
x2 = Series([1, 2, 3, 4], index=list('abcd'))  # explicit string labels
mydata = dict(a=1, b=2, c=3, d=4)  # create from a dict: keys become the index
x3 = Series(mydata)
print(x1)
0    1
1    2
2    3
3    4
dtype: int64
print(x2)
a    1
b    2
c    3
d    4
dtype: int64
print(x3)
a    1
b    2
c    3
d    4
dtype: int64
print(x3.count())
4
print(x3.max())
4
print(x3.min())
1
print(x3.mean())
2.5
print(x3.sum())
10
print(x3.median())
2.5
print(x3.argmax())
3
print(x3.var())
1.6666666666666667
print(x3.describe())
count    4.000000
mean     2.500000
std      1.290994
min      1.000000
25%      1.750000
50%      2.500000
75%      3.250000
max      4.000000
dtype: float64
# Two frames whose 'name' columns overlap only on 'zhangfei' and 'guanyu'
# (key comparison is case-sensitive, so 'a' and 'A' do not match).
df1 = DataFrame({
    'name': ['zhangfei', 'guanyu', 'a', 'b', 'c'],
    'data1': range(1, 6),
})
df2 = DataFrame({
    'name': ['zhangfei', 'guanyu', 'A', 'B', 'C'],
    'data2': range(1, 6),
})
# Join on the shared 'name' column; with no `how` given this is an inner join.
df3 = df1.merge(df2, on='name')
for frame in (df1, df2, df3):
    print(frame)
       name  data1
0  zhangfei      1
1    guanyu      2
2         a      3
3         b      4
4         c      5
       name  data2
0  zhangfei      1
1    guanyu      2
2         A      3
3         B      4
4         C      5
       name  data1  data2
0  zhangfei      1      1
1    guanyu      2      2

# Inner join: keep only the rows whose 'name' appears in both frames.
# The key is named explicitly so the join cannot silently switch to a
# different key set if the two frames ever share another column.
df3 = pd.merge(df1, df2, on='name', how='inner')
print(df3)
       name  data1  data2
0  zhangfei      1      1
1    guanyu      2      2
# Left join: keep every row of the first frame (df1); rows with no match in
# df2 get NaN in 'data2'. The key is named explicitly rather than inferred
# from whatever columns the two frames happen to share.
df3 = pd.merge(df1, df2, on='name', how='left')
print(df3)
       name  data1  data2
0  zhangfei      1    1.0
1    guanyu      2    2.0
2         a      3    NaN
3         b      4    NaN
4         c      5    NaN
# Right join: keep every row of the second frame (df2); rows with no match in
# df1 get NaN in 'data1'. The key is named explicitly rather than inferred
# from whatever columns the two frames happen to share.
df3 = pd.merge(df1, df2, on='name', how='right')
print(df3)
       name  data1  data2
0  zhangfei    1.0      1
1    guanyu    2.0      2
2         A    NaN      3
3         B    NaN      4
4         C    NaN      5
# Outer join: keep all rows from both frames, filling the side with no match
# with NaN. The key is named explicitly rather than inferred from whatever
# columns the two frames happen to share.
df3 = pd.merge(df1, df2, on='name', how='outer')
print(df3)
       name  data1  data2
0  zhangfei    1.0    1.0
1    guanyu    2.0    2.0
2         a    3.0    NaN
3         b    4.0    NaN
4         c    5.0    NaN
5         A    NaN    3.0
6         B    NaN    4.0
7         C    NaN    5.0
# Scores per subject; each list has one entry per student, in index order.
data = {
    'Chinese': [66, 95, 93, 90, 80],
    'Math': [30, 98, 96, 77, 90],
    'English': [65, 85, 92, 88, 90],
}
# Rows are labelled by student name; columns follow the dict's key order.
df = DataFrame(
    data,
    index=['ZhangFei', 'GuanYu', 'LiuBei', 'DianWei', 'XuChu'],
    columns=list(data),
)
print(df)
          Chinese  Math  English
ZhangFei       66    30       65
GuanYu         95    98       85
LiuBei         93    96       92
DianWei        90    77       88
XuChu          80    90       90
print(df.loc['ZhangFei'])
Chinese    66
Math       30
English    65
Name: ZhangFei, dtype: int64
print(df.iloc[0])
Chinese    66
Math       30
English    65
Name: ZhangFei, dtype: int64
print(df.columns)
Index(['Chinese', 'Math', 'English'], dtype='object')
print(df.iloc[2]['Math'])
96
print(df.iloc[2]['Chinese'])
93


















原文地址:https://www.cnblogs.com/Cookie-Jing/p/13604570.html