Pandas入门之七:迭代

已信任
Jupyter 服务器: 本地
Python 3: Not Started



[4]




import pandas as pd
import numpy as np
# Build the 7-row demo frame used by every iteration example below.
# Keyword arguments are evaluated left to right, so the random draws happen
# in the same order as the columns are listed.
df = pd.DataFrame(dict(
    # Seven consecutive calendar days starting on 2021-07-14.
    date=pd.date_range(start='20210714', periods=7, freq='D'),
    # Evenly spaced floats 0.0, 1.0, ..., 6.0.
    a=np.linspace(0, 6, 7),
    # Standard-normal samples.
    b=np.random.randn(7),
    # Random labels drawn from three categories.
    c=np.random.choice(['Low', 'Medium', 'High'], 7).tolist(),
    # Normal samples with mean 100 and standard deviation 10.
    d=np.random.normal(100, 10, size=7).tolist(),
))
df
date    a    b    c    d
0    2021-07-14    0.0    -0.079268    Low    100.637433
1    2021-07-15    1.0    0.231418    High    112.083560
2    2021-07-16    2.0    0.288950    Medium    108.132161
3    2021-07-17    3.0    0.264166    High    90.819338
4    2021-07-18    4.0    -0.750558    Medium    100.886340
5    2021-07-19    5.0    1.173738    Medium    104.307198
6    2021-07-20    6.0    -0.418391    Low    88.523432
[6]



# Iterating a DataFrame directly with for/in yields its column labels,
# not its rows.
for column_label in df:
    column_values = df[column_label]  # look the column up by its label
    print(column_label)
    print(column_values)
date
0   2021-07-14
1   2021-07-15
2   2021-07-16
3   2021-07-17
4   2021-07-18
5   2021-07-19
6   2021-07-20
Name: date, dtype: datetime64[ns]
a
0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
Name: a, dtype: float64
b
0   -0.079268
1    0.231418
2    0.288950
3    0.264166
4   -0.750558
5    1.173738
6   -0.418391
Name: b, dtype: float64
c
0       Low
1      High
2    Medium
3      High
4    Medium
5    Medium
6       Low
Name: c, dtype: object
d
0    100.637433
1    112.083560
2    108.132161
3     90.819338
4    100.886340
5    104.307198
6     88.523432
Name: d, dtype: float64
[8]



# items() yields one (column label, column Series) pair per column.
# It replaces DataFrame.iteritems(), which was deprecated in pandas 1.5 and
# removed in pandas 2.0; items() is available on older versions as well.
for key, value in df.items():
    print(key)
    print(value)
date
0   2021-07-14
1   2021-07-15
2   2021-07-16
3   2021-07-17
4   2021-07-18
5   2021-07-19
6   2021-07-20
Name: date, dtype: datetime64[ns]
a
0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
Name: a, dtype: float64
b
0   -0.079268
1    0.231418
2    0.288950
3    0.264166
4   -0.750558
5    1.173738
6   -0.418391
Name: b, dtype: float64
c
0       Low
1      High
2    Medium
3      High
4    Medium
5    Medium
6       Low
Name: c, dtype: object
d
0    100.637433
1    112.083560
2    108.132161
3     90.819338
4    100.886340
5    104.307198
6     88.523432
Name: d, dtype: float64
[9]



# Walk the frame row by row: iterrows() yields (index label, row) pairs,
# where each row is printed as a Series keyed by column name.
for row_label, row_values in df.iterrows():
    print(row_label)
    print(row_values)
0
date    2021-07-14 00:00:00
a                         0
b                -0.0792684
c                       Low
d                   100.637
Name: 0, dtype: object
1
date    2021-07-15 00:00:00
a                         1
b                  0.231418
c                      High
d                   112.084
Name: 1, dtype: object
2
date    2021-07-16 00:00:00
a                         2
b                   0.28895
c                    Medium
d                   108.132
Name: 2, dtype: object
3
date    2021-07-17 00:00:00
a                         3
b                  0.264166
c                      High
d                   90.8193
Name: 3, dtype: object
4
date    2021-07-18 00:00:00
a                         4
b                 -0.750558
c                    Medium
d                   100.886
Name: 4, dtype: object
5
date    2021-07-19 00:00:00
a                         5
b                   1.17374
c                    Medium
d                   104.307
Name: 5, dtype: object
6
date    2021-07-20 00:00:00
a                         6
b                 -0.418391
c                       Low
d                   88.5234
Name: 6, dtype: object
[12]



 # 以元组形式打印
    for row in df.itertuples():
        print(row)
Pandas(Index=0, date=Timestamp('2021-07-14 00:00:00'), a=0.0, b=-0.07926836478101182, c='Low', d=100.6374326023984)
Pandas(Index=1, date=Timestamp('2021-07-15 00:00:00'), a=1.0, b=0.23141819210674755, c='High', d=112.08356043292231)
Pandas(Index=2, date=Timestamp('2021-07-16 00:00:00'), a=2.0, b=0.28895002255434654, c='Medium', d=108.13216066430968)
Pandas(Index=3, date=Timestamp('2021-07-17 00:00:00'), a=3.0, b=0.26416569787454686, c='High', d=90.81933760723473)
Pandas(Index=4, date=Timestamp('2021-07-18 00:00:00'), a=4.0, b=-0.7505580643324384, c='Medium', d=100.88634049762355)
Pandas(Index=5, date=Timestamp('2021-07-19 00:00:00'), a=5.0, b=1.1737384361425682, c='Medium', d=104.30719772518808)
Pandas(Index=6, date=Timestamp('2021-07-20 00:00:00'), a=6.0, b=-0.41839064630765915, c='Low', d=88.52343226534083)
[-]
原文地址:https://www.cnblogs.com/vvzhang/p/15012823.html