pandas_dataformat03

  1. 如何以dataframe的形式选择特定的列
    df = pd.DataFrame(np.arange(20).reshape(-1, 5), columns=list('abcde'))

    # NOTE: the bare type(...) expressions below only display a value in an
    # interactive session (REPL / notebook); when run as a script, only the
    # two print() calls produce output.

    # Select one column and keep the result a DataFrame: wrap the label
    # (or the position) in a list.
    type(df[['a']])
    type(df.loc[:, ['a']])
    print(type(df.iloc[:, [0]]))

    # Select the same column as a Series: pass a bare label or position.
    type(df.a)
    type(df['a'])
    type(df.loc[:, 'a'])
    # Position 0 is column 'a', so this matches every other selection above.
    print(type(df.iloc[:, 0]))
    
    #>    <class 'pandas.core.frame.DataFrame'>
        <class 'pandas.core.series.Series'>
  2. 如何改变dataframe中的列顺序
    df = pd.DataFrame(
        np.arange(20).reshape(4, 5),
        columns=['a', 'b', 'c', 'd', 'e'],
    )
    print(df)

    def switch_columns(df, col1=None, col2=None):
        """Return ``df`` with columns ``col1`` and ``col2`` trading places.

        The input frame is left untouched; the result is produced by indexing
        ``df`` with the reordered list of column labels. ``list.index`` raises
        ``ValueError`` when either label is not among the columns.
        """
        order = df.columns.tolist()
        first = order.index(col1)
        second = order.index(col2)
        # Build the reordered label list without touching ``order`` itself.
        swapped = list(order)
        swapped[first] = order[second]
        swapped[second] = order[first]
        return df[swapped]

    # Exchange the positions of columns 'a' and 'c'.
    df1 = switch_columns(df, 'a', 'c')
    print(df1)
    
    #>        a   b   c   d   e
        0   0   1   2   3   4
        1   5   6   7   8   9
        2  10  11  12  13  14
        3  15  16  17  18  19
    #>        c   b   a   d   e
        0   2   1   0   3   4
        1   7   6   5   8   9
        2  12  11  10  13  14
        3  17  16  15  18  19
  3. 如何格式化dataframe的值
    df = pd.DataFrame(np.random.random(4) ** 10, columns=['random'])
    print(df)

    # Show four digits after the decimal point.
    # DataFrame.apply returns a new object and leaves df untouched, so a
    # discarded result changes nothing; the formatted values must be assigned
    # back. Mapping over the column formats one scalar at a time, and the
    # column then holds strings rather than floats.
    df['random'] = df['random'].map('{:.4f}'.format)
    print(df)

    # Sample output (the numbers differ on every run because the input is random):
    #>             random
    #>    0  3.539348e-04
    #>    1  3.864140e-10
    #>    2  2.973575e-02
    #>    3  1.414061e-01
    #>       random
    #>    0  0.0004
    #>    1  0.0000
    #>    2  0.0297
    #>    3  0.1414
  4. 如何将dataframe中的所有值以百分数的格式表示
    df = pd.DataFrame(np.random.random(4), columns=['random'])
    
    # 格式化为小数点后两位的百分数
    out = df.style.format({
        'random': '{0:.2%}'.format,
    })
    
    out
    
    #>        random
        0    48.54%
        1    91.51%
        2    90.83%
        3    20.45%

  5. 如何从dataframe中每隔n行构建dataframe

    df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

    # Keep every 20th row (positions 0, 20, 40, ...) and print three columns.
    wanted = ['Manufacturer', 'Model', 'Type']
    every_20th = df.iloc[::20]
    print(every_20th[wanted])
    
    #>       Manufacturer    Model     Type
        0         Acura  Integra    Small
        20     Chrysler  LeBaron  Compact
        40        Honda  Prelude   Sporty
        60      Mercury   Cougar  Midsize
        80       Subaru   Loyale    Small
原文地址:https://www.cnblogs.com/huaobin/p/15687085.html