- 如何从csv文件只读取前几行的数据
# Read only the first 2 data rows, and only the 'Model' and 'Length' columns.
csv_url = 'https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv'
df = pd.read_csv(
    csv_url,
    usecols=['Model', 'Length'],
    nrows=2,
)
df  # bare expression: displays the frame in a notebook, no effect in a script
#>      Model  Length
#> 0  Integra     177
#> 1   Legend     195
- 如何从csv文件中每隔n行取一行来创建dataframe
# Build a DataFrame from every 50th row of the CSV (rows 0, 50, 100, ...).
# With chunksize set, read_csv returns an iterator of 50-row DataFrames rather
# than a single DataFrame; only the first row of each chunk is kept.
df = pd.read_csv(
    'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv',
    chunksize=50,
)
# DataFrame.append was removed in pandas 2.0. Collect the one-row slices and
# concatenate once instead, which also avoids copying df2 on every iteration.
# The list indexer iloc[[0]] keeps each slice a DataFrame, so column dtypes
# are preserved.
df2 = pd.concat([chunk.iloc[[0]] for chunk in df])
# Show the first 5 rows
print(df2.head())
# Output as recorded in the original write-up.
# NOTE(review): the first recorded row (crim 0.21977) is not row 0 of this
# file (row 0 has crim 0.00632), so this output was probably produced by
# different code -- re-run and refresh before relying on it.
#>                   crim    zn  indus chas                  nox     rm   age  \
#> 0              0.21977   0.0   6.91    0  0.44799999999999995  5.602  62.0
#> 1               0.0686   0.0   2.89    0                0.445  7.416  62.5
#> 2   2.7339700000000002   0.0  19.58    0                0.871  5.597  94.9
#> 3               0.0315  95.0   1.47    0  0.40299999999999997  6.975  15.3
#> 4  0.19072999999999998  22.0   5.86    0                0.431  6.718  17.5
#>       dis rad  tax ptratio       b  lstat  medv
#> 0  6.0877   3  233    17.9   396.9   16.2  19.4
#> 1  3.4952   2  276    18.0   396.9   6.19  33.2
#> 2  1.5257   5  403    14.7  351.85  21.45  15.4
#> 3  7.6534   3  402    17.0   396.9   4.56  34.9
#> 4  7.8265   7  330    19.1  393.74   6.56  26.2
- 如何改变导入csv文件的列值
# Recode the 'medv' column while importing: a value greater than 25 becomes
# 'High', anything else becomes 'Low'. The recoding is done with the
# converters parameter of read_csv.
def _medv_label(raw):
    """Return 'High' when the raw medv cell is above 25, otherwise 'Low'."""
    if float(raw) > 25:
        return 'High'
    return 'Low'


df = pd.read_csv(
    'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv',
    converters={'medv': _medv_label},
)
print(df.head())
# Recorded output (only the last columns are shown):
#>         b  lstat  medv
#> 0  396.90   4.98   Low
#> 1  396.90   9.14   Low
#> 2  392.83   4.03  High
#> 3  394.63   2.94  High
#> 4  396.90   5.33  High
- 如何从csv文件导入指定的列
# Import only the listed columns: crim and medv.
selected_columns = ['crim', 'medv']
df = pd.read_csv(
    'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv',
    usecols=selected_columns,
)
# Print the first five rows (head() is called with its default row count).
print(df.head())
#>       crim  medv
#> 0  0.00632  24.0
#> 1  0.02731  21.6
#> 2  0.02729  34.7
#> 3  0.03237  33.4
#> 4  0.06905  36.2
- 如何得到dataframe的行数、列数、每一列的类型和相应的描述统计信息
# Inspect a DataFrame: its shape, per-column dtypes, dtype counts and
# descriptive statistics, then convert it to an array and to a list.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Number of rows and columns, as a (rows, columns) tuple
print(df.shape)

# dtype of each column; only the first 5 columns are shown
print(df.dtypes.head())

# Number of columns per dtype. DataFrame.get_dtype_counts() was removed in
# pandas 1.0; counting the values of df.dtypes is the supported equivalent.
print(df.dtypes.value_counts())

# Descriptive statistics per column (std, quartiles, ...)
df_stats = df.describe()

# DataFrame -> NumPy array
df_arr = df.values

# Array -> nested Python list
df_list = df.values.tolist()

# Recorded output (the exact footer of the counts may vary by pandas version):
#> (93, 27)
#> Manufacturer     object
#> Model            object
#> Type             object
#> Min.Price       float64
#> Price           float64
#> dtype: object
#> float64    18
#> object      9
#> dtype: int64
- 如何获取给定条件的行和列
import numpy as np

# Find the row/column position of the largest 'Price' value and read it back
# through the different scalar accessors.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Positions of every cell equal to the maximum Price. The comparison runs over
# the whole table, so row/col are arrays; the first match is used below.
row, col = np.where(df.values == np.max(df.Price))

# Read the value by integer position (row number, column number)
print(df.iat[row[0], col[0]])
df.iloc[row[0], col[0]]

# Read the value by row label and column name. row[0] is a position, but it is
# also the label here because the frame was read without an index column.
# DataFrame.get_value() was removed in pandas 1.0; .at is its replacement.
df.at[row[0], 'Price']
#> 61.9