缺失值处理

缺失值处理
# Missing-value handling demo: NaN-aware reductions in NumPy, followed by
# detecting and dropping missing entries with pandas.
import numpy as np
import pandas as pd

data = np.array([1, 2, 5, 4, np.nan])
print(data)

print('numpy 有专门的nansum,nanmax等操作处理缺失值')
# A plain data.sum() propagates NaN and returns nan; np.nansum treats NaN as
# zero and sums the remaining values.
r = np.nansum(data)
print(r)

print('pandas 将None和np.nan都处理为NaN')
data = pd.Series([2, 3, None, 4, 5, np.nan])
print(data)

print('-' * 10, '获取缺失值', '-' * 10)
# Boolean mask that is True wherever the Series entry is missing.
index = data.isnull()
print(data[index])

print('-' * 10, '剔除缺失值', '-' * 10)
df = pd.DataFrame([[1, 2, np.nan],
                   [4, None, 5],
                   [6, 7, 8]])
print(df)
# dropna() returns a new frame and leaves df untouched, so the result has to
# be bound to a name to be kept.
dd = df.dropna()
print(dd)
print('原始的df不会去掉nan,需要接一下')

print('剔除缺失值的列')
dd = df.dropna(axis=1)  # equivalent to axis='columns'
print(dd)

print('how="any",只要存在缺失值就剔除;how=all,全部都是缺失值才会剔除')
# how="any" (the default) drops a row as soon as it contains one missing value.
dd = df.dropna(how="any")
print(dd)
# Missing-value filling demo: constant fill, forward/backward fill along rows
# and columns, and fill with per-column means.
import numpy as np
import pandas as pd

df = pd.DataFrame([[1, 2, np.nan],
                   [4, None, 5],
                   [6, 7, 8]],
                  index=list('abc'), columns=list('ABC'))
print('-' * 10, '原始df数据', '-' * 10)
print(df)

print('-' * 10, '缺失值填充默认值0', '-' * 10)
dd = df.fillna(0)
print(dd)

# ffill()/bfill() replace fillna(method=...), which is deprecated in
# pandas 2.x.
print('-' * 10, '缺失值填充前一行元素', '-' * 10)
dd = df.ffill()
print(dd)

print('-' * 10, '缺失值填充后一行元素', '-' * 10)
dd = df.bfill()
print(dd)

print('-' * 10, '缺失值填充前一列元素', '-' * 10)
dd = df.ffill(axis=1)
print(dd)

print('-' * 10, '缺失值填充后一列元素', '-' * 10)
dd = df.bfill(axis=1)
print(dd)

print('-' * 10, '按列的平均值进行填充', '-' * 10)
# df.mean() yields one mean per column with NaN skipped; fillna matches that
# Series against the column labels, so each column is filled with its own mean.
dd = df.fillna(df.mean())
print(dd)
原文地址:https://www.cnblogs.com/yunshangyue71/p/13584338.html