# Detecting and filtering outliers

from pandas import DataFrame,Series
import pandas as pd
import numpy as np

# Fix the seed so the random sample, and the outputs quoted in the
# comments below, are reproducible.
np.random.seed(12345)
data = DataFrame(np.random.randn(1000, 4))

# Find the values in one column whose absolute value is greater than 3.
# The comparison has to be applied to np.abs(col): writing np.abs(col > 3)
# only takes the absolute value of a boolean mask, which keeps values above
# +3 but silently drops every value below -3.
col = data[2]
col_outliers = col[np.abs(col) > 3]
print(col_outliers)
# Expected output:
# 5      3.248944
# 102    3.176873
# 324    3.260383
# 499   -3.056990
# 586   -3.184377
# Name: 2, dtype: float64

# Select every row that contains at least one value whose absolute value is
# greater than 3. `axis` is passed by keyword because newer pandas releases
# no longer accept it positionally (`.any(1)`).
outlier_rows = data[(np.abs(data) > 3).any(axis=1)]
print(outlier_rows)
# Expected output:
#             0         1         2         3
# 5   -0.539741  0.476985  3.248944 -1.021228
# 97  -0.774363  0.552936  0.106061  3.927528
# 102 -0.655054 -0.565230  3.176873  0.959533
# 305 -2.315555  0.457246 -0.025907 -3.399312
# 324  0.050188  1.951312  3.260383  0.963301
# 400  0.146326  0.508391 -0.196713 -3.745356
# 499 -0.293333 -0.242459 -3.056990  1.918403
# 523 -3.428254 -0.296336 -0.439938 -0.867165
# 586  0.275144  1.179227 -3.184377  1.369891
# 808 -0.362528 -3.548824  1.553205 -2.186301
# 900  3.366626 -2.372214  0.851010  1.332846

# Cap the outliers in place to the interval [-3, 3]: np.sign(data) is -1 or
# +1 for the selected cells, so each outlier becomes -3 or +3 while keeping
# its original sign.
data[np.abs(data) > 3] = np.sign(data) * 3
print(data.describe())
# Original article: https://www.cnblogs.com/nicole-zhang/p/14959296.html