UDA机器学习基础—异常值-安然数据处理

#!/usr/bin/python

import pickle
import sys
import matplotlib.pyplot
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit


### read in data dictionary, convert to numpy array
data_dict = pickle.load( open("../final_project/final_project_dataset.pkl", "r") )
#print data_dict
#print len(data_dict)
data_dict.pop("TOTAL")
#print len(data_dict)
for i in data_dict:
     #print i
     if data_dict[i]["salary"]>1000000 and data_dict[i]["salary"]!="NaN" and data_dict[i]["bonus"]>5000000:
         print i
         print data_dict[i]["salary"]
         print data_dict[i]["bonus"]
features = ["salary", "bonus"]
data = featureFormat(data_dict, features)
#print data


### your code below
for point in data:
    salary = point[0]
    bonus = point[1]
    #if salary>
    matplotlib.pyplot.scatter( salary, bonus )
#matplotlib.pyplot.scatter(2.6704229e+07 ,9.7343619e+07,color="black")
matplotlib.pyplot.xlabel("salary")
matplotlib.pyplot.ylabel("bonus")
matplotlib.pyplot.show()

  

原文地址:https://www.cnblogs.com/fuhang/p/8512707.html