sklearn 朴素贝叶斯

朴素贝叶斯的sklearn
# Naive Bayes classifiers from scikit-learn, fitted and evaluated on the
# iris data set (training error only: the model predicts the data it was
# fitted on).
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB

iris = datasets.load_iris()

# Gaussian naive Bayes.
gnb = GaussianNB()
y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)
print("Number of mislabeled points out of a total %d points : %d"
      % (iris.data.shape[0], (iris.target != y_pred).sum()))

# Multinomial naive Bayes with additive smoothing. alpha defaults to 1,
# which is Laplace smoothing; 0.5 is used here.
clf = MultinomialNB(alpha=0.5)
y_pred1 = clf.fit(iris.data, iris.target).predict(iris.data)
print("Number of mislabeled points out of a total %d points : %d"
      % (iris.data.shape[0], (iris.target != y_pred1).sum()))

# clf is already fitted on the same data above, so it is reused instead of
# being re-fitted before each call.
a1 = clf.predict_proba(iris.data)
a2 = clf.predict_log_proba(iris.data)

PR 曲线、ROC 曲线与 AUC 得分

# Print the module docstring; this prints "None" when the module has no
# docstring.
print(__doc__)

import sys
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, roc_auc_score

def _read_labeled_scores(path):
    """Read ``label score`` pairs from a text file.

    Each non-blank line must start with an integer label followed by a
    float score. Fields may be separated by any run of whitespace (tabs or
    spaces); any further fields on the line are ignored.

    Returns:
        A ``(labels, scores)`` tuple of two equally long lists.

    Raises:
        ValueError: if a field cannot be parsed as a number.
        IndexError: if a non-blank line has fewer than two fields.
    """
    labels = []
    scores = []
    with open(path, 'r') as fd:
        for line in fd:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            labels.append(int(fields[0]))
            scores.append(float(fields[1]))
    return labels, scores


# The input file path is the first command-line argument.
inputfile = sys.argv[1]

label_list, score_list = _read_labeled_scores(inputfile)

# ROC curve: false positive rate (fpr) against true positive rate (tpr).
fpr, tpr, _ = roc_curve(label_list, score_list)
# Stored under its own name so the imported `auc` function is not shadowed.
roc_auc = auc(fpr, tpr)
# AUC score computed directly from the labels and scores.
auc_score = roc_auc_score(label_list, score_list)
# Precision-recall curve.
precision, recall, _ = precision_recall_curve(label_list, score_list)

##############################################################################
# Plot of a ROC curve for a specific class
plt.figure()
# The lines carry labels so that plt.legend() has entries to display.
plt.plot(fpr, tpr, label='ROC curve')
plt.plot([0, 1], [0, 1], 'k--', label='diagonal')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve (auc = %.2f)' % roc_auc)
plt.legend(loc="lower right")
plt.show()

# Plot of the precision-recall curve.
plt.figure()
plt.plot(recall, precision, label='Precision-Recall curve')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('recall')
plt.ylabel('precision')
plt.title('Precision-Recall curve')
plt.legend(loc="lower right")
plt.show()

计算 AUC 的代码

import numpy as np
import matplotlib.pyplot as plt
def _rank_auc(labels, scores):
    """Compute the AUC by counting misordered (positive, negative) pairs.

    Samples are visited in ascending score order. Every negative sample
    (label -1) adds the number of positives (label 1) with a strictly lower
    score to the misordered count; a positive and a negative sharing the
    same score count as half a misordered pair. Samples with any other
    label are ignored.

    Returns:
        ``1 - misordered / (n_pos * n_neg)`` as a float.

    Raises:
        ValueError: if there is no positive or no negative sample, in which
            case the AUC is undefined.
    """
    labels = np.asarray(labels)
    scores = np.asarray(scores)
    order = np.argsort(scores, kind='mergesort')
    labels = labels[order]
    scores = scores[order]

    misordered = 0.0
    positives_below = 0  # positives with a strictly lower score
    negatives = 0
    total = len(scores)
    start = 0
    while start < total:
        # [start, stop) is a run of samples sharing one score value.
        stop = start + 1
        while stop < total and scores[stop] == scores[start]:
            stop += 1
        tied = labels[start:stop]
        tied_pos = int(np.sum(tied == 1))
        tied_neg = int(np.sum(tied == -1))
        misordered += tied_neg * (positives_below + 0.5 * tied_pos)
        positives_below += tied_pos
        negatives += tied_neg
        start = stop

    positives = positives_below
    if positives == 0 or negatives == 0:
        raise ValueError(
            'AUC is undefined: need at least one sample labelled 1 '
            'and one labelled -1')
    return 1 - misordered / (positives * negatives)


filepath = '/home/hadoop/bigdata/nb/roi_auc_test/auc.raw'

# Each line holds "<label>\t<score>", with labels 1 / -1.
labels = []
scores = []
with open(filepath) as f:
    for line in f:
        if not line.strip():
            # Skip blank lines instead of failing in float('').
            continue
        ss = line.split('\t')
        labels.append(float(ss[0]))
        scores.append(float(ss[1]))

print(_rank_auc(labels, scores))

根据定义计算 AUC

import numpy as np
import matplotlib.pyplot as plt
def _threshold_sweep_auc(labels, scores, n_steps=5000):
    """Approximate the AUC as the area under a sampled ROC curve.

    ``n_steps + 1`` evenly spaced thresholds between the minimum and the
    maximum score are evaluated; a sample is predicted positive when its
    score is strictly greater than the threshold. The resulting
    (FPR, TPR) points are integrated with the trapezoidal rule.

    Labels are 1 (positive) and -1 (negative); other labels are ignored.

    Raises:
        ValueError: if there is no positive or no negative sample, in which
            case the rates are undefined.
    """
    labels = np.asarray(labels)
    scores = np.asarray(scores)
    is_pos = labels == 1
    is_neg = labels == -1
    n_pos = float(np.count_nonzero(is_pos))
    n_neg = float(np.count_nonzero(is_neg))
    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            'AUC is undefined: need at least one sample labelled 1 '
            'and one labelled -1')

    lowest, highest = np.min(scores), np.max(scores)
    step = (highest - lowest) / n_steps

    # Anchor the curve at (1, 1): the lowest threshold equals the minimum
    # score, so the sweep itself never predicts every sample positive.
    fpr = [1.0]
    tpr = [1.0]
    for i in range(n_steps + 1):
        above = scores > (lowest + i * step)
        fpr.append(np.count_nonzero(above & is_neg) / n_neg)
        tpr.append(np.count_nonzero(above & is_pos) / n_pos)
    # Anchor at (0, 0) as well, in case rounding keeps the last threshold
    # just below the maximum score.
    fpr.append(0.0)
    tpr.append(0.0)

    # Both rates are non-increasing as the threshold rises, so reversing
    # gives the points in ascending FPR order with ties kept in curve order.
    fpr = np.array(fpr)[::-1]
    tpr = np.array(tpr)[::-1]
    return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2))


filepath = '/home/hadoop/bigdata/nb/roi_auc_test/auc.raw'

# Each line holds "<label>\t<score>", with labels 1 / -1.
labels = []
scores = []
with open(filepath) as f:
    for line in f:
        if not line.strip():
            # Skip blank lines instead of failing in float('').
            continue
        ss = line.split('\t')
        labels.append(float(ss[0]))
        scores.append(float(ss[1]))

area = _threshold_sweep_auc(labels, scores)
print(area)
原文地址:https://www.cnblogs.com/skyturtle/p/10178876.html