Logistic回归

一.环境配置

需要安装 Python 的数值计算库 NumPy，以及用于绘制图形的 Matplotlib：

sudo apt-get install python-numpy

sudo apt-get install python-matplotlib

为了方便还安装了ipython

sudo apt-get install ipython

二.编辑test.py

 1 from numpy import *
 2 
 3 def loadDataSet():
 4     dataMat = []; labelMat = []
 5     fr = open('testSet.txt')
 6     for line in fr.readlines():
 7         lineArr = line.strip().split()
 8         dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
 9         labelMat.append(int(lineArr[2]))
10     return dataMat,labelMat
11 
def sigmoid(inX):
    """Return the logistic function 1 / (1 + e**(-inX)), element-wise.

    Args:
        inX: a numeric scalar or a NumPy array/matrix.

    Returns:
        The sigmoid of ``inX``, with values in [0, 1]; array inputs keep
        their shape.
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))).  logaddexp(0, -x) computes
    # log(1 + exp(-x)) without forming exp(-x) on its own, so large negative
    # inputs no longer overflow (which used to emit a RuntimeWarning).
    return exp(-logaddexp(0, -inX))
14 
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Args:
        dataMatIn: m x n sample matrix, one row per sample; anything
            ``asmatrix`` accepts (e.g. the list of rows from loadDataSet()).
        classLabels: sequence of m numeric labels (presumably 0/1, to match
            the range of sigmoid -- confirm against the data set).
        alpha: step size applied to every update (default 0.001).
        maxCycles: number of full passes over the data (default 500).

    Returns:
        An n x 1 NumPy matrix holding the fitted weights.
    """
    # asmatrix is used instead of its alias ``mat``, which NumPy 2.0 removed.
    dataMatrix = asmatrix(dataMatIn)
    labelMat = asmatrix(classLabels).transpose()  # 1 x m row -> m x 1 column
    n = shape(dataMatrix)[1]
    # Start from all-ones weights; as a matrix, so the return type is the
    # same even when maxCycles is 0.
    weights = asmatrix(ones((n, 1)))
    for _ in range(maxCycles):
        h = sigmoid(dataMatrix * weights)   # matrix product: m x 1 predictions
        error = labelMat - h                # m x 1 residuals
        # Step along X^T * (y - h), scaled by alpha.
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
27 
def plotBestFit(weights):
    """Scatter-plot the data set and draw the fitted decision boundary.

    The samples are re-read with loadDataSet(); label-1 points are drawn as
    red squares, all other points in green, and the line
    ``weights[0] + weights[1]*x1 + weights[2]*x2 = 0`` is plotted for x1 in
    [-3.0, 3.0).

    Args:
        weights: the three fitted weights, given as a flat sequence, an
            (n, 1) array, or the NumPy matrix returned by gradAscent().
    """
    import matplotlib.pyplot as plt

    # Flatten first: indexing an (n, 1) matrix directly would make ``y`` a
    # 1 x 60 matrix, which ax.plot rejects against the 60-element ``x``.
    w = asarray(weights, dtype=float).ravel()

    dataMat, labelMat = loadDataSet()
    xcord1 = []; ycord1 = []    # samples labelled 1
    xcord2 = []; ycord2 = []    # every other sample
    for row, label in zip(dataMat, labelMat):
        if int(label) == 1:
            xcord1.append(row[1]); ycord1.append(row[2])
        else:
            xcord2.append(row[1]); ycord2.append(row[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    # Solve w0 + w1*x1 + w2*x2 = 0 for x2 to get the boundary line.
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()

运行命令（在 test.py 和 testSet.txt 所在的目录下启动 ipython）

In [13]: test.plotBestFit(weights.getA())

In [14]: import test

In [15]: dataArr,labelMat=test.loadDataSet()

In [17]: test.gradAscent(dataArr,labelMat)
Out[17]:
matrix([[ 4.12414349],
        [ 0.48007329],
        [-0.6168482 ]])

In [18]: weights=test.gradAscent(dataArr,labelMat)

In [19]: test.plotBestFit(weights.getA())
画出的分类图形还不错，只有两个点分错了。

原文地址:https://www.cnblogs.com/zeng-wei/p/3323222.html