Logistic Regression

For a two-class classification problem where the target is a discrete variable, assume the targets take values in {0, 1}. We therefore first change the model so that its prediction lies in [0, 1], by choosing the sigmoid function:

$$h_\theta(x) = g(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}$$
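A quick numeric check of this squashing behavior (a standalone sketch, not part of the original post): $g(0) = 0.5$, and inputs of large magnitude saturate toward 0 or 1.

import numpy as np

def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

print(sigmoid(0))                    # 0.5
print(sigmoid(np.array([-10, 10])))  # ~[4.5e-05, 0.99995]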

Classification probability (writing the two cases $P(y=1 \mid x;\theta) = h_\theta(x)$ and $P(y=0 \mid x;\theta) = 1 - h_\theta(x)$ as a single expression):

$$P(y \mid x;\theta) = (h_\theta(x))^{y}\,(1 - h_\theta(x))^{1-y}$$

Likelihood function (over the $m$ training samples):

$$L(\theta) = \prod_{i=1}^{m} (h_\theta(x^{(i)}))^{y^{(i)}}\,(1 - h_\theta(x^{(i)}))^{1-y^{(i)}}$$

Log-likelihood function:

$$\ell(\theta) = \log L(\theta) = \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + (1 - y^{(i)}) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$
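The step from the log-likelihood to the update rule uses the sigmoid identity $g'(z) = g(z)(1 - g(z))$; differentiating $\ell(\theta)$ term by term gives

$$\frac{\partial \ell(\theta)}{\partial \theta_j} = \sum_{i=1}^{m} \left( y^{(i)} - h_\theta(x^{(i)}) \right) x_j^{(i)}$$

so maximizing $\ell(\theta)$ means stepping along $(y - h_\theta)$, or equivalently descending along $(h_\theta - y)$.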

Update rule (gradient descent with learning rate $\alpha$, in the vectorized form that gD below implements):

$$\theta := \theta - \alpha\, X^{T} \left( h_\theta(X) - y \right)$$
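A minimal sketch of a single vectorized step (the tiny X, y, and alpha here are made up for illustration; they are not the data the program below generates):

import numpy as np

X = np.array([[1.0, 0.0, 1.5],   # each row: [bias, x1, x2]
              [1.0, 0.5, 0.4],
              [1.0, 1.0, 2.3]])
y = np.array([[1.0], [0.0], [1.0]])
theta = np.zeros((3, 1))
alpha = 0.1

h = 1.0 / (1.0 + np.exp(-X @ theta))   # current predictions
theta = theta - alpha * X.T @ (h - y)  # one gradient-descent step
print(theta)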

# coding:utf-8
import matplotlib.pyplot as plt
import numpy as np

def dataN(length):  # generate two noisy, roughly linearly separable classes
    x = np.ones(shape=(length, 3))  # column 0 stays 1 and acts as the bias term
    y = np.zeros(length)
    for j in range(0, length, 2):   # even rows: class 1, odd rows: class 0
        i = j * 0.01
        x[j][1] = i
        x[j][2] = i + 1 + np.random.uniform(0, 1.2)  # class 1 sits about 1 above the line x2 = x1
        y[j] = 1
        x[j + 1][1] = i + 0.01
        x[j + 1][2] = i + 0.01 + np.random.uniform(0, 1.2)
    return np.mat(x), np.mat(y).T

def sigmoid(x):  # sigmoid function
    return 1.0 / (1 + np.exp(-x))

def alphA(x, y):  # pick the alpha with the smallest cost over the first 20 iterations
    c = float("inf")
    for k in range(1, 1000):
        a = 1.0 / k**3
        f = gD(x, y, 20, a)[1][-1]
        if f > c:   # cost started rising again, so keep the previous alpha
            break
        c = f
        alpha = a
    return alpha

def gD(x, y, iter, alpha):  # batch gradient descent
    theta = np.ones((3, 1))
    cost = []
    for i in range(iter):
        hypothesis = sigmoid(np.dot(x, theta))
        loss = hypothesis - y
        cost.append(np.sum(np.square(loss)))  # sum of squared errors over all samples
        gradient = np.dot(x.transpose(), loss)
        theta = theta - alpha * gradient
    return theta, cost

def tesT(theta, x, y):  # accuracy on the training set
    length = len(x)
    count = 0
    for i in range(length):
        predict = sigmoid(x[i, :] * theta)[0, 0] > 0.5
        if predict == bool(y[i, 0]):
            count += 1
    accuracy = float(count) / length
    return accuracy

length = 200
iter = 1000
x, y = dataN(length)
theta, cost = gD(x, y, iter, alphA(x, y))
print(tesT(theta, x, y))  # 0.92

plt.figure(1)  # cost curve over the iterations
plt.plot(range(iter), cost)
plt.figure(2)  # the two classes and the learned decision boundary
color = ['or', 'ob']
for i in range(length):
    plt.plot(x[i, 1], x[i, 2], color[int(y[i, 0])])
theta = theta.getA()  # back to a plain ndarray for the boundary formula
x1 = np.array([0, length / 100])
x2 = (-theta[0] - theta[1] * x1) / theta[2]  # boundary: theta0 + theta1*x1 + theta2*x2 = 0
plt.plot(x1, x2)
plt.show()
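As an optional sanity check (this assumes scikit-learn is installed and is not part of the original post), the same data can be fed to a library implementation and the accuracies compared; the snippet continues from the x and y produced above:

from sklearn.linear_model import LogisticRegression

X_feat = np.asarray(x[:, 1:])    # drop the bias column; sklearn fits its own intercept
y_flat = np.asarray(y).ravel()
clf = LogisticRegression().fit(X_feat, y_flat)
print(clf.score(X_feat, y_flat)) # library model's accuracy on the same points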

Original article: https://www.cnblogs.com/qw12/p/5642088.html