import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Custom standardization (z-score) helper; it expects 2-D matrix-like data.
def regularize(xMat):
    """Standardize every column of ``xMat`` to zero mean and unit std.

    Parameters
    ----------
    xMat : 2-D numeric data (the callers in this file pass ``np.matrix``).
        It is copied first, so the caller's object is never modified.

    Returns
    -------
    The column-wise z-scores ``(x - mean) / std`` using the population
    standard deviation (``np.std`` default). A constant column, whose
    standard deviation is zero, is mapped to all zeros instead of NaN.
    """
    inMat = xMat.copy()  # work on a copy so xMat itself is left untouched
    inMeans = np.mean(inMat, axis=0)  # per-column mean
    inStd = np.std(inMat, axis=0)  # per-column standard deviation
    # A zero std would turn the whole column into NaN (0/0); dividing by 1
    # instead leaves the already-centred column at exactly 0.
    safeStd = np.where(inStd == 0, 1.0, inStd)
    return (inMat - inMeans) / safeStd

# Batch gradient-descent update rule: w = w - a * X.T * (X*w - Y) / m   (a: learning rate, m: number of samples)

# Batch gradient descent. Arguments: data set, learning rate (step size),
# and a plain fixed number of iterations.
def gradDescent_0(dataSet, eps=0.01, numIt=50000):
    """Fit linear-regression weights with batch gradient descent.

    Parameters
    ----------
    dataSet : pandas.DataFrame
        Every column but the last is a feature; the last column is the target.
    eps : float
        Learning rate (step size) applied to each gradient step.
    numIt : int
        Number of gradient steps to run; there is no convergence test.

    Returns
    -------
    An ``(n, 1)`` weight vector for the *standardized* features, where ``n``
    is the number of feature columns. Features and target are both z-scored
    with ``regularize`` before fitting, and no intercept term is fitted.
    """
    # Split features / target and wrap them as matrices so ``*`` is a
    # matrix product. ``np.asmatrix`` replaces the ``np.mat`` alias that
    # was removed in NumPy 2.0.
    xMat = np.asmatrix(dataSet.iloc[:, :-1].values)
    yMat = np.asmatrix(dataSet.iloc[:, -1].values).T
    xMat = regularize(xMat)  # standardize X and y
    yMat = regularize(yMat)
    m, n = xMat.shape
    weights = np.zeros((n, 1))  # initial coefficients
    for _ in range(numIt):
        # Gradient of the mean squared error over all m samples.
        grad = xMat.T * (xMat * weights - yMat) / m
        weights = weights - eps * grad
    return weights

# The data set comes from UCI: it records biological attributes of abalone,
# and the target field is the animal's age.
aba = pd.read_table('abalone.txt', header = None)
aba.head()

# Show the (rows, columns) dimensions of the loaded frame.
aba.shape

(4177, 9)

# Show the last rows of the frame.
aba.tail()

# Print the column dtypes, non-null counts and memory usage.
aba.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 9 columns):
0    4177 non-null int64
1    4177 non-null float64
2    4177 non-null float64
3    4177 non-null float64
4    4177 non-null float64
5    4177 non-null float64
6    4177 non-null float64
7    4177 non-null float64
8    4177 non-null int64
dtypes: float64(7), int64(2)
memory usage: 293.8 KB

# Count how many entries of column 1 are missing (True) versus present (False).
aba[1].isnull().value_counts()

False    4177
Name: 1, dtype: int64

# Compute the regression weights (the call that produced the output below is not shown here)

matrix([[ 0.01501141],
        [ 0.0304397 ],
        [ 0.31206232],
        [ 0.15730289],
        [ 0.38951011],
        [-0.94609003],
        [-0.10003637],
        [ 0.74573878]])

# Solve the same problem with ordinary least squares to compare the results.
# ``np.asmatrix`` replaces the ``np.mat`` alias that was removed in NumPy 2.0.
xMat = np.asmatrix(aba.iloc[:, :-1].values)
yMat = np.asmatrix(aba.iloc[:, -1].values).T
xMat = regularize(xMat)
yMat = regularize(yMat)
xTx = xMat.T * xMat
# Solve the normal equations (X^T X) w = X^T y directly rather than forming
# the explicit inverse, which is less accurate when columns are correlated.
ws = np.linalg.solve(xTx, xMat.T * yMat)
ws

matrix([[ 0.0162406 ],
        [-0.05874764],
        [ 0.41308287],
        [ 0.15391644],
        [ 1.4069792 ],
        [-1.39621019],
        [-0.3318546 ],
        [ 0.37046383]])

# Mean of column 0.
aba[0].mean()

0.052908786210198705

# Build a 9x1 column vector of zeros.
weights = np.zeros((9,1))

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

SGD随机梯度下降法

随机梯度下降法，其实和批量梯度下降法原理类似，区别在于求梯度时没有用所有的 m 个样本的数据，而是仅仅选取一个样本 j 来求梯度。

另：由于是随机抽取一部分样本来计算梯度，随机性太强，所以最好把迭代次数加大到 6 位数左右。

又：由于随机梯度下降输出系数随机性较大，所以不适合用来解线性回归

# Same basic steps as the batch version; the difference is that the data set
# is resampled with replacement and each step uses a single row.
def gradDescent_1(dataSet, eps=0.01, numIt=500000, random_state=None):
    """Fit linear-regression weights with stochastic gradient descent.

    Parameters
    ----------
    dataSet : pandas.DataFrame
        Every column but the last is a feature; the last column is the target.
    eps : float
        Learning rate (step size) applied to each gradient step.
    numIt : int
        Number of rows drawn with replacement, i.e. the number of updates.
    random_state : optional
        Forwarded to ``DataFrame.sample``; pass a seed for reproducible
        weights. The default ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    An ``(n, 1)`` weight vector for the *standardized* features. Features and
    target of the resampled data are z-scored with ``regularize`` before
    fitting, and no intercept term is fitted.
    """
    # Draw numIt rows with replacement; row i drives update step i.
    sampled = dataSet.sample(numIt, replace=True, random_state=random_state)
    # ``np.asmatrix`` replaces the ``np.mat`` alias removed in NumPy 2.0.
    xMat = regularize(np.asmatrix(sampled.iloc[:, :-1].values))
    yMat = regularize(np.asmatrix(sampled.iloc[:, -1].values).T)
    m, n = xMat.shape
    weights = np.zeros((n, 1))
    for i in range(m):
        # Gradient of the squared error of the single sampled row i.
        grad = xMat[i].T * (xMat[i] * weights - yMat[i])
        weights = weights - eps * grad
    return weights

import time
# The coefficients differ from run to run; the author's explanation is that
# the result is itself a solution space, so the final SSE may differ little.
%time gradDescent_1(aba)

Wall time: 46.6 s

matrix([[ 0.05499166],
        [-0.11769103],
        [ 0.34716992],
        [ 0.40666376],
        [ 1.41184507],
        [-1.25264229],
        [-0.33662608],
        [ 0.28914221]])

def sseCal(dataSet, regres):
    """Return the residual sum of squares of ``regres`` on ``dataSet``.

    Parameters
    ----------
    dataSet : pandas.DataFrame
        Every column but the last is a feature; the last column is the target.
    regres : callable
        Regression routine called as ``regres(dataSet)`` that returns one
        weight per feature column. It is assumed to fit on z-scored features
        and target, as the gradient-descent routines in this file do.

    Returns
    -------
    Sum of squared differences between the target and the predictions,
    expressed in the target's original units.
    """
    features = np.asarray(dataSet.iloc[:, :-1].values, dtype=float)
    y = np.asarray(dataSet.iloc[:, -1].values, dtype=float)
    ws = np.asarray(regres(dataSet), dtype=float).reshape(-1)

    # The weights live in standardized space, so the features must be scored
    # the same way before applying them; using raw features here compares
    # incompatible scales and inflates the error.
    featStd = features.std(axis=0)
    featStd[featStd == 0] = 1.0  # constant column: avoid 0/0
    zFeatures = (features - features.mean(axis=0)) / featStd

    # Predictions are z-scores of y; map them back to y's original scale.
    yhat = zFeatures.dot(ws) * y.std() + y.mean()
    rss = np.power(yhat - y, 2).sum()
    return rss

%%time
n=aba.shape[0]
y=aba.iloc[:,-1].values
ws=gradDescent_1(aba)
yhat=aba.iloc[:,:-1].values * ws
yhat=yhat.reshape([n,])
rss = np.power(yhat - y, 2).sum()
rss

 
Wall time: 46.9 s

# Display the residual sum of squares held in ``rss``.
rss

379691.65605548245

# Wrapper that computes R**2.
def rSquare(dataSet, regres):
    """Return the coefficient of determination of ``regres`` on ``dataSet``.

    ``dataSet`` is the data frame whose last column is the target, and
    ``regres`` is the regression routine handed on to ``sseCal``. The result
    is ``1 - SSE / SST``, where SST is the total sum of squares of the target
    around its mean.
    """
    residual_ss = sseCal(dataSet, regres)
    target = dataSet.iloc[:, -1].values
    deviations = target - target.mean()
    total_ss = np.power(deviations, 2).sum()
    return 1 - residual_ss / total_ss

# Evaluate R**2 of the stochastic-gradient-descent fit on the abalone data.
rSquare(aba, gradDescent_1)

-7.374175454585444

	0	1	2	3	4	5	6	7	8
0	1	0.455	0.365	0.095	0.5140	0.2245	0.1010	0.150	15
1	1	0.350	0.265	0.090	0.2255	0.0995	0.0485	0.070	7
2	-1	0.530	0.420	0.135	0.6770	0.2565	0.1415	0.210	9
3	1	0.440	0.365	0.125	0.5160	0.2155	0.1140	0.155	10
4	0	0.330	0.255	0.080	0.2050	0.0895	0.0395	0.055	7

	0	1	2	3	4	5	6	7	8
4172	-1	0.565	0.450	0.165	0.8870	0.3700	0.2390	0.2490	11
4173	1	0.590	0.440	0.135	0.9660	0.4390	0.2145	0.2605	10
4174	1	0.600	0.475	0.205	1.1760	0.5255	0.2875	0.3080	9
4175	-1	0.625	0.485	0.150	1.0945	0.5310	0.2610	0.2960	10
4176	1	0.710	0.555	0.195	1.9485	0.9455	0.3765	0.4950	12

A--python梯度下降算法实践

SGD随机梯度下降法

随机梯度下降法，其实和批量梯度下降法原理类似，区别在于求梯度时没有用所有的 m 个样本的数据，而是仅仅选取一个样本 j 来求梯度。

另：由于是随机抽取一部分样本来计算梯度，随机性太强，所以最好把迭代次数加大到 6 位数左右。

又：由于随机梯度下降输出系数随机性较大，所以不适合用来解线性回归