自己利用python动手实现手写数字识别（逻辑回归）

自己利用python动手实现手写数字识别

数据准备和环境
主程序(调度用的）
数据可视化（displayData）
求损失和梯度的程序（lrCostFunction）
一对多训练（oneVsAll）
批量优化theta（oneVsAll)
预测（Predict）
精度验证(predictOneVsAll)

在这里我自己用python写了识别手写0-9数字的程序，利用了逻辑回归方法以及正则化，one-vs-all的技巧（如果不知道这两个概念的可以看一下我以前发的博客）。

数据准备和环境

这里的数据是一个5000 × 400的矩阵，意思就是有5000张图片而每一张都是20*20的灰度图片（这里是被拉伸成了一行），y就是标签是个5000×1的矩阵，取值范围为1-10，这里10就代表0。而且数据是以mat的形式存储的(matlab存文件就是这个形式）。
这里为了方便矩阵运算，读取文件和画图需要引入numpy,matplot,scipy三个库。

主程序(调度用的）

# ================== Multi-class Classification | part 1: one vs all ========================
from scipy.io import loadmat  # read matfile
import numpy as np
import displayData
import lrCostFunction
import oneVsAll
import Predict
import matplotlib.pyplot as plt
import predictOneVsAll
# ================== load Data =============================================================
data = loadmat("../machine-learning-ex3/ex3/ex3data1.mat")
x_data = data["X"]   # x_data here is a 5000 * 400 matrix (5000 training examples, 20 * 20 grid of pixel is unrolled into 4000-deminsional vector
y_data = data.get("y")  # y_data here is a 5000 * 1 matrix(label)
# hint : must transpose the data to get the oriented data
x_data = np.array([im.reshape((20, 20)).T for im in x_data])
x_data = np.array([im.reshape((400, )) for im in x_data])
print(y_data)
# can use data.key() and debugging to get this information
# ================== load end ==============================================================
# set some params
input_layer_size = 400
num_labels = 10
# ================== visualize the data ====================================================
rand = np.random.randint(0, 5000, (100, ))  # [0, 5000)
displayData.data_display(x_data[rand, :])   # get 100 images randomly
# ======================= Test case for lrCostFunction =============================
theta_t = np.array([-2, -1, 1, 2])
t = np.linspace(1, 15, 15) / 10
t = t.reshape((3, 5))
x_t = np.column_stack((np.ones((5, 1)), t.T))
y_t = np.array([1, 0, 1, 0, 1])
l_t = 3
cost = lrCostFunction.cost_reg(theta_t, x_t, y_t, l_t)
grad = lrCostFunction.grad_reg(theta_t, x_t, y_t, l_t)
print("cost is {}".format(cost))
print("expected cost is 2.534819")
print("grad is {}".format(grad))
print("expected grad is 0.146561 -0.548558 0.724722 1.398003")
# ============================ test end =============================================
# ============================ one vs all:predict ===========================================
l = 0.1
theta = oneVsAll.one_vs_all(x_data, y_data, l, num_labels)
result = Predict.pred_lr(theta, x_data[1500, :])
np.set_printoptions(precision=2, suppress=True)  # don't use  scientific notation
print("this number is {}".format(result))  # 10 here is 0
plt.imshow(x_data[1500, :].reshape((20, 20)), cmap='gray', vmin=-1, vmax=1)
plt.show()
accuracy = predictOneVsAll.pred_accuracy(theta, x_data, y_data)
print("test 5000 images, accuracy is {:%}".format(accuracy))
# ============================ predict  end ======================================================

数据可视化（displayData）

这里需要说明的是这里的x_data数据需要把其还原成20*20的矩阵再转置才能画出摆放方向正确的图像。

import numpy as np
import matplotlib.pyplot as plt


def data_display(data, image_width=20):
    # compute image_height
    m, n = data.shape
    image_height = n / image_width
    # compute rows and cols
    display_rows = np.round(np.sqrt(m))
    display_cols = m / display_rows
    # set image padding
    pad = 1
    rows_pixel = np.ceil(display_rows * image_height + (display_rows + 1) * pad)
    cols_pixel = np.ceil(display_cols * image_width + (display_cols + 1) * pad)
    rows_pixel = rows_pixel.astype(np.int64)
    cols_pixel = cols_pixel.astype(np.int64)
    # initialize display matrix
    display_matrix = -np.ones((rows_pixel, cols_pixel))
    # the first pixel of every image is 1+(image_width+pad)*(n-1) or 1+(image_height+pad)*(n-1)
    for i in range(data.shape[0]):
        image_data = data[i, :].reshape((int(image_width), int(image_height)))
        row_index = np.floor(i / display_cols)
        cols_index = i % display_cols
        row_position = pad+(image_height+pad)*row_index
        cols_position = pad+(image_width+pad)*cols_index
        display_matrix[int(row_position):int(row_position+image_height), int(cols_position):int(cols_position+image_width)] = image_data[:, :]
    plt.imshow(display_matrix, cmap='gray', vmin=-1, vmax=1)
    plt.show()

我这里在展示手写图片的时候，只需要一次循环就可以完成，主要的技巧就是利用等差数列求出来每张图片左上角的像素在大矩阵中的位置即可比较方便地完成图片展示。
在这里插入图片描述
这就是展示出来的样子，可以调节pad来控制图片的黑边间隔。

求损失和梯度的程序（lrCostFunction）

import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def cost_reg(theta, x_data, y_data, l):
    m, n = x_data.shape
    theta = theta.reshape((-1, 1))
    y_data = y_data.reshape((-1, 1))
    hx = sigmoid(np.dot(x_data, theta))
    ln_h = np.log(hx)
    part1 = -np.dot(ln_h.T, y_data) / m
    ln_1h = np.log(1 - hx)
    part2 = -np.dot(ln_1h.T, 1-y_data) / m
    #   don't do that : theta[0, 0] = 0  # don't penalize theta0
    reg = l * np.dot(theta[1:, :].T, theta[1:, :]) / (2 * m)
    return (part1+part2+reg).flatten()


def grad_reg(theta, x_data, y_data, l):
    m, n = x_data.shape
    theta_tempt = theta.reshape((-1, 1)).copy()
    y_data = y_data.reshape((-1, 1))
    hx = sigmoid(np.dot(x_data, theta_tempt))
    theta_tempt[0, 0] = 0  # don't penalize theta0
    part1 = np.dot(x_data.T, hx - y_data)
    part2 = l * theta_tempt
    return ((part1+part2) / m).flatten()

这里比较坑的一个地方就是第二个函数求梯度的时候一定要进行拷贝，不然的话直接对这个引用进行操作将其[0,0]的元素赋值为0就会导致外部的theta改变。

一对多训练（oneVsAll）

import numpy as np
import fmincg

def one_vs_all(x_data, y_data, l, num_labels):
    # initialize some params
    m, n = x_data.shape
    x_data = np.column_stack((np.ones((m, 1)), x_data))
    # initialize initial_theta
    initial_thata = np.zeros((num_labels, n+1))
    theta = fmincg.fmincg(initial_thata, x_data, y_data, l, num_labels)
    return theta

这里就是一对多的训练，我们要进行k分类就需要训练出k个分类器。

批量优化theta（oneVsAll)

import scipy.optimize as op
import numpy as np
import lrCostFunction


def fmincg(theta, x_data, y_data, l, num_labels):
    for i in range(num_labels):
        y_tempt = y_data.copy()  # hint: must copy !
        # pre-treat
        pos = np.where(y_data == i)
        neg = np.where(y_data != i)
        if i == 0:
            pos = np.where(y_data == 10)
            neg = np.where(y_data != 10)
        y_tempt[pos] = 1
        y_tempt[neg] = 0
        result = op.minimize(lrCostFunction.cost_reg, theta[i, :].T, args=(x_data, y_tempt, l), method="TNC", jac=lrCostFunction.grad_reg)
        print("{} : {}".format(i, result.success))
        theta[i, :] = result.x
    return theta

这里就会返回一个10*401（10个分类器）的theta矩阵，这里利用scipy中比梯度下降更快的高级算法，这个算法一般不会需要自己去实现，除非对自己的数值计算很有信心。当然我在github上面也放上了自己实现的梯度下降算法也可以来优化theta，但是速度肯定要慢一些。

预测（Predict）

import numpy as np
import lrCostFunction


def pred_lr(theta, x_data):
    x_data = x_data.reshape((1, -1))
    x_data = np.column_stack((np.ones((1, 1)), x_data))
    result = lrCostFunction.sigmoid(x_data @ theta.T)
    predict = np.argmax(result.flatten())
    if predict == 0:
        predict = 10
    return predict

这就是预测的代码。

精度验证(predictOneVsAll)

import Predict

def pred_accuracy(theta, x_data, y_data):
    right_num = 0
    m, _ = x_data.shape
    for i in range(m):
        pred = Predict.pred_lr(theta, x_data[i, :])
        if pred == y_data[i, :]:
            right_num += 1
    return right_num / m

还有神经网络版本的实现，我就放在我的github上面了。
https://github.com/lsl1229840757/machine_learning/tree/master/programming_ex3/ex3