Deep Learning for Beginners: My First Deep Neural Network

  I have looked at pile after pile of frameworks, but as a complete beginner I still prefer to start from a low-level implementation. I watched Andrew Ng's videos on Coursera and worked through some of the programming exercises on the site, and I have to say the quality of both the lectures and the exercises is excellent

and right on point. This post is my summary of the first course.

  Although I call it a deep neural network, it is really just a fully connected network with a few more layers, built to practice and review the course. The samples are my own: a (30000, 400) training set (uploaded as an attachment), where the positive class is 'car', with 200 positive samples and

200 negative samples; the test set has shape (30000, 100). The sample sizes and so on were not chosen sensibly, so please go easy on me. The code is below.

++++++++++++++++++++++++++ This part: the required activation functions, relu and sigmoid
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np

def sigmoid(Z):
    """
    Implements the sigmoid activation in numpy

    Arguments:
    Z -- numpy array of any shape

    Returns:
    A -- output of sigmoid(Z), same shape as Z
    cache -- returns Z as well, useful during backpropagation
    """

    A = 1 / (1 + np.exp(-Z))
    cache = Z

    return A, cache

def relu(Z):
    """
    Implement the RELU function.

    Arguments:
    Z -- Output of the linear layer, of any shape

    Returns:
    A -- Post-activation parameter, of the same shape as Z
    cache -- Z, stored for computing the backward pass efficiently
    """

    A = np.maximum(0, Z)

    assert (A.shape == Z.shape)

    cache = Z
    return A, cache
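
# A hedged sanity check, not part of the original post: sigmoid(0) should be
# 0.5 and relu should zero out the negative entries.
_Z_check = np.array([[-1.0, 0.0, 2.0]])
_A_sig, _ = sigmoid(_Z_check)   # roughly [[0.269, 0.5, 0.881]]
_A_rel, _ = relu(_Z_check)      # [[0., 0., 2.]]
assert abs(_A_sig[0, 1] - 0.5) < 1e-12 and (_A_rel >= 0).all()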


def sigmoid_backprob(dA, cache):
    """
    Implement the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache

    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)

    assert (dZ.shape == Z.shape)

    return dZ

def relu_backprob(dA, cache):
    """
    Implement the backward propagation for a single RELU unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """
    Z = cache
    # The derivative of relu is 1 where Z > 0 and 0 elsewhere.
    dZ = dA * (Z > 0)

    assert (dZ.shape == Z.shape)
    return dZ
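
# A hedged numerical check, not part of the original post: compare
# sigmoid_backprob against a centred finite difference, assuming dA = 1.
_z = np.array([[0.5]])
_eps = 1e-5
_numeric = (sigmoid(_z + _eps)[0] - sigmoid(_z - _eps)[0]) / (2 * _eps)
_analytic = sigmoid_backprob(np.ones_like(_z), _z)
assert np.allclose(_numeric, _analytic, atol=1e-8)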



++++++++++++++++++++++++++ This part reads in the data set ++++++++++++++++++++++++++++++++++++++

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np

def load_data_set():
    # Shapes as described above: train_set (30000, 400), test_set (30000, 100).
    train_set = np.load('train_set.npy')
    train_label = np.load('train_label.npy')
    test_set = np.load('test_set.npy')
    test_label = np.load('test_label.npy')

    return train_set, train_label, test_set, test_label
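
# Hedged helper, not in the original post: if the attached .npy files are
# missing, create random placeholder arrays with the shapes described in the
# intro. This is pure noise, NOT the real car dataset, and the (1, m) label
# shape is my assumption about how the labels were stored.
import os

def make_placeholder_data():
    rng = np.random.RandomState(0)
    np.save('train_set.npy', rng.randn(30000, 400))
    np.save('train_label.npy', rng.randint(0, 2, (1, 400)))
    np.save('test_set.npy', rng.randn(30000, 100))
    np.save('test_label.npy', rng.randint(0, 2, (1, 100)))

if __name__ == '__main__':
    if not os.path.exists('train_set.npy'):
        make_placeholder_data()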


+++++++++++++++++++++++ The network +++++++++++++++++++++++++++++
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np
from activation_function import sigmoid, relu, sigmoid_backprob, relu_backprob
from read_data import load_data_set
train_set,train_label,test_set, test_label = load_data_set()

def init_parameters_deep(layer_items):
    # Small random weights, zero biases; layer_items[0] is the input size.
    parameters = {}
    L = len(layer_items)
    for l in range(1, L):
        W = np.random.randn(layer_items[l], layer_items[l-1]) * 0.01
        b = np.zeros((layer_items[l], 1))
        parameters['W' + str(l)] = W
        parameters['b' + str(l)] = b

    return parameters
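
# Hedged sanity check, not from the original post: for a tiny layer spec the
# initializer should give W1 of shape (4, 5), b1 (4, 1), W2 (1, 4), b2 (1, 1).
_p_check = init_parameters_deep([5, 4, 1])
assert _p_check['W1'].shape == (4, 5) and _p_check['b1'].shape == (4, 1)
assert _p_check['W2'].shape == (1, 4) and _p_check['b2'].shape == (1, 1)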

def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache

def linear_activation_forward(A_prev, W, b, activation):

    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)

    elif activation == 'relu':
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

def L_forward_model(X, parameters):
    # L-1 relu layers followed by one sigmoid output layer.
    caches = []
    L = len(parameters) // 2
    A = X
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation='relu')
        caches.append(cache)
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation='sigmoid')
    caches.append(cache)

    return AL, caches

def compute_cost(AL, Y):
    # Binary cross-entropy averaged over the m examples.
    m = AL.shape[1]
    cost = -1/m * (Y * np.log(AL) + (1 - Y) * np.log(1 - AL)).sum()
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    return cost
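
# Hedged worked example, not from the original post: predictions [0.8, 0.2]
# against labels [1, 0] give a cost of -(log 0.8 + log 0.8) / 2, about 0.2231.
assert abs(compute_cost(np.array([[0.8, 0.2]]), np.array([[1, 0]])) - 0.2231) < 1e-3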

def linear_back(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = 1/m * np.dot(dZ, A_prev.T)
    db = 1/m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

def linear_activation_back(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == 'sigmoid':
        dZ = sigmoid_backprob(dA, activation_cache)
        dA_prev, dW, db = linear_back(dZ, linear_cache)

    elif activation == 'relu':
        dZ = relu_backprob(dA, activation_cache)
        dA_prev, dW, db = linear_back(dZ, linear_cache)

    return dA_prev, dW, db

def L_backprob_model(AL, Y, caches):
    grads = {}
    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    L = len(caches)
    cache = caches[L-1]
    grads['dA' + str(L-1)], grads['dW' + str(L)], grads['db' + str(L)] = linear_activation_back(dAL, cache, activation='sigmoid')

    # Propagate through the relu layers, feeding each layer the dA produced
    # by the layer above it.
    for l in reversed(range(L - 1)):
        cache = caches[l]
        grads['dA' + str(l)], grads['dW' + str(l+1)], grads['db' + str(l+1)] = linear_activation_back(grads['dA' + str(l+1)], cache, activation='relu')

    return grads

def update_parameters(parameters, grads, learning_rate=0.01):
    L = len(parameters) // 2
    for l in range(1, L+1):
        parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters

def L_nn_deep(layer_items, X, Y, num_iter, learning_rate, print_cost=False):
    costs = []
    parameters = init_parameters_deep(layer_items)

    for i in range(num_iter):
        AL, caches = L_forward_model(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_backprob_model(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print('Iteration %d, cost: %f' % (i, cost))

    return parameters
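
# Hedged addition, not in the original post: the script loads test_set and
# test_label but never evaluates on them. A minimal predict helper one might
# bolt on, thresholding the sigmoid output at 0.5:
def predict(X, Y, parameters):
    AL, _ = L_forward_model(X, parameters)
    pred = (AL > 0.5).astype(int)
    accuracy = np.mean(pred == Y)
    return pred, accuracy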

layers = [30000, 10000, 5000, 1000, 100, 1]

p = L_nn_deep(layers, train_set, train_label, 2000, 0.01, True)

These parameters and this dataset are a total mess; my RAM got blown up right away. I'm too lazy to fix it.
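
To at least confirm the code itself runs, here is a hedged smoke test on tiny random data (entirely my own assumption, not the dataset or the layer sizes from the post): with a handful of input features and small hidden layers, the whole training loop fits comfortably in memory.

# Hedged smoke test on tiny random data (not the car dataset).
X_small = np.random.randn(20, 50)           # 20 features, 50 examples
Y_small = np.random.randint(0, 2, (1, 50))  # random binary labels
p_small = L_nn_deep([20, 7, 5, 1], X_small, Y_small, 500, 0.01, True)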




Original post: https://www.cnblogs.com/zxxian/p/7857467.html