Model-building exercise 1: forward and backward passes with numpy, Tensor, and Variable, plus a custom activation function

Building a simple forward and backward pass with numpy

import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)      # (64, 1000)
y = np.random.randn(N, D_out)     # (64, 10)
w1 = np.random.randn(D_in, H)     # (1000, 100)
w2 = np.random.randn(H, D_out)    # (100, 10)
learning_rate = 1e-6

for t in range(2):
    # Forward pass: compute predicted y
    h = x.dot(w1)                  # (64, 100)
    h_relu = np.maximum(h, 0)      # (64, 100), elementwise ReLU
    y_pred = h_relu.dot(w2)        # (64, 10)

    loss = np.square(y_pred - y).sum()      # sum() adds up all elements
    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()             # (64, 100)
    grad_h[h < 0] = 0               # zero grad_h wherever h is negative -> derivative of ReLU
    grad_w1 = x.T.dot(grad_h)       # .T is the transpose; result is (1000, 100)

    # Update weights
    w1 -= learning_rate * grad_w1           # (1000, 100)
    w2 -= learning_rate * grad_w2
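Because the backward pass above is written by hand, it is easy to slip in a sign or transpose mistake. Below is a minimal gradient-check sketch (my own addition, not part of the original post): it compares the analytic gradient of w2 with a central-difference estimate at a few randomly sampled entries, evaluating both at the same weights.

def loss_fn():
    # Recompute the loss from scratch for the current w1, w2
    return np.square(np.maximum(x.dot(w1), 0).dot(w2) - y).sum()

h_check = np.maximum(x.dot(w1), 0)
analytic_grad_w2 = h_check.T.dot(2.0 * (h_check.dot(w2) - y))

eps = 1e-6
for _ in range(5):
    idx = tuple(np.random.randint(s) for s in w2.shape)
    old = w2[idx]
    w2[idx] = old + eps
    loss_plus = loss_fn()
    w2[idx] = old - eps
    loss_minus = loss_fn()
    w2[idx] = old                   # restore the original value
    numeric = (loss_plus - loss_minus) / (2 * eps)
    print(idx, numeric, analytic_grad_w2[idx])   # the two gradients should agree closely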

Building a simple forward and backward pass with Tensors

import torch

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in).type(dtype)
y = torch.randn(N, D_out).type(dtype)

# Randomly initialize weights
w1 = torch.randn(D_in, H).type(dtype)
w2 = torch.randn(H, D_out).type(dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)        # mm is matrix multiplication, the counterpart of numpy's dot
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    loss = (y_pred - y).pow(2).sum()
    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update weights using gradient descent
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
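The commented-out dtype = torch.cuda.FloatTensor line hints at the only change needed to run the same loop on a GPU. A minimal sketch (assuming a CUDA-capable device is available):

# Pick the CUDA tensor type when a GPU is available, otherwise fall back to the CPU type;
# everything created with .type(dtype) afterwards then lives on the GPU.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor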

Forward and backward with Variables (autograd)

# Use PyTorch Variables and autograd to implement our two-layer network;
# now we no longer need to manually implement the backward pass through the network.
import torch
from torch.autograd import Variable

dtype = torch.FloatTensor
N, D_in, H, D_out = 64, 1000, 100, 10

# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Variables during the backward pass.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Setting requires_grad=True indicates that we want to compute gradients
# with respect to these Variables during the backward pass.
w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6
for t in range(2):
    # Forward pass: we do not need to keep references to intermediate values
    # since we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # loss is a Variable of shape (1,) and loss.data is a Tensor of shape (1,);
    # loss.data[0] is a scalar value holding the loss.
    loss = (y_pred - y).pow(2).sum()
    # print(loss)        # [torch.FloatTensor of size 1]
    # print(loss.size()) # torch.Size([1])
    # print(loss.data)   # [torch.FloatTensor of size 1]
    print(loss.data[0])

    # Run the backward pass; autograd fills in w1.grad and w2.grad
    loss.backward()

    # Update weights on .data so the update itself is not tracked by autograd
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # Manually zero the gradients, otherwise they accumulate across iterations
    w1.grad.data.zero_()
    w2.grad.data.zero_()
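Note that this snippet targets the old Variable API. In PyTorch 0.4 and later, Variable was merged into Tensor and indexing a zero-dimensional loss with loss.data[0] no longer works; the scalar is read like this instead:

print(loss.item())   # Python float holding the scalar loss value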

Implementing the ReLU function with Variables (custom autograd Function)

import torch
from torch.autograd import Variable

class MyReLU(torch.autograd.Function):
    def forward(self, input):
        # Save the input so the backward pass can see where it was negative
        self.save_for_backward(input)
        return input.clamp(min=0)

    def backward(self, grad_output):
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

dtype = torch.FloatTensor
N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6
for t in range(2):
    # Construct a fresh Function instance each iteration (legacy autograd API)
    relu = MyReLU()

    # Forward pass
    y_pred = relu(x.mm(w1)).mm(w2)

    loss = (y_pred - y).pow(2).sum()
    loss.backward()

    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    w1.grad.data.zero_()
    w2.grad.data.zero_()
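The class above uses the legacy instance-based Function API. A minimal sketch of the same ReLU in the static-method style that current PyTorch requires (this variant is my addition, not part of the original post):

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        # ctx is a context object used to stash tensors for the backward pass
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

# Used via the .apply classmethod instead of instantiating the class:
y_pred = MyReLU.apply(x.mm(w1)).mm(w2)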
Original source: https://www.cnblogs.com/Joyce-song94/p/7474571.html