深入PyTorch微分传参

导数

这段代码演示了如何对多个变量求导，以及当输出（loss）是向量而不是标量时，如何通过给 `backward` 传入梯度参数来求导。

# Imports kept with the snippet so it can run on its own; the file's first
# import statements only appear further down.
import torch
from torch.autograd import Variable

# Three leaf tensors of shape (3, 2), filled with 1, 2 and 4 respectively.
m1 = Variable(torch.ones((3, 2)), requires_grad=True)
m2 = Variable(torch.ones((3, 2)) * 2, requires_grad=True)
m3 = Variable(torch.ones((3, 2)) * 4, requires_grad=True)

# Intermediate results: y = m1 * m2 + m1 * m2 * m3.
x1 = m1 * m2
x2 = x1 * m3
y = x1 + x2

# y has shape (3, 2) rather than being a scalar, so backward() is given an
# upstream gradient of the same shape (all ones here).
gradients = torch.ones((3, 2))
y.backward(gradients)

# NOTE(review): x1, x2 and y are not leaf tensors; autograd normally leaves
# .grad unset on those unless retain_grad() was called before backward(), so
# expect None (and possibly a warning) for the last three entries -- confirm
# against the installed PyTorch version.
# The message is assembled from adjacent f-strings with explicit "\n"; a
# single-quoted string literal cannot span several physical lines.
print(f"m1 grad:{m1.grad}, \n"
      f" m2 grad:{m2.grad}, \n"
      f" m3 grad:{m3.grad}, \n"
      f" x1 grad:{x1.grad}, \n"
      f" x2 grad:{x2.grad}, \n"
      f" y grad:{y.grad}")

深入导数--hook机制

hook机制的详细解释

这段代码解释了导数是如何自动计算并保存的：先摘录 `register_hook` 的源码，再用一个小例子演示 hook 对梯度的影响。

import torch
from torch.autograd import Variable

def register_hook(self, hook):
    r"""Attach a backward hook to this tensor.

    The hook is invoked each time a gradient with respect to the tensor is
    computed. Its expected signature is::

        hook(grad) -> Tensor or None

    The hook must leave ``grad`` itself untouched; it may return a new
    gradient, which is then used in place of ``grad``.

    Args:
        hook: callable stored under the id of the returned handle.

    Returns:
        A ``hooks.RemovableHandle`` bound to the tensor's hook dictionary;
        calling ``handle.remove()`` detaches the hook again.

    Raises:
        RuntimeError: if ``self.requires_grad`` is false.

    Example::

        >>> v = torch.tensor([0., 0., 0.], requires_grad=True)
        >>> h = v.register_hook(lambda grad: grad * 2)  # double the gradient
        >>> v.backward(torch.tensor([1., 2., 3.]))
        >>> v.grad
        tensor([2., 4., 6.])
        >>> h.remove()  # removes the hook
    """
    if not self.requires_grad:
        raise RuntimeError("cannot register a hook on a tensor that "
                           "doesn't require gradient")

    # The hook dictionary is created lazily on the first registration; at that
    # moment a tensor that has a grad_fn also hands itself to that node via
    # _register_hook_dict.
    if self._backward_hooks is None:
        self._backward_hooks = OrderedDict()
        node = self.grad_fn
        if node is not None:
            node._register_hook_dict(self)

    registry = self._backward_hooks
    removable = hooks.RemovableHandle(registry)
    # The handle's id is the key, which is how the handle finds the entry again.
    registry[removable.id] = hook
    return removable

# Leaf tensor whose gradient is rewritten by a hook on every backward pass.
v = Variable(torch.Tensor([2, 2, 2]), requires_grad=True)


def _square_grad(grad):
    """Return the element-wise square of the incoming gradient."""
    return grad * grad


h = v.register_hook(_square_grad)

# First pass: the raw gradient is computed first, then passed through the
# hook, and the hook's result is what ends up in v.grad.
v.backward(torch.Tensor([1, 1, 2]))
print(v.grad.data)

# Second pass: v.grad is deliberately not reset in between (for example with
# v.grad.data = torch.Tensor([0, 0, 0])), so the hooked gradient of this pass
# is accumulated onto the value stored by the first pass.
v.backward(torch.Tensor([1, 1, 1]))
print(v.grad.data)

# Detach the hook again.
h.remove()

使用`with torch.no_grad()`

# Report the training loss for the current epoch. The forward pass runs inside
# torch.no_grad(), so it is not recorded for differentiation.
# NOTE(review): loss, net, features, w, b, labels and epoch are expected to be
# defined by the surrounding training loop, which is not part of this excerpt.
with torch.no_grad():
    train_l = loss(net(features, w, b), labels)
    print(
        'epoch %d, loss %f'
        % (epoch + 1, train_l.mean().numpy())
    )

SGD

这段代码展示了一个运用梯度下降的最简单模型。

import torch
from torch.autograd import Variable
from torch.distributions import normal
NUMBER = 100
# Alternative input: normal.Normal(loc=0, scale=1).sample((1, NUMBER))
# Constant targets: a (1, NUMBER) row in which every entry equals NUMBER.
X = torch.ones((1, NUMBER)) * NUMBER
# The single trainable parameter; it broadcasts against every column of X.
b = torch.ones(X.shape[0], requires_grad=True)
epoch = 200
# Step size, the same value the update rule has always used: (1 / NUMBER) / 10.
learning_rate = (1 / NUMBER) / 10
for i in range(epoch):
    # Sum of squared differences between the parameter and every target.
    loss = torch.sum((b - X) ** 2)
    # Discard the previous iteration's gradient so backward() starts fresh
    # instead of adding to it.
    b.grad = None
    loss.backward()
    # Plain gradient-descent step. The update runs under no_grad() so it is
    # not recorded for differentiation, and it modifies b in place instead of
    # rebinding b.data.
    with torch.no_grad():
        b -= b.grad * learning_rate
    if i % 10 == 0:
        print(f" {i} b is: {b}, b.grad is: {b.grad}")

深入Pytorch微分传参

导数

深入导数--hook机制

使用with torch.no_grad()

SGD

使用`with torch.no_grad()`