PyTorch Study Notes

Basic data types

Updated May 16

---------------------------------------------

  • Save a model
        print("Saving state, iter:", str(epoch))
        torch.save(model.state_dict(), f'logs/Epoch{epoch}-acc{acc}.pth')
  • Load a model / use as pretrained weights
      model = ResNet18().to(device)
    # ----------------------------#
    model_path = r"logs/Epoch2-acc0.6816.pth"
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict, strict=True)
  • Use partial pretrained weights (updated May 21)
      # -----Load partial pretrained weights------------------#
    model_path = r"logs/Epoch2-acc0.6831.pth"
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and model_dict[k].shape == v.shape}
    # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}  # fall back to this key-only check if the shape filter above cannot be used
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')
    # ---------------------------------------#
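
    # A shorter alternative (a sketch, not from the original notes): strict=False lets
    # load_state_dict skip missing/unexpected keys by itself. Note that it still raises
    # on shape mismatches, so the shape filter above is still needed in that case.
    # incompatible = model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
    # print(incompatible.missing_keys, incompatible.unexpected_keys)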

---------------------------------------------

Data type

type check

a = torch.randn(2, 3)    # randomly generate a 2-row, 3-column matrix
print(a.shape)
print(a.size(1))    # returns the 2nd element of the shape
print(a.shape[1])    # 3

# on CPU
print(a.type())    # torch.FloatTensor
print(type(a))
print(isinstance(a, torch.FloatTensor))

# on GPU
data = a.cuda()
print(isinstance(data, torch.cuda.FloatTensor))

"""
In PyTorch 0.3, a dim-0 tensor did not exist: a scalar such as 0.3 was returned as [0.3].
In later versions a scalar is returned as 0.3, i.e. a dim-0 tensor (for clearer semantics).

Distinguish dim / size / shape / tensor:
[2, 2]
dim: 2  (a.k.a. rank)
size/shape: [2, 2]
tensor: the concrete numbers, e.g. [1, 3]
                                   [2, 4]
"""

Dimension 0/1/2

# Dim=0: used for loss values
a = torch.tensor(2.2)
print(a.shape)    # torch.Size([])
print(len(a.shape))    # 0
print(a.size())    # torch.Size([])

# Dim=1: used for bias / Linear input
b = torch.tensor([2])    # written this way, the dtype follows the data inside
print(b)
print(b.type())
c = torch.tensor([1.1, 2.2])
print(c)
print(c.type())
d = torch.FloatTensor(2)
print(d)
e = torch.IntTensor([2.2])
print(e)

data = np.ones(3)
print(data)
f = torch.from_numpy(data)    # convert a numpy array to a tensor
print(f)

# Dim=2: Linear input with a batch dimension
g = torch.randn(2, 3)    # random normal distribution
print(g)
print(g.shape)
print(g.size())
print(g.size(0))
print(g.size(1))
print(g.shape[1])


# Dim=3: RNN input with a batch dimension
h = torch.rand(3, 2, 3)    # random uniform distribution
print(h)
print(h.shape)
print(h[0])
print(h[1])
print(list(h.shape))


# Dim=4 CNN: [b, c, h, w]
# The line below reads as: 2 images, 3 channels each, 28x28 pixels
i = torch.rand(2, 3, 28, 28)    # number of images, channels (3 for RGB), height, width
print(i)

Creating tensors

import from numpy

import torch
import numpy as np

# import from numpy
a = np.array([2, 3.3])
data = torch.from_numpy(a)
print(data)
b = np.ones([3, 4])
dd = torch.from_numpy(b)
print(dd)

import from list

# import from a list
# Capitalized Tensor() behaves like FloatTensor: it accepts a shape; lowercase tensor() accepts existing data
c = torch.tensor([2., 3.2])
d = torch.FloatTensor([2., 3.2])    # can also take existing data, but the data must be wrapped in a list; otherwise the arguments are treated as a shape, e.g. (2, 3)
e = torch.tensor([[2., 3.2], [1., 22.3]])
print(c)
print(d)
print(e)

uninitialized

# Generate uninitialized data: just a container, real data is written in later
# torch.empty(): takes a shape
# torch.FloatTensor(d1, d2, d3)
# torch.IntTensor(d1, d2, d3)

f = torch.empty(2, 3)
print(f)
print(torch.Tensor(2, 3))    # uninitialized values can be extreme; remember to overwrite them, otherwise you may run into nan or inf
print(torch.IntTensor(2, 3))
print(torch.FloatTensor(2, 3))

set default type

# set default type: the default type in torch is torch.FloatTensor
print(torch.tensor([1.2, 3]).type())
torch.set_default_tensor_type(torch.DoubleTensor)
print(torch.tensor([1.2, 3]).type())

rand/rand_like, randint

# rand/rand_like, randint
# rand: uniform distribution over [0, 1)
# rand_like: takes a tensor and samples a new one of the same shape
# randint(min, max, shape): integers in [min, max), the max excluded; *_like variants also exist

print(torch.rand(3, 3))    # fairly uniform samples
a = torch.rand(3, 3)
print(torch.rand_like(a))    # rand_like takes a tensor: it reads a.shape and feeds it to rand

print(torch.randint(1, 10, [3, 3]))

randn

# randn: normal distribution
# N(0, 1), often used for bias initialization
# N(u, std)
print(torch.randn(3, 3))
#                   full creates a length-10 tensor of zero means      the std decreases gradually from 1 towards 0
print(torch.normal(mean=torch.full([10], 0.), std=torch.arange(1, 0, -0.1)))

full

# full
print(torch.full([2, 3], 7))

print(torch.full([], 7))    # dim=0

print(torch.full([1], 7))    # dim=1

arange/range

print(torch.arange(0, 10))    # 10 excluded
print(torch.arange(0, 10, 2))

linspace/logspace

print(torch.linspace(0, 10, steps=4))    # evenly spaced, the endpoint 10 included
print(torch.logspace(0, 1, steps=10))    # 10 points whose exponents are evenly spaced from 0 to 1, i.e. values from 10**0 to 10**1

Ones/zeros/eye

# ones: all ones, given a shape
# zeros: all zeros
# eye: ones on the diagonal; takes 1 or 2 arguments
print(torch.ones(3, 3))
print(torch.zeros(3, 3))
print(torch.eye(3, 4))
data = torch.ones(3, 3)
print(torch.ones_like(data))

randperm: random permutation

# randperm: random permutation
print(torch.randperm(10))

a = torch.rand(2, 3)
b = torch.rand(2, 2)
idx = torch.randperm(2)
print(idx)
print(a)
print(b)
print(a[idx])    # shuffles a and b with the same index order (co-shuffle)
print(b[idx])

Indexing and slicing

indexing

a = torch.rand(4, 3, 28, 28)
print(a[0])
print(a[0].shape)    # torch.Size([3, 28, 28]): indexes the first dimension, i.e. takes image 0

print(a[0, 0].shape)    # torch.Size([28, 28]): second dimension, i.e. channel 0 of image 0

print(a[0, 0, 2])
print(a[0, 0, 2, 4])    # tensor(0.9441): image 0, channel 0, row 2, column 4

select first/last N

# select first/last N
a = torch.rand(4, 3, 28, 28)
print(a.shape)    # torch.Size([4, 3, 28, 28])
print(a[:2].shape)    # torch.Size([2, 3, 28, 28])
print(a[:2, :1, :, :].shape)    # torch.Size([2, 1, 28, 28])
print(a[:2, 1:, :, :].shape)    # torch.Size([2, 2, 28, 28])
print(a[:2, -1:, :, :].shape)    # torch.Size([2, 1, 28, 28])

select by steps

# select by steps
a = torch.rand(4, 3, 28, 28)
print(a[:, :, 0:28:2, 0:28:2].shape)    # torch.Size([4, 3, 14, 14])

print(a[:, :, ::2, ::2].shape)    # torch.Size([4, 3, 14, 14])

select by specific index

# select by specific index
a = torch.rand(4, 3, 28, 28)
print(a)
print(a.index_select(0, torch.tensor([0, 2])).shape)    # along dim 0 (the first argument), select indices 0 and 2
print(a.index_select(2, torch.arange(20)).shape)

...: stands for any number of dimensions

# ...: stands for any number of dimensions
a = torch.rand(4, 3, 28, 28)
print(a[...].shape)    # torch.Size([4, 3, 28, 28])
print(a[:, 1, ...].shape)    # torch.Size([4, 28, 28])
print(a[..., :2].shape)    # torch.Size([4, 3, 28, 2])

select by mask

# select by mask
x = torch.randn(3, 4)
y = torch.randn(3, 4)
print(x)
mask = x.ge(0.5)    # positions where the value >= 0.5 are True
print(mask)
print(torch.masked_select(x, mask))    # selects the elements at the True positions

select by flatten index

# select by flatten index
src = torch.IntTensor(3, 4)
print(src)
print(torch.take(src, torch.tensor([0, 2, 5])))

Tensor dimension transforms

  1. view  # reshape one shape into another
  2. squeeze (remove dimensions) / unsqueeze (add dimensions)
  3. transpose (swap two dims) / t (matrix transpose) / permute (rearrange multiple dims)
  4. expand (changes how the data is viewed) / repeat (actually copies the data, memory copied)

view: lost dim information

# view: lost dim information
a = torch.rand(4, 1, 28, 28)
print(a)
print(a.shape)
print(a.view(4, 28 * 28).shape)
print(a.view(4 * 28, 28).shape)
print(a.view(4*1, 28, 28).shape)
b = a.view(4, 784)
b.view(4, 28, 28, 1)    # logic bug

# flexible but prone to corruption; here the dimensions do not match
print(a.view(4, 783))    # RuntimeError: shape '[4, 783]' is invalid for input of size 3136

squeeze / unsqueeze

unsqueeze

"""
Valid range of the dim argument for unsqueeze:
    [-a.dim()-1, a.dim()+1), here [-5, 5)
"""
a = torch.rand(4, 1, 28, 28)
print(a.shape)
print(a.unsqueeze(0).shape)
print(a.unsqueeze(-1).shape)
print(a.unsqueeze(4).shape)
print(a.unsqueeze(-5).shape)
print(a.unsqueeze(5).shape)    # IndexError: Dimension out of range (expected to be in range of [-5, 4], but got 5)

a = torch.tensor([1.2, 2.3])
print(a)
print(a.unsqueeze(-1))
print(a.unsqueeze(0))

# Example:
b = torch.rand(32)
f = torch.rand(4, 32, 14, 14)
b = b.unsqueeze(1).unsqueeze(2).unsqueeze(0)
print(b.shape)

squeeze

# squeeze
b = torch.rand(1, 32, 1, 1)
print(b.squeeze())    # squeezes every dim of size 1
print(b.squeeze(0).shape)    # squeezes dim 0
print(b.squeeze(-1).shape)
print(b.squeeze(1).shape)    # dim 1 has size 32, so it cannot be squeezed and is left unchanged
print(b.squeeze(-4).shape)

expand/repeat

# expand/repeat
# expand: broadcasting, only changes how the data is viewed
# repeat: memory copied, actually duplicates the data
a = torch.rand(4, 32, 14, 14)

b = torch.rand(1, 32, 1, 1)
print(b)
print(b.expand(4, 32, 14, 14))    # torch.Size([4, 32, 14, 14])

print(b.expand(-1, 32, -1, -1).shape)    # -1 means keep that dim unchanged
print(b.expand(-1, 32, -1, -4).shape)    # -4 is not meaningful here    RuntimeError: invalid shape dimension -128


# repeat: not recommended
print(b.repeat(4, 32, 1, 1).shape)    # dim 1 is copied 32 times, giving torch.Size([4, 1024, 1, 1])
print(b.repeat(4, 1, 1, 1).shape)
print(b.repeat(4, 1, 32, 32).shape)    #

t(): transpose, only for 2D tensors

# t(): transpose, only for 2D tensors
a = torch.randn(3, 4)
print(a)
print(a.t())

transpose: swap two dimensions

# transpose: swap two dimensions
a = torch.rand(4, 3, 32, 32)
print(a.shape)
"""
RuntimeError: view size is not compatible with input tensor's size and stride
(at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
"""
a1 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32)    # .contiguous() is required after transpose before view
a2 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32).transpose(1, 3)
print(a1.shape)
print(a2.shape)

permute: places the dims directly; equivalent to applying transpose as many times as needed

# permute: places the dims directly; equivalent to applying transpose as many times as needed
a = torch.rand(4, 3, 28, 28)
print(a.transpose(1, 3).shape)    # torch.Size([4, 28, 28, 3])
b = torch.rand(4, 3, 28, 32)
print(b.transpose(1, 3).shape)    # torch.Size([4, 32, 28, 3])
print(b.transpose(1, 3).transpose(1, 3).shape)    # torch.Size([4, 3, 28, 32])
print(b.permute(0, 2, 3, 1).shape)    # torch.Size([4, 28, 32, 3])

Broadcast (automatic expansion)

"""
expand
without copying data

insert 1 dim ahead
expand dims with size 1 to same size
feature maps:[4, 32, 14, 14]
bias: [32, 1, 1] => [1, 32, 1, 1] => [4, 32, 14, 14]    broadcasting of the bias
"""


broadcast

# situation 1
# [4, 32, 14, 14]
# [1, 32, 1, 1] => [4, 32, 14, 14]

# situation 2
# [4, 32, 14, 14]
# [14, 14] => [1, 1, 14, 14] => [4, 32, 14, 14]    # unsqueeze first, then expand

# situation 3 (not broadcastable)
# [4, 32, 14, 14]
# [2, 32, 14, 14]

# a = torch.tensor([2, 32, 14, 14])
# # print(a)
# # print(a[:])

# a = torch.IntTensor(4, 3)
# b = torch.IntTensor(3)
# print(a)
# print(b)
"""
match from last dim
1. no dim
2. dim of size 1
"""

Concatenation and splitting

"""
Merge or split
Merge:
cat
stack
Split:
split
chunk
"""

cat

# cat
a = torch.rand(4, 32, 8)
b = torch.rand(5, 32, 8)
print(torch.cat([a, b], dim=0).shape)    # torch.Size([9, 32, 8])

a1 = torch.rand(4, 3, 32, 32)
a2 = torch.rand(4, 1, 32, 32)
print(torch.cat([a1, a2], dim=0).shape)    # RuntimeError: invalid argument 0; all other dims must match to cat along dim 0
print(torch.cat([a1, a2], dim=1).shape)    # torch.Size([4, 4, 32, 32])

stack: creates a new dim; requires identical shapes

# stack: creates a new dim; requires identical shapes
a1 = torch.rand(4, 3, 16, 32)
a2 = torch.rand(4, 3, 16, 32)
print(torch.cat([a1, a2], dim=2).shape)    # torch.Size([4, 3, 32, 32])
print(torch.stack([a1, a2], dim=2).shape)    # torch.Size([4, 3, 2, 16, 32])
a = torch.rand(32, 8)
b = torch.rand(32, 8)
print(torch.stack([a, b], dim=0).shape)    # torch.Size([2, 32, 8])

split: by length

# split: by length
b = torch.rand(32, 8)
a = torch.rand(32, 8)
# print(a.shape)    # torch.Size([32, 8])
c = torch.stack([a, b], dim=0)
# print(c.shape)    # torch.Size([2, 32, 8])
aa, bb = c.split([4, 4], dim=2)
print(aa.shape, bb.shape)    # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])

# aa, bb = c.split(2, dim=0)    # ValueError: not enough values to unpack (expected 2, got 1)

print(c.shape)    # torch.Size([2, 32, 8])

chunk: by number of chunks

# chunk: by number of chunks
aa, bb = c.chunk(2, dim=2)    # c is torch.Size([2, 32, 8]); split into 2 chunks along dim 2
print(aa.shape, bb.shape)    # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])

Math operations

"""
Math operation
1. add/minus/multiply/divide
2. matmul
3. pow
4. sqrt/rsqrt
5. round
"""

Basics

# Basics
a = torch.rand(3, 4)
b = torch.rand(4)

print(a)
print(b)
print(a + b)    # b is broadcast
# all(): returns True only if every element of the tensor is True, otherwise False
b = torch.tensor([1, 1, 1, 1])
print(torch.all(torch.eq(a-b, torch.sub(a, b))))

matmul

# matmul
# torch.mm
#     only for 2d
# torch.matmul
# @
a = torch.tensor([[3., 3.],
                 [3., 3.]])
print(a)
b = torch.ones(2, 2)
print(b)

print(torch.mm(a, b))    # 2D matrices only

print(torch.matmul(a, b))

print(a@b)

# Example:
# == 2D tensor operations
a = torch.rand(4, 784)
x = torch.rand(4, 784)
w = torch.rand(512, 784)    # (ch-out, ch-in) respectively

print((x@w.t()).shape)    # torch.Size([4, 512]); w stores ch-out first, so it must be transposed for the multiply

print(torch.matmul(x, w.t()).shape)    # torch.Size([4, 512])

# >2D tensor operations
a = torch.rand(4, 3, 28, 64)
b = torch.rand(4, 3, 64, 32)
print(torch.matmul(a, b).shape)    # torch.Size([4, 3, 28, 32])
b = torch.rand(4, 1, 64, 32)
print(torch.matmul(a, b).shape)    # torch.Size([4, 3, 28, 32]); here broadcasting is applied first, then matrix multiplication

power

# power
a = torch.full([2, 2], 3.)
print(a.pow(2))
print(a**2)
aa = a**2
print(aa.sqrt())
print(aa.rsqrt())
print(aa**(0.5))

exp/log

# exp/log
a = torch.exp(torch.ones(2, 2))
print(a)
print(torch.log(a))

approximation

# approximation
a = torch.tensor(3.14)
print(a.floor(), a.ceil(), a.trunc(), a.frac())    # tensor(3.) tensor(4.) tensor(3.) tensor(0.1400)
#      floor      ceil     keep the integer part   keep the fractional part

a = torch.tensor(3.499)
print(a.round())    # tensor(3.)  round to nearest
a = torch.tensor(3.5)
print(a.round())    # tensor(4.)

clamp: clipping

# clamp: clipping
"""
gradient clipping
(min)
(min, max)
"""
grad = torch.rand(2, 3)*15
print(grad)
print(grad.max())
print(grad.median())
print(grad.clamp(10))    # every element smaller than 10 becomes 10
print(grad.clamp(2, 10))    # values below 2 are clipped to 2, values above 10 are clipped to 10
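
In real training, clamping is usually applied to the gradients rather than to the weights. A rough sketch (assuming a model net, an optimizer and a computed loss; not from the original notes), using either element-wise clamping or the built-in norm-based clipper:

loss.backward()
for p in net.parameters():    # element-wise clipping of every gradient into [-10, 10]
    if p.grad is not None:
        p.grad.data.clamp_(-10, 10)
# alternatively, clip the global gradient norm
torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=10)
optimizer.step()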

Tensor statistics

"""
statistics
norm
mean, sum
prod
max, min, argmin (index of the min), argmax (index of the max)
kthvalue (the k-th smallest value, e.g. the 8th smallest), topk (the top k values)
"""

norm:

# norm:
a = torch.full([8], 1.)
b = a.view(2, 4)
c = a.view(2, 2, 2)
print(a)
print(b)
print(c)
print(a.norm(1), b.norm(1), c.norm(1))    # tensor(8.) tensor(8.) tensor(8.)
print(a.norm(2), b.norm(2), c.norm(2))    # tensor(2.8284) tensor(2.8284) tensor(2.8284)

print(b.norm(1, dim=1))    # dim=1: take the norm along dim 1; the 2D tensor reduces to 1D    tensor([4., 4.])
print(b.norm(2, dim=1))    # tensor([2., 2.])

print(c.norm(1, dim=0))
print(c.norm(2, dim=0))

mean / sum / min / max / prod (product of all elements)

# mean / sum / min / max / prod (product of all elements)
a = torch.arange(8).view(2, 4).float()
print(a)
"""
tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]])
"""
print(a.min(), a.max(), a.mean(), a.prod())    # tensor(0.) tensor(7.) tensor(3.5000) tensor(0.)
print(a.sum())    # tensor(28.)
print(a.argmin(), a.argmax())    # tensor(0) tensor(7)

argmin/argmax along a given dimension

# argmin/argmax along a given dimension
a = torch.rand(4, 5)
print(a)
print(a.argmax())
print(a.argmax(dim=1))    # argmax along dim=1, i.e. the index of the max within each row

keepdim

# keepdim
a = torch.rand(4, 10)
print(a)
# print(a.max(dim=1))
print(a.argmax(dim=1))
print(a.max(dim=1, keepdim=True))    # returns both the max along dim=1 and its index, keeping the reduced dim

top-k or k-th

# top-k or k-th
a = torch.rand(4, 10)
print(a.topk(3, dim=1))
print(a.topk(3, dim=1, largest=False))

print(a.kthvalue(8, dim=1))    # returns the 8th smallest value along dim=1
"""
torch.return_types.kthvalue(
values=tensor([0.7363, 0.8011, 0.6856, 0.6297]),
indices=tensor([4, 0, 7, 8]))
"""

compare

# compare
"""
>  >= <  <=  !=  ==

torch.eq(a, b)
"""
a = torch.rand(4, 10)
print(a > 5)    # element-wise comparison (a is in [0, 1), so every entry is False here)
print(torch.gt(a, 0))
print(a != 0)

a = torch.ones(2, 3)
b = torch.randn(2, 3)

"""
Question: what is the difference between torch.rand() and torch.randn()?
Answer: rand() is the uniform distribution, randn() is the standard normal distribution
"""
print(a)
print(b)
print(torch.eq(a,b))

print(torch.eq(a, a))    # compares element-wise
"""
tensor([[True, True, True],
        [True, True, True]])
"""
print(torch.equal(a, a))     # True: only True if every element matches

Advanced tensor operations

"""
Advanced tensor operations
where
gather: collects values; a gather works like a table lookup.   Design goal: implement CPU-style lookups on the GPU
"""


where

# where
# torch.where(condition,x,y) --> Tensor
# Example:
cond = torch.tensor([[0.6769, 0.7271],
                    [0.8884, 0.4163]])
print(cond)
a = torch.zeros(2, 2)
print(a)
b = torch.ones(2, 2)
print(b)

print(torch.where(cond > 0.5, a, b))    # where the condition holds, take the element from a; otherwise take it from b

gather

# Example: retrieve labels
prob = torch.randn(4, 10)
# print(prob)

idx = prob.topk(dim=1, k=3)
# print(idx)
idx = idx[1]
# print(idx)
label = torch.arange(10) + 100
# print(label)
label_expand = label.expand(4, 10)
print(label_expand)
print(idx)    # these are the indices
print('------------------')
# print(idx.long())    # convert to LongTensor
print(torch.gather(label_expand, dim=1, index=idx.long()))    # gathers values according to the index

Gradients

"""
1. len: the length of the gradient indicates how strongly the function changes
2. dir: the direction indicates the direction of change
"""

Activation functions

Sigmoid / Logistic gradient derivation

# Activation functions
z = torch.linspace(-100, 100, 10)
# sigmoid activation
print(z)
print(torch.sigmoid(z))    # range (0, 1)

Tanh

# tanh activation: common in RNNs, range (-1, 1)
a = torch.linspace(-1, 1, 10)
print(torch.tanh(a))

Relu

# ReLU activation
# Two ways to use it in PyTorch: 1. nn.ReLU (module)  2. torch.relu / F.relu (function)
from torch.nn import functional as F
a = torch.linspace(-1, 1, 10)
print(torch.relu(a))
print(F.relu(a))

Loss functions and their gradients

"""
1. Mean Squared Error
2. Cross Entropy Loss
    1. binary
    2. multi-class
"""

MSE

Method 1: autograd.grad

# Method 1: autograd.grad

# Mean Squared Error
# Note: compared with the L2 norm, MSE does not take the square root

# A simple derivative computed with PyTorch
# Here pred = w * x + b
from torch.nn import functional as F
x = torch.ones(1)
w = torch.full([1], 2.)
mse = F.mse_loss(torch.ones(1), x*w)    # first argument: pred, second argument: label
print(torch.autograd.grad(mse, [w]))    # first argument: loss, second argument: [w1, w2, w3, ...]
"""
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
w was initialized without requesting gradients, so when building the graph PyTorch marked it as not requiring grad
"""
# Fix: tell PyTorch that w needs gradient information
w.requires_grad_()
print(torch.autograd.grad(mse, [w]))
"""
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
It still fails after the change, because PyTorch uses a dynamic graph:
w was updated, but the existing graph was not,
since the graph is built one computation at a time
"""
# The computation must be run again so the graph is rebuilt
mse = F.mse_loss(torch.ones(1), x*w)    # rebuild the dynamic graph
print(torch.autograd.grad(mse, [w]))    # (tensor([2.]),): works once the graph has been rebuilt
print(mse)

Method 2: loss.backward

# Method 2: loss.backward
from torch.nn import functional as F
x = torch.ones(1)
w = torch.full([1], 2.)
mse = F.mse_loss(torch.ones(1), x*w)

# torch.autograd.grad(mse, [w])

w.requires_grad_()    # make w track gradients

mse = F.mse_loss(torch.ones(1), x*w)    # recompute to rebuild the dynamic graph
# torch.autograd.grad(mse, [w])    # 1. explicit gradient computation, recomputing the gradient

mse.backward()    # 2. backward pass; afterwards w.grad is tensor([2.])
print(w.grad)

Summary

"""
Gradient API
    1. torch.autograd.grad(loss, [w1, w2, ...]): returns the gradients explicitly
        [w1 grad, w2 grad...]
    
    2. loss.backward()    # does not return the gradients; instead it attaches them to each parameter
        w1.grad
        w2.grad
"""

Softmax



"""
Softmax derivative dp_i/da_j:
    p_i * (1 - p_j)    if i == j
    -p_j * p_i         if i != j

    (the delta_ij term is 1 if i == j,
     and 0 if i != j)
"""
import torch
from torch.nn import functional as F
a = torch.rand(3)    # tensor([0.4207, 0.2955, 0.8440])
print(a.requires_grad_())    # after this, gradients can be computed: tensor([0.5424, 0.1913, 0.9416], requires_grad=True)

p = F.softmax(a, dim=0)    # builds the graph automatically: tensor([0.2489, 0.3556, 0.3954], grad_fn=<SoftmaxBackward>)

# Once backward (or grad) has been called, besides doing the backward pass it also frees the graph, unless retain_graph=True is passed

print(torch.autograd.grad(p[1], [a], retain_graph=True))    # (tensor([-0.0755,  0.1879, -0.1125]),)    the i=1 entry is positive, the others negative
print(torch.autograd.grad(p[2], [a]))    # (tensor([-0.1349, -0.1125,  0.2473]),)    the i=2 entry is positive, the others negative
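
As a quick sanity check (a sketch added here, not in the original notes), the autograd result can be compared against the analytic formula above:

p = F.softmax(a, dim=0)                  # rebuild the graph (the call above freed it)
auto = torch.autograd.grad(p[1], [a])[0]
pd = p.detach()
analytic = -pd[1] * pd                   # off-diagonal terms: -p_1 * p_j
analytic[1] = pd[1] * (1 - pd[1])        # diagonal term: p_1 * (1 - p_1)
print(torch.allclose(auto, analytic))    # True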

Perceptron

Single-output perceptron

import torch
from torch.nn import functional as F
x = torch.randn(1, 10)
w = torch.randn(1, 10, requires_grad=True)
print(x)
print(w)
o = torch.sigmoid(x@w.t())    # bias omitted here
print(o)
print(torch.ones(1, 1))
loss = F.mse_loss(torch.ones(1, 1), o)
print(loss)
loss.backward()
print(w.grad)

Multi-output perceptron

import torch
from torch.nn import functional as F
x = torch.randn(1, 10)
w = torch.randn(2, 10, requires_grad=True)
print(x)
print(w)
o = torch.sigmoid(x@w.t())
loss = F.mse_loss(torch.ones(1, 2), o)
loss.backward()
print(w.grad)

Chain rule

import torch

x = torch.tensor(1.)
w1 = torch.tensor(2., requires_grad=True)
b1 = torch.tensor(1.)
w2 = torch.tensor(2., requires_grad=True)
b2 = torch.tensor(1.)

y1 = x*w1 + b1
y2 = y1*w2 + b2

dy2_dy1 = torch.autograd.grad(y2, [y1], retain_graph=True)[0]
dy1_dw1 = torch.autograd.grad(y1, [w1], retain_graph=True)[0]

dy2_dw1 = torch.autograd.grad(y2, w1, retain_graph=True)[0]    # w1 can be passed with or without the list brackets

print(dy2_dy1*dy1_dw1)

print(dy2_dw1)

Optimization example

import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import torch
def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2

# plot the surface
x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)    # build 2D coordinate grids from the 1D x and y arrays
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z)    # Z was computed from the X, Y coordinates by himmelblau above
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()


# Find a minimum -- different starting points find different minima
# [1., 0.], [-4, 0.], [4, 0.]
x = torch.tensor([4., 0.], requires_grad=True)    # different initializations give different update speeds and different final results, so initialization matters a lot for gradient descent
optimizer = torch.optim.Adam([x], lr=1e-3)
for step in range(20000):

    pred = himmelblau(x)    # forward pass on x; the goal is to minimize this prediction

    optimizer.zero_grad()    # clear the accumulated gradient information
    pred.backward()    # populate x.grad, i.e. the gradients of both coordinates
    optimizer.step()    # update x using its gradient

    if step % 2000 == 0:
        print('step {}: x = {}, f(x) = {}'
               .format(step, x.tolist(), pred.item()))

Logistic Regression


Cross Entropy



import torch
a = torch.full([4], 1/4)
print(a)
print(a*torch.log2(a))
print(-(a*torch.log2(a)).sum())    # tensor(2.): the uniform distribution has the highest entropy, i.e. the highest uncertainty
b = torch.tensor([0.1, 0.1, 0.1, 0.7])
print(-(b*torch.log2(b)).sum())    # tensor(1.3568): lower entropy, less uncertainty
c = torch.tensor([0.001, 0.001, 0.001, 0.999])
print(-(c*torch.log2(c)).sum())    # tensor(0.0313): very low entropy, the outcome is almost certain

numerical stability

import torch
from torch.nn import functional as F
x = torch.randn(1, 784)
w = torch.randn(10, 784)
logits = x@w.t()
print(logits.shape)
pred = F.softmax(logits, dim=1)
print(pred)
pred_log = torch.log(pred)
loss1 = F.nll_loss(pred_log, torch.tensor([3]))
print(loss1)
loss2 = F.cross_entropy(logits, torch.tensor([3]))    # pass the logits here, because cross_entropy = softmax + log + nll_loss combined
print(loss2)
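
Equivalently (a small sketch, not in the original notes), the numerically stable path uses F.log_softmax instead of log(softmax(...)):

pred_log = F.log_softmax(logits, dim=1)         # more stable than torch.log(F.softmax(...))
loss3 = F.nll_loss(pred_log, torch.tensor([3]))
print(torch.allclose(loss2, loss3))             # True: same value as F.cross_entropy(logits, ...)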

Fully connected layers

import torch
import  torch.nn as nn

x = torch.randn(1, 784)    # torch.Size([1, 784])
print(x.shape)

layer1 = nn.Linear(784, 200)    # the first argument is ch-in, the second is ch-out
layer2 = nn.Linear(200, 200)
layer3 = nn.Linear(200, 10)

x = layer1(x)
print(x.shape)    # torch.Size([1, 200])

x = layer2(x)
print(x.shape)    # torch.Size([1, 200])

x = layer3(x)
print(x.shape)    # torch.Size([1, 10])
print(x)

nn.Relu vs F.relu

import torch
import torch.nn as nn
from torch.nn import functional as F

x = torch.randn(1, 10)
print(x.shape)

# Usage 1: as a module class
class ML(nn.Module):
    def __init__(self):
        super(ML, self).__init__()

        self.model = nn.Sequential(  # build the model
            nn.Linear(784, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 10),
            nn.ReLU(inplace=True),
        )
# Usage 2: as a function
x = F.relu(x, inplace=True)

GPU acceleration

device = torch.device('cuda:0')    # choose a device; the parts that need computing can be moved to it

# Move the computation that needs accelerating onto the GPU
criteon = nn.CrossEntropyLoss().to(device)    # .to() returns a reference; for a module it is the same object, while for a tensor it returns a new tensor on the target device
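
A minimal end-to-end sketch (assuming a model net and a batch x, label coming from a DataLoader; not part of the original notes):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = net.to(device)                         # module: moved in place, the same object is returned
x, label = x.to(device), label.to(device)    # tensors: .to() returns new tensors on the device
logits = net(x)
loss = criteon(logits, label)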

Accuracy computation

"""
Code for computing accuracy
"""

import torch
from torch.nn import functional as F
logits = torch.rand(4, 10)

pred = F.softmax(logits, dim=1)
print(pred)

pred_label = pred.argmax(dim=1)    # index of the max value

print(pred_label)

label = torch.tensor([9, 3, 2, 9])
correct = torch.eq(pred_label, label)
print(correct)
print(correct.sum().float().item()/4)    # item() extracts the Python number inside

Visualization with Visdom

"""
Visualization for PyTorch needs one of the following:
Option 1:
pip install tensorboardX
1. requires starting a listening process

Option 2: Visdom
1. pip install visdom
2. python -m visdom.server  (this starts a web server, which renders the data onto a web page)
    Possible problem: ERROR:root:Error 404 while downloading https://unpkg.com/layout-bin-packer@1.4.0

Solution: install from source (download facebookresearch/visdom from GitHub)
    Step 1: pip uninstall visdom
    Step 2: download the source, cd into the directory (visdom-master), then run pip install -e .
    Step 3: go back to the user directory and run python -m visdom.server again
    Step 4: open the browser at the address it prints
"""


# Test:
from visdom import Visdom
viz = Visdom()

"""
line([Y value], [X value]); win can be thought of as an ID (there is also an id called env, which defaults to main); opts holds extra configuration

Non-image data is passed as numpy-style values, while image data is passed as tensors
"""
# viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
# viz.line([loss.item()], [global_step], win='train_loss', update='append')

During training

global_step += 1
viz.line([loss.item()], [global_step], win='train_loss', update='append')

During testing

# visualize with viz
viz.line([[test_loss, correct / len(test_loader.dataset)]],
            [global_step], win='test', update='append')
viz.images(data.view(-1, 1, 28, 28), win='x')
viz.text(str(pred.detach().cpu().numpy()), win='pred',
            opts=dict(title='pred'))

Regularization

optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.01)    # weight_decay=0.01 adds regularization, here the L2 norm
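
weight_decay only covers the L2 case; an L1 penalty has to be added to the loss by hand. A rough sketch (assuming a model net, logits, labels and a criterion criteon as in the training loops elsewhere in these notes):

l1_lambda = 0.01
l1_penalty = sum(p.abs().sum() for p in net.parameters())    # L1 norm of all parameters
loss = criteon(logits, label) + l1_lambda * l1_penalty
loss.backward()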

Dropout

import torch


net_droped = torch.nn.Sequential(
    torch.nn.Linear(784, 200),
    torch.nn.Dropout(0.5),    # drop 50% of the connections between the two layers
    torch.nn.ReLU(),
    torch.nn.Linear(200, 200),
    torch.nn.Dropout(0.5),    # drop 50% of the connections
    torch.nn.ReLU(),
    torch.nn.Linear(200, 10),
)

"""
Dropout() is applied during training,

but must be disabled during test/val.
For example:
for epoch in range(epochs):
    # train
    net_dropped.train()
    for batch_idx, (data, target) in enumerate(train_loader):
    ...

    net_dropped.eval()    # switch to eval mode for testing so dropout is disabled
    test_loss = 0
    correct = 0
    for data, target in test_loader:

"""

Convolutional neural networks

import torch.nn as nn
import torch
from torch.nn import functional as F

# first argument: input channels; second argument: number of kernels; kernel_size=3x3    output: [1, 3, 26, 26]
layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=0)
x = torch.rand(1, 1, 28, 28)

out = layer.forward(x)
print(out.shape)    # torch.Size([1, 3, 26, 26])    # 26 = (28-3)/1 + 1


layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=1)
out = layer.forward(x)
print(out.shape)    # torch.Size([1, 3, 28, 28])

layer = nn.Conv2d(1, 3, kernel_size=3, stride=2, padding=1)
out = layer.forward(x)
print(out.shape)    # torch.Size([1, 3, 14, 14])

out = layer(x)    # the preferred call style; Python's __call__ magic method dispatches to forward automatically
print(out.shape)    # torch.Size([1, 3, 14, 14])

print(layer.weight)    # inspect the layer's weights
print(layer.weight.shape)    # torch.Size([3, 1, 3, 3])

print(layer.bias.shape)    # torch.Size([3])


# F.conv2D

# x above was torch.rand(1, 1, 28, 28)
w = torch.rand(16, 3, 5, 5)
b = torch.rand(16)

# out = F.conv2d(x, w, b, stride=1, padding=1)
# print(out)    # errors, because the channel counts of x and w do not match
"""
RuntimeError: Given groups=1, weight of size 16 3 5 5, expected input[1, 1, 28, 28] to have 3 channels,
 but got 1 channels instead
"""
x = torch.randn(1, 3, 28, 28)
out = F.conv2d(x, w, b, stride=1, padding=1)
print(out.shape)    # torch.Size([1, 16, 26, 26])

out = F.conv2d(x, w, b, stride=2, padding=2)
print(out.shape)    # torch.Size([1, 16, 14, 14])
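
The shapes printed above follow the usual formula out = floor((in + 2*padding - kernel) / stride) + 1; a small helper (a sketch, not part of the original notes) to verify them:

def conv_out_size(in_size, kernel, stride=1, padding=0):
    # floor((in + 2p - k) / s) + 1
    return (in_size + 2 * padding - kernel) // stride + 1

print(conv_out_size(28, 5, stride=1, padding=1))    # 26, matches torch.Size([1, 16, 26, 26])
print(conv_out_size(28, 5, stride=2, padding=2))    # 14, matches torch.Size([1, 16, 14, 14])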

Pooling layers

"""
outline:
    Pooling
    upsample
    Relu
"""
import torch
import torch.nn as nn
from torch.nn import functional as F
x = torch.randn(1, 16, 14, 14)
print(x.shape)    # torch.Size([1, 16, 14, 14])

# max pooling via the nn module
layer = nn.MaxPool2d(2, stride=2)
out = layer(x)
print(out.shape)    # torch.Size([1, 16, 7, 7])    (14-2)/2 + 1 = 7


# average pooling via the functional API
out = F.avg_pool2d(x, 2, stride=2)    # torch.Size([1, 16, 7, 7])
print(out.shape)

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++#
# upsample
# uses F.interpolate
# interpolate means interpolation
# +++++++++++++++++++++++++++++++++++++++++++++++++++++#
x = out
out = F.interpolate(x, scale_factor=2, mode='nearest')    # nearest-neighbour upsampling
print(out.shape)    # torch.Size([1, 16, 14, 14])

out = F.interpolate(x, scale_factor=3, mode='nearest')
print(out.shape)    # torch.Size([1, 16, 21, 21])

#------------------------------------------------#
#  ReLU activation
#
# ------------------------------------------------#

x = torch.randn(1, 16, 7, 7)
print(x.shape)    # torch.Size([1, 16, 7, 7])

# Method 1: the nn module API
layer = nn.ReLU(inplace=True)    # inplace=True: x ---> x', and x' reuses x's memory
out = layer(x)
print(out.shape)    # torch.Size([1, 16, 7, 7])

# Method 2: the functional API
out = F.relu(x)
print(out.shape)    # torch.Size([1, 16, 7, 7])

BatchNorm

import torch
import torch.nn as nn

# ----------------------------#
# BatchNorm1d
# ----------------------------#
x = torch.randn(100, 16) + 0.5
print(x.shape)

layer = torch.nn.BatchNorm1d(16)    # must match the feature dimension above, otherwise it errors

print(layer.running_mean, layer.running_var)
"""
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) 
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
"""
out = layer(x)
print(layer.running_mean, layer.running_var)
"""
tensor([0.0452, 0.0446, 0.0516, 0.0671, 0.0644, 0.0622, 0.0514, 0.0449, 0.0520,
        0.0546, 0.0461, 0.0620, 0.0332, 0.0450, 0.0384, 0.0580]) 
tensor([0.9868, 0.9935, 1.0214, 1.0137, 1.0009, 0.9895, 1.0065, 1.0319, 0.9841,
        1.0051, 0.9967, 0.9968, 1.0045, 0.9877, 1.0011, 1.0031])
"""
#----------------------------------------#
# The data above follows N(0.5, 1) (randn shifted by 0.5)
#
# ---------------------------------------#

x = torch.randn(100, 16) + 0.5
layer = torch.nn.BatchNorm1d(16)

for i in range(5):    # each iteration passes the batch through BN again, so running_mean/running_var keep moving towards the batch statistics
    out = layer(x)

print(layer.running_mean, layer.running_var)


# ---------------------------#
# nn.BatchNorm2d
# ---------------------------#
x = torch.rand(1, 16, 7, 7)
print(x.shape)

layer = nn.BatchNorm2d(16)
out = layer(x)
print(out.shape)    # torch.Size([1, 16, 7, 7])

print(layer.weight)
"""
weight and bias here are BatchNorm's learnable gamma and beta, not conv-style weights
"""
print(layer.weight.shape)    # torch.Size([16])

print(layer.bias.shape)    # torch.Size([16])

# -----------------------------------#
#  class variables
# -----------------------------------#
print(vars(layer))


# ------------------------------------#
# Test
# ------------------------------------#
layer.eval()    # switch to test mode: the running statistics are used and no longer updated
out = layer(x)
print(vars(layer))

nn.Module

import torch
from torch import nn
from torch import optim

# -----------------------------------#
# Benefits of nn.Module
# 1. All the common building blocks are provided, e.g. Linear/ReLU/Sigmoid
# 2. The nn.Sequential() container (sequential = run in series); both built-in modules and your own can be used inside it
# 3. nn.Module manages parameters automatically
# 4. modules: all nodes / children: direct children
# 5. to(device) (see main() below)
# 6. save and load (see main() below)
# 7. convenient train/test switching (see main() below)
# 8. implement your own layer (see MyLinear/Flatten below); only nn.Module subclasses can be placed inside nn.Sequential
# -----------------------------------#

class MyLinear(nn.Module):

    def __init__(self, inp, outp):
        super(MyLinear, self).__init__()

        # requires_grad = True
        self.w = nn.Parameter(torch.randn(outp, inp))    # nn.Parameter registers the tensor automatically so it appears in net.parameters()
        self.b = nn.Parameter(torch.randn(outp))

    def forward(self, x):
        x = x @ self.w.t() + self.b
        return x


class Flatten(nn.Module):    # flattens everything

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, input):
        return input.view(input.size(0), -1)    # -1 flattens all remaining dims



class TestNet(nn.Module):

    def __init__(self):
        super(TestNet, self).__init__()

        self.net = nn.Sequential(nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),    # kernel_size added so the conv is valid; 3 with padding=1 keeps 28x28
                                 nn.MaxPool2d(2, 2),
                                 Flatten(),    # a custom layer; only module instances can go inside
                                 nn.Linear(16*14*14, 10))

    def forward(self, x):
        return self.net(x)


class BasicNet(nn.Module):

    def __init__(self):
        super(BasicNet, self).__init__()

        self.net = nn.Linear(4, 3)

    def forward(self, x):
        return self.net(x)



class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        #  The nn.Sequential() container (sequential = run in series); both built-in modules and your own can be used here
        self.net = nn.Sequential(BasicNet(),
                                 nn.ReLU(),
                                 nn.Linear(3, 2))

    def forward(self, x):
        return self.net(x)





def main():
    device = torch.device('cuda')
    net = Net()
    net.to(device)    # .to() returns a reference to the same net ---> this is NOT the case for tensors, which get a new object

    # train
    net.train()
    # test
    net.eval()

    # net.load_state_dict(torch.load('ckpt.mdl'))    # load the model at start-up
    #
    #
    # torch.save(net.state_dict(), 'ckpt.mdl')    # save the current state in case training is interrupted or power is lost

    for name, t in net.named_parameters():
        print('parameters:', name, t.shape)    # print the parameters: weights and biases

    for name, m in net.named_children():    # print the direct children of the Sequential
        print('children:', name, m)


    for name, m in net.named_modules():
        print('modules:', name, m)



if __name__ == '__main__':
    main()

Data augmentation

# Data augmentation
# ---------------------------------------#
# These operations live in the torchvision package
# 1. Flip
# 2. Rotate
# 3. Random Move & Crop
# 4. GAN: generate more samples
# 5. Noise: add Gaussian white noise N(0, 0.001)
# ---------------------------------------#

batch_size=200
learning_rate=0.01
epochs=10

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([    # Compose works like nn.Sequential
                       transforms.RandomHorizontalFlip(),    # horizontal flip (random: it may or may not flip)
                       transforms.RandomVerticalFlip(),    # vertical flip
                       transforms.RandomRotation(15),    # rotate by a random angle up to the given degrees
                       transforms.RandomRotation([90, 180, 270]),    # intended to pick one of 90/180/270 degrees; note RandomRotation actually expects a (min, max) pair
                       transforms.Resize([32, 32]),    # takes a list
                       transforms.RandomCrop([28, 28]),    # crop
                       transforms.ToTensor(),
                       # transforms.Normalize((0.1307,), (0.3081,))
                   ])),    # x is transformed into x'
    batch_size=batch_size, shuffle=True)

CIFAR-10 and ResNet18 in practice

resnet.py

import torch
from torch import nn
from torch.nn import functional as F    # F and nn are often used interchangeably


class ResBlk(nn.Module):
    """
    resnet block: a basic building block of ResNet
    """

    def __init__(self, ch_in, ch_out, stride=1):
        """

        :param ch_in:
        :param ch_out:
        """
        super(ResBlk, self).__init__()

        # we add stride support for resbok, which is distinct from tutorials.
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        self.extra = nn.Sequential()    # an empty nn.Sequential() by default
        if ch_out != ch_in:    # if the channel counts differ, map ch_in to ch_out; this is the ResNet shortcut (skip connection) branch
            # [b, ch_in, h, w] => [b, ch_out, h, w]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )
    # -------------------------------#
    # Question: why is forward never called explicitly when the instance is used?
    # Because nn.Module implements __call__, which internally calls forward.
    # Calling the instance invokes the parent class's __call__ (the subclass does not define one),
    # and since the subclass overrides forward, the subclass's forward is the one that runs.
    # -------------------------------#
    def forward(self, x):
        """

        :param x: [b, ch, h, w]
        :return:
        """
        out = F.relu(self.bn1(self.conv1(x)))    # conv layer, BN layer, then ReLU
        out = self.bn2(self.conv2(out))    # conv layer, BN layer

        # short cut: the shortcut connection
        # extra module: [b, ch_in, h, w] => [b, ch_out, h, w]
        out = self.extra(x) + out    # element-wise add
        out = F.relu(out)    # a final ReLU before the output
        print('out shape for debugging:', out.shape)
        return out




class ResNet18(nn.Module):

    def __init__(self):
        super(ResNet18, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0),
            nn.BatchNorm2d(64)
        )
        # followed 4 blocks
        # [b, 64, h, w] => [b, 128, h, w]    # note: h and w change here as well (stride=2)
        self.blk1 = ResBlk(64, 128, stride=2)
        # [b, 128, h, w] => [b, 256, h, w]
        self.blk2 = ResBlk(128, 256, stride=2)
        # # [b, 256, h, w] => [b, 512, h, w]
        self.blk3 = ResBlk(256, 512, stride=2)
        # # [b, 512, h, w] => [b, 1024, h, w]
        self.blk4 = ResBlk(512, 512, stride=2)    # the video uses self.blk4 = ResBlk(512, 1024) here

        self.outlayer = nn.Linear(512*1*1, 10)    # followed by a final fully connected layer

    def forward(self, x):
        """

        :param x:
        :return:
        """
        x = F.relu(self.conv1(x))    # first a conv layer followed by ReLU; afterwards x.shape = [128, 64, 10, 10]
        # [b, 64, h, w] => [b, 512, h, w]
        x = self.blk1(x)    # after this block x.shape = torch.Size([128, 128, 5, 5])
        x = self.blk2(x)    # after this block x.shape = torch.Size([128, 256, 3, 3])
        x = self.blk3(x)    # after this block x.shape = torch.Size([128, 512, 2, 2])
        x = self.blk4(x)    # after this block x.shape = torch.Size([128, 512, 2, 2])


        # print('after conv:', x.shape) #[b, 512, 2, 2]
        # [b, 512, h, w] => [b, 512, 1, 1]
        x = F.adaptive_avg_pool2d(x, [1, 1])
        # print('after pool:', x.shape)
        x = x.view(x.size(0), -1)    # after this x.shape = torch.Size([128, 512]); x.size(0) = 128
        x = self.outlayer(x)    # final fully connected layer; after this x.shape = torch.Size([128, 10])


        return x



def main():
    # ResBlk
    blk = ResBlk(64, 128, stride=4)
    tmp = torch.randn(2, 64, 32, 32)
    out = blk(tmp)
    print('block:', out.shape)    # block: torch.Size([2, 128, 8, 8])

    # ResNet18
    x = torch.randn(2, 3, 32, 32)
    model = ResNet18()
    out = model(x)
    print('resnet:', out.shape)    # resnet: torch.Size([2, 10])




if __name__ == '__main__':
    main()



# ---------------ResNet18 model----------------------------#
"""
ResNet18(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blk1): ResBlk(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blk2): ResBlk(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blk3): ResBlk(
    (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blk4): ResBlk(
    (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential()
  )
  (outlayer): Linear(in_features=512, out_features=10, bias=True)
)
"""
# ----------------------------------------------------------------------#

main.py

import torch
from torch.utils.data import DataLoader    # DataLoader loads data in batches
from torchvision import datasets    # datasets provided by torchvision
from torchvision import transforms
from torch import nn, optim

from lenet5 import Lenet5
from resnet import ResNet18


def main():
    batchsz = 128    # batch size

    # torchvision provides some ready-made datasets  #  1st arg: target directory, 2nd arg: train=True, transform: transformations applied to the data
    cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]), download=False)    # download=True downloads the dataset automatically
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)    # DataLoader conveniently loads a batch at a time; shuffle randomizes the order

    cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]), download=False)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)


    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)    # x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])

    device = torch.device('cuda')    # so the GPU can be used below
    # model = Lenet5().to(device)
    model = ResNet18().to(device)

    criteon = nn.CrossEntropyLoss().to(device)    # this loss includes softmax; cross-entropy is used because this is a classification task
    optimizer = optim.Adam(model.parameters(), lr=1e-3)    # pass the network's parameters to the optimizer
    print(model)

    for epoch in range(1000):

        model.train()    # switch the model to train mode
        for batchidx, (x, label) in enumerate(cifar_train):    # iterate over the batches of this epoch
            # [b, 3, 32, 32]
            # [b]
            x, label = x.to(device), label.to(device)    # move onto cuda


            logits = model(x)    # logits differ from a prediction in that softmax has not yet been applied
            # logits: [b, 10]
            # label:  [b]    # the label does not need probabilities
            # loss: tensor scalar    # a dim-0 scalar
            loss = criteon(logits, label)    # this label is y

            # backprop
            optimizer.zero_grad()    # without zeroing, gradients would accumulate
            loss.backward()
            optimizer.step()    # update the weights; the updated weights live in the parameters the optimizer manages


        print(epoch, 'loss:', loss.item())    # for a scalar tensor, item() extracts the Python number

        # test
        model.eval()    # switch the model to eval mode
        with torch.no_grad():    # tell autograd not to build the graph / track gradients
            # test
            total_correct = 0    # number of correct predictions
            total_num = 0    # total number of samples
            for x, label in cifar_test:
                # [b, 3, 32, 32]
                # [b]
                x, label = x.to(device), label.to(device)

                # [b, 10]
                logits = model(x)
                # [b]
                pred = logits.argmax(dim=1)
                # [b] vs [b] => scalar tensor
                correct = torch.eq(pred, label).float().sum().item()
                total_correct += correct
                total_num += x.size(0)
                # print(correct)

            acc = total_correct / total_num
            print(epoch, 'test acc:', acc)



if __name__ == '__main__':
    main()

Dataset directory layout

-- cifar
    --cifar-10-batches-py
        --batches.meta
        --data_batch_1
        --data_batch_2
        --data_batch_3
        --data_batch_4
        --data_batch_5
        --readme.html
        --test_batch
Original source: https://www.cnblogs.com/zranguai/p/14560666.html