链接

项目仓库:pytorch | github

pytorch 源码:pytorch/pytorch | source code | github

torch 源码:pytorch/pytorch/torch source code | github

torchvision源码:pytorch/vision | source code | github

torch.Tensor

torch.Tensor

ways to make tensor

  • Uniform distribution on the interval [0,1)
    torch.rand

  • Normal distribution with mean 0 and variance 1 (standard normal distribution)
    torch.randn

  • Random integers generated uniformly between low (inclusive) and high (exclusive)
    torch.randint

  • Returns a tensor filled with the scalar value 1
    torch.ones

  • Demo

    1
    2
    3
    4
    5
    6
    import torch
    tensor = torch.randint(low=0, high=3, size=(3, 4))
    print(tensor)
    # tensor([[1, 2, 1, 1],
    # [0, 2, 1, 2],
    # [1, 2, 0, 0]])

torch.set_printoptions

  • Docs
    torch.set_printoptions

  • Demo

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    import torch

    tensor = torch.tensor([torch.pi, 1/3])
    print(tensor)
    # tensor([3.1416, 0.3333])

    # Set the floating-point print precision: keep 2 digits after the decimal point
    torch.set_printoptions(precision=2)
    print(tensor)
    # tensor([3.14, 0.33])

torch.Tensor.unfold

torch.Tensor.unfold

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import torch

x = torch.tensor([[1., 2., 3., 4.],
[5., 6., 7., 8.],
[9., 10., 11., 12.]])

x_unfold = x.unfold(0, 1, 1)
print(x_unfold)
# tensor([[[ 1.],
# [ 2.],
# [ 3.],
# [ 4.]],
# [[ 5.],
# [ 6.],
# [ 7.],
# [ 8.]],
# [[ 9.],
# [10.],
# [11.],
# [12.]]])
x_unfold = x_unfold.unfold(1, 2, 2)
print(x_unfold)
# tensor([[[[ 1., 2.]],
# [[ 3., 4.]]],
# [[[ 5., 6.]],
# [[ 7., 8.]]],
# [[[ 9., 10.]],
# [[11., 12.]]]])
x_unfold = x_unfold.contiguous().view(-1, 1, 2)
print(x_unfold)
# tensor([[[ 1., 2.]],
# [[ 3., 4.]],
# [[ 5., 6.]],
# [[ 7., 8.]],
# [[ 9., 10.]],
# [[11., 12.]]])

Tensor.views

Tensor Views

Tensor.permute

torch.Tensor.permute

torch.unique

torch.unique

1
2
3
4
5
6
7
8
9
import torch

x = torch.randint(0, 5, (3, 4))
print(x)
# tensor([[2, 1, 2, 4],
# [4, 3, 4, 3],
# [3, 2, 4, 3]])
print(x.unique(return_counts=True))
# (tensor([1, 2, 3, 4]), tensor([1, 3, 4, 4]))

torch.max

torch.max

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import torch

input = torch.randn(2, 3, 4)
print(input) # shape: torch.Size([2, 3, 4])
# tensor([[[ 0.2506, 0.2790, -0.0562, -0.1434],
# [ 0.8521, -0.8722, 0.4884, 1.0183],
# [-0.7518, 0.0761, 0.0518, -0.8154]],

# [[-3.4666, -0.4228, -0.7586, -1.5254],
# [ 1.5851, 1.6208, 0.3673, -0.8441],
# [ 0.7171, -1.0838, 0.9449, 0.3339]]])

output = torch.max(input, dim=0)
print(output) # shape: torch.Size([3, 4])
# torch.return_types.max(
# values=tensor([[ 0.2506, 0.2790, -0.0562, -0.1434],
# [ 1.5851, 1.6208, 0.4884, 1.0183],
# [ 0.7171, 0.0761, 0.9449, 0.3339]]),
# indices=tensor([[0, 0, 0, 0],
# [1, 1, 0, 0],
# [1, 0, 1, 1]])
# )

output = torch.max(input, dim=1)
print(output) # shape: torch.Size([2, 4])
# torch.return_types.max(
# values=tensor([[0.8521, 0.2790, 0.4884, 1.0183],
# [1.5851, 1.6208, 0.9449, 0.3339]]),
# indices=tensor([[1, 0, 1, 1],
# [1, 1, 2, 2]])
# )

output = torch.max(input, dim=2)
print(output) # shape: torch.Size([2, 3])
# torch.return_types.max(
# values=tensor([[ 0.2790, 1.0183, 0.0761],
# [-0.4228, 1.6208, 0.9449]]),
# indices=tensor([[1, 3, 1],
# [1, 1, 2]])
# )

torch.argmax

torch.argmax

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import torch

input = torch.randn(2, 3, 4)
print(input) # shape: torch.Size([2, 3, 4])
# tensor([[[ 0.3345, 0.1024, 0.5986, 0.9027],
# [-0.6041, 0.7728, -0.3231, 0.2327],
# [-0.8263, 0.1184, 1.3489, -1.4658]],

# [[-0.5787, 0.3613, -1.7988, -0.0108],
# [-0.3335, -0.4843, 1.0525, -3.6239],
# [-0.7718, -0.7116, -1.3299, -2.8085]]])

output = torch.argmax(input, dim=0)
print(output) # shape: torch.Size([3, 4])
# tensor([[0, 1, 0, 0],
# [1, 0, 1, 0],
# [1, 0, 0, 0]])

output = torch.argmax(input, dim=1)
print(output) # shape: torch.Size([2, 4])
# tensor([[0, 1, 2, 0],
# [1, 0, 1, 0]])

output = torch.argmax(input, dim=2)
print(output) # shape: torch.Size([2, 3])
# tensor([[3, 1, 2],
# [1, 2, 1]])

torch.unsqueeze

torch.unsqueeze

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import torch

x = torch.tensor([0, 1, 2])
print(x, x.shape)
# tensor([0, 1, 2]) torch.Size([3])

x1 = x.unsqueeze(0)
print(x1, x1.shape)
# tensor([[0, 1, 2]]) torch.Size([1, 3])

x2 = x.unsqueeze(1)
print(x2, x2.shape)
# tensor([[0],
# [1],
# [2]]) torch.Size([3, 1])

y = x1-x2
print(y, y.shape)
# tensor([[ 0, 1, 2],
# [-1, 0, 1],
# [-2, -1, 0]]) torch.Size([3, 3])

torch.squeeze

torch.squeeze

1
2
3
4
5
x = torch.zeros(2, 1, 2, 1, 2) # shape: [2, 1, 2, 1, 2]
torch.squeeze(x) # shape: [2, 2, 2]
torch.squeeze(x, 0) # shape: [2, 1, 2, 1, 2]
torch.squeeze(x, 1) # shape: [2, 2, 1, 2]
torch.squeeze(x, (1, 2, 3)) # shape: [2, 2, 2]

torch.roll

torch.roll

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
x = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]).view(4, 2)
# tensor([[1, 2],
# [3, 4],
# [5, 6],
# [7, 8]])

# Roll the tensor input along the given dimension(s).
torch.roll(x, 2, 0)
# tensor([[5, 6],
# [7, 8],
# [1, 2],
# [3, 4]])
torch.roll(x, 1, 1)
# tensor([[2, 1],
# [4, 3],
# [6, 5],
# [8, 7]])
torch.roll(x, shifts=(2, 1), dims=(0, 1))
# tensor([[6, 5],
# [8, 7],
# [2, 1],
# [4, 3]])

# If dims is None, the tensor will be flattened before rolling
# and then restored to the original shape.
torch.roll(x, 1)
# tensor([[8, 1],
# [2, 3],
# [4, 5],
# [6, 7]])

Broadcasting semantics (广播机制)

Broadcasting semantics | pytorch docs

Broadcasting | numpy docs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import torch

x = torch.tensor([[1, 2, 3]]) # shape: torch.Size([1, 3])
y = torch.tensor([[1], # shape: torch.Size([3, 1])
[2],
[3]])
print(x-y)
# tensor([[ 0, 1, 2],
# [-1, 0, 1],
# [-2, -1, 0]])
# shape: torch.Size([3, 3])
print(y-x)
# tensor([[ 0, -1, -2],
# [ 1, 0, -1],
# [ 2, 1, 0]])
# shape: torch.Size([3, 3])
1
2
3
4
5
     x      -      y
👇
[[1, 2, 3], [[1, 1, 1],
[1, 2, 3] - [2, 2, 2],
[1, 2, 3]] [3, 3, 3]]

Pytorch实现线性回归

torch.nn.Linear介绍

torch.nn.Linear(in_features, out_features, bias=True)

Linear是一个类, 它按照下式计算输入数据的线性变换

$$y = xA^T + b$$

其中 $x$ 是行向量, 当批量处理样本时, $\bold{x}$ 是由 $n$ 个行向量构成的矩阵, 每一行都是一个样本.

Linear一共有三个输入参数:

  • 第一个参数为in_features, int型: 输入数据的特征数
  • 第二个参数为out_features, int型: 输出数据的特征数
  • 第三个参数为bias, bool型: 默认为True, 设为False时, 偏置项为0

其中第一、第二个参数决定了变换矩阵 $A^T$ 的尺寸, $A^T$ 是一个 $\rm in\_f\times out\_f$ 的矩阵

$A^T, b$ 由均匀分布 $U(-\frac{1}{\sqrt{\rm in\_f}},\frac{1}{\sqrt{\rm in\_f}})$ 随机初始化

这个分布叫做 $\text{``kaiming uniform''}$, 是 2015 年 2 月何愷明 (Kaiming He) 在他的论文中提出的:

Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification

1
2
3
4
5
>>> model = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> output = model(input)
>>> print(output.size())
torch.Size([128, 30])

torch.nn.MSELoss介绍

torch.nn.MSELoss

MSE为均方误差(mean square error), 而MSELoss将返回一个对象用于计算input和target间的MSE.

MSELoss的reduction有三个选项none|mean(default)|sum

1
2
3
#设置输入值和目标值
input = torch.tensor([[1.,1,1],[1,1,1]])
target = torch.tensor([[1.,2,3],[4,5,6]])
1
2
3
4
5
6
7
8
9
10
#创建loss对象
loss = torch.nn.MSELoss(reduction="none"|"mean"|"sum")
output = loss(input, target)
#reduction="none"时输出output
tensor([[ 0., 1., 4.],
[ 9., 16., 25.]])
#reduction="mean"时输出output
tensor(9.1667)
#reduction="sum"时输出output
tensor(55.)

用Linear实现线性回归

  • 数据
1
2
3
import torch 
x_data = torch.tensor([[1.0],[2.0],[3.0]])
y_data = torch.tensor([[2.0],[4.0],[6.0]])
  • 模型搭建
1
2
3
4
5
6
7
8
9
10
class LinearModel(torch.nn.Module):
def __init__(self):
super(LinearModel, self).__init__()
self.linear = torch.nn.Linear(1, 1)

def forward(self, x):
y_pred = self.linear(x)
return y_pred

model = LinearModel()
  • 损失函数
1
criterion = torch.nn.MSELoss(reduction="sum")
  • 优化器
1
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)
  • 参数训练
1
2
3
4
5
6
7
8
for epoch in range(100):
y_pred = model(x_data)
loss = criterion(y_pred, y_data)
print(epoch, loss)

optimizer.zero_grad()
loss.backward()
optimizer.step()
  • 打印输出
1
2
3
4
5
6
7
8
#输出A^T和bias
print("A^T = ", model.linear.weight.item())
print("b = ", model.linear.bias.item())

#测试模型
x_test = torch.Tensor([4.0])
y_test = model(x_test)
print("y_pred = ", y_test.data)

torch.nn.modules.linear

torch.nn.modules.linear

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
class Linear(Module):
__constants__ = ['in_features', 'out_features']
in_features: int
out_features: int
weight: Tensor

def __init__(self, in_features: int, out_features: int, bias: bool = True,
device=None, dtype=None) -> None:
factory_kwargs = {'device': device, 'dtype': dtype}
super().__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
if bias:
self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
else:
self.register_parameter('bias', None)
self.reset_parameters()

def reset_parameters(self) -> None:
# Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
# uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see
# https://github.com/pytorch/pytorch/issues/57109
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
init.uniform_(self.bias, -bound, bound)

def forward(self, input: Tensor) -> Tensor:
return F.linear(input, self.weight, self.bias) # <-- Note

Note that modules.Linear uses F.linear to compute the forward result:

torch.nn.functional.linear(input, weight, bias=None)

For the incoming input $x$, weight matrix $A$ and bias $b$, F.linear applies the linear transformation:

$$y = xA^T + b$$

If the input $x$ has shape (*, in_features), where * means any number of additional dimensions (including none), and the weight matrix $A$ has shape (out_features, in_features), then the output has shape (*, out_features).

Example 1:

$$x: \begin{bmatrix} \colorbox{yellow}{1, 2, 3} \\ \colorbox{pink}{4, 5, 6}\end{bmatrix},\quad A: \begin{bmatrix} 1, 0, 0\\0, 1, 0\end{bmatrix},\quad b: \text{None}$$

$$y = xA^T = \begin{bmatrix} \colorbox{yellow}{1, 2, 3} \\ \colorbox{pink}{4, 5, 6}\end{bmatrix} \begin{bmatrix} 1, 0 \\ 0, 1 \\ 0, 0\end{bmatrix} = \begin{bmatrix} \colorbox{yellow}{1, 2} \\ \colorbox{pink}{4, 5}\end{bmatrix}$$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 3],
[4, 5, 6]]) # shape: [2, 3]

weight = torch.tensor([[1, 0, 0],
[0, 1, 0]]) # shape: [2, 3]

output = F.linear(input, weight) # shape: [2, 2]

print(output)
# tensor([[1, 2],
# [4, 5]])

Example 2:

$$x: \begin{bmatrix} \begin{bmatrix} \colorbox{yellow}{1, 2, 3} \\ \colorbox{pink}{4, 5, 6}\end{bmatrix}, \begin{bmatrix} \colorbox{yellow}{6, 7, 8} \\ \colorbox{pink}{9, 0, 1}\end{bmatrix} \end{bmatrix},\quad A: \begin{bmatrix} 1, 0, 0\\0, 1, 0\end{bmatrix},\quad b: \text{None}$$

$$y = xA^T = \begin{bmatrix} \begin{bmatrix} \colorbox{yellow}{1, 2, 3} \\ \colorbox{pink}{4, 5, 6}\end{bmatrix}, \begin{bmatrix} \colorbox{yellow}{6, 7, 8} \\ \colorbox{pink}{9, 0, 1}\end{bmatrix} \end{bmatrix} \begin{bmatrix} 1, 0 \\ 0, 1 \\ 0, 0\end{bmatrix} = \begin{bmatrix} \begin{bmatrix}\colorbox{yellow}{1, 2} \\ \colorbox{pink}{4, 5}\end{bmatrix}, \begin{bmatrix}\colorbox{yellow}{6, 7} \\ \colorbox{pink}{9, 0}\end{bmatrix} \end{bmatrix}$$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import torch
import torch.nn.functional as F

input = torch.tensor([[[1, 2, 3],
[4, 5, 6]],
[[6, 7, 8],
[9, 0, 1]]]) # shape: [2, 2, 3]

weight = torch.tensor([[1, 0, 0],
[0, 1, 0]]) # shape: [2, 3]

output = F.linear(input, weight) # shape: [2, 2, 2]

print(output)
# tensor([[[1, 2],
# [4, 5]],

# [[6, 7],
# [9, 0]]])

Note: The basic element that PyTorch (and deep learning in general) operates on is the vector, more specifically the row vector. A matrix can be thought of as a set of row vectors, and a higher-dimensional tensor can likewise be viewed as a larger set of row vectors. Therefore, when you apply a transformation to a 2-or-higher-dimensional tensor, what you actually transform is a large set of row vectors: you transform each vector in the set and keep the shape of the set.

torch.nn

pytorch/torch/nn | source code | github

Softmax

torch.nn.Softmax

1
torch.nn.Softmax(dim=None)

dim (int) – A dimension along which Softmax will be computed (so every slice along dim will sum to 1).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import torch

input = torch.randn(2, 3)
print(input)
# tensor([[-1.3018, -0.9303, 0.2111],
# [ 0.7206, 0.0933, -0.8475]])
print(input.sum(dim=0)) # tensor([-0.5812, -0.8370, -0.6364])
print(input.sum(dim=1)) # tensor([-2.0210, -0.0337])

output = torch.nn.Softmax(dim=0)(input)
print(output.sum(dim=0)) # tensor([1., 1., 1.])
print(output.sum(dim=1)) # tensor([1.1236, 1.8764])

output = torch.nn.Softmax(dim=1)(input)
print(output.sum(dim=0)) # tensor([0.7170, 0.5139, 0.7691])
print(output.sum(dim=1)) # tensor([1., 1.])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import torch

input = torch.randn(2, 3, 4)
print(input)
# tensor([[[ 1.4035, -1.5199, -0.5251, 0.0535],
# [ 0.2582, 2.2726, -0.6482, -1.5214],
# [-0.4722, -0.4668, -0.3910, -0.3729]],

# [[ 0.3186, 1.2075, -0.4249, 0.3235],
# [ 0.0873, -1.7824, 0.5938, -2.2797],
# [-0.9566, -1.0026, 0.0575, 0.3927]]])

output = torch.nn.Softmax(dim=0)(input)
print(output.sum(dim=0)) # shape: torch.Size([3, 4])
# tensor([[1.0000, 1.0000, 1.0000, 1.0000],
# [1.0000, 1.0000, 1.0000, 1.0000],
# [1.0000, 1.0000, 1.0000, 1.0000]])

output = torch.nn.Softmax(dim=1)(input)
print(output.sum(dim=1)) # shape: torch.Size([2, 4])
# tensor([[1., 1., 1., 1.],
# [1., 1., 1., 1.]])

output = torch.nn.Softmax(dim=2)(input)
print(output.sum(dim=2)) # shape: torch.Size([2, 3])
# tensor([[1.0000, 1.0000, 1.0000],
# [1.0000, 1.0000, 1.0000]])

LogSoftmax

torch.nn.LogSoftmax

torch.nn.LogSoftmax() 对输入的 tensor 进行 $\rm log\circ softmax$ 操作, 例如 $(n+1)\times (c+1)$ 维度的输入 tensor ($n+1$ 个样本, $c+1$ 个类别)

$$\begin{split} \rm tensor([&[a_{00},a_{01},\cdots,a_{0c}],\\ &[a_{10},a_{11},\cdots,a_{1c}],\\ &\cdots\\ &[a_{n0},a_{n1},\cdots,a_{nc}] ]) \end{split}$$

经过 $\rm torch.nn.LogSoftmax(dim=1)$ 操作后返回

1
2
3
4
5
6
7
\begin{split}
\rm tensor([&\rm log\circ softmax([a_{00},a_{01},\cdots,a_{0c}]),\\
&\rm log\circ softmax([a_{10},a_{11},\cdots,a_{1c}]),\\
&\vdots\\
&\rm log\circ softmax([a_{n0},a_{n1},\cdots,a_{nc}])
])
\end{split}

其中 $\rm log\circ softmax$ 如下计算

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
\begin{array}{c}
[[\log(\frac{[e^{a_{00}},e^{a_{01}},\cdots,e^{a_{0c}}]}{\sum_{j=0}^{c}e^{a_{0j}}})],\\
\vdots\\
[\log(\frac{[e^{a_{n0}},e^{a_{n1}},\cdots,e^{a_{nc}}]}{\sum_{j=0}^{c}e^{a_{nj}}})]]
\end{array}
=
\begin{array}{c}
[[a_{00},a_{01},\cdots,a_{0c}],\\
\vdots\\
[a_{n0},a_{n1},\cdots,a_{nc}]]
\end{array}
-
\begin{array}{c}
[[\log(e^{a_{00}}+\cdots+e^{a_{0c}})],\\
\vdots\\
[\log(e^{a_{n0}}+\cdots+e^{a_{nc}})]]
\end{array}

经过 $\rm torch.nn.LogSoftmax(dim=0)$ 操作将返回

$$\begin{split} \rm tensor( \left[\rm l\circ s\begin{pmatrix}a_{00}\\a_{10}\\\vdots\\a_{n0}\end{pmatrix}, l\circ s\begin{pmatrix}a_{01}\\a_{11}\\\vdots\\a_{n1}\end{pmatrix},\cdots, l\circ s\begin{pmatrix}a_{0c}\\a_{1c}\\\vdots\\a_{nc}\end{pmatrix} \right]) \end{split}$$

$\rm log \circ softmax(\bold{z})$ 的性质

$$\begin{split} \log \circ \text{ softmax}(\bold{z}) &= \log([\frac{e^{z_1}}{\sum e^z}, \cdots, \frac{e^{z_n}}{\sum e^z}])\\ &= [\log(\frac{e^{z_1}}{\sum e^z}), \cdots, \log(\frac{e^{z_n}}{\sum e^z})]\\ &= [z_1-\log(\sum e^z), \cdots, z_n-\log(\sum e^z)]\\ &= \bold{z}-\log(\sum_{i=1}^ne^{z_i}) \end{split}$$

Pytorch演示:

1
2
3
#输入向量 torch.ones(2,3)
tensor([[1., 1., 1.],
[1., 1., 1.]])
1
2
3
4
5
# dim=1
m = nn.LogSoftmax(dim=1)
# 输出 m(torch.ones(2,3))
tensor([[-1.0986, -1.0986, -1.0986],
[-1.0986, -1.0986, -1.0986]])
1
2
3
4
5
#dim=0
m = nn.LogSoftmax(dim=0)
# 输出 m(torch.ones(2,3))
tensor([[-0.6931, -0.6931, -0.6931],
[-0.6931, -0.6931, -0.6931]])

其中 $1-\log(3e) = -\log(3)\approx -1.0986,\qquad 1-\log(2e)=-\log(2)\approx-0.6931$

LogSoftmax 对平移保持不变

$$\begin{split} \rm log\circ softmax(\bold{x}+c) &= [x_0+c,\cdots,x_n+c] - \log(\sum \bold{e}^{\bold{x}+c})\\ &=\bold{x}+c-\log(e^c\sum\bold{e^x})\\ &=\bold{x}-\log(\sum\bold{e^x})\\ &=\rm log\circ softmax(\bold{x}) \end{split}$$

例如

$$\begin{split} &\rm log\circ softmax\left( \begin{bmatrix} 0,1,2,3,4 \end{bmatrix} \right) \\ =&\rm log\circ softmax\left( \begin{bmatrix} 5,6,7,8,9 \end{bmatrix} \right) \\ =&\rm log\circ softmax\left( \begin{bmatrix} 10,11,12,13,14 \end{bmatrix} \right) \\ =&[-4.4519, -3.4519, -2.4519, -1.4519, -0.4519] \end{split}$$

NLLLoss

  • Pytorch Docs
    torch.nn.NLLLoss

  • Descriptions
    NLLLoss 全称 Negative Log Likelihood Loss, 用于计算输入 $x$ 和目标 $y$ 的 NLLLoss

    对于输入的 $\rm input$

    $$\begin{split} \rm tensor([&[a_{00},a_{01},\cdots,a_{0c}],\\ &[a_{10},a_{11},\cdots,a_{1c}],\\ &\cdots\\ &[a_{n0},a_{n1},\cdots,a_{nc}] ]) \end{split}$$

    以及目标 $\rm target = [y_0,y_1,\cdots,y_n],\quad y_{0,\cdots, n}\in\{0,1,\cdots,c\}$

    进行 $\rm torch.nn.NLLLoss(reduction=\text{``none|sum|mean''})$ 操作

    • reduction="none", 输出

      $\rm tensor([-a_{0,y_0}, -a_{1,y_1},\cdots,-a_{n,y_n}])$

      其中 $a_{0,y_0}$ 是第 0 个样本的第 $y_0$ 个值; $a_{1,y_1}$ 是第 1 个样本的第 $y_1$ 个值…

    • reduction="sum", 输出

      $\rm sum([-a_{0,y_0}, -a_{1,y_1},\cdots,-a_{n,y_n}])$

    • reduction="mean"(default), 输出

      $\rm mean([-a_{0,y_0}, -a_{1,y_1},\cdots,-a_{n,y_n}])$

NLLLoss on Segmentation

  • Descriptions
    For the inputs in segmentation, a Prediction with shape (Batch_size, Class_num, Height, Width) and a Mask or Ground Truth with shape (Batch_size, Height, Width), NLLLoss gives the following output with shape (Batch_size, Height, Width):

    $$\begin{gather*} \rm Output[b,h,w] = - Prediction[b,\underset{\in \{0,\cdots,C-1\}}{\underline{Mask[b,h,w]}},h,w]\\ b\in\{0,\cdots,B-1\},h\in\{0,\cdots,H-1\},w\in\{0,\cdots,W-1\} \end{gather*}$$

  • Demo

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    import torch
    from torch import nn

    # Batch_Size: 2, Channels(Classes): 3, Height: 4, Width: 5
    B, C, H, W = 2, 3, 4, 5
    mask = torch.randint(low=0, high=C, size=(B, H, W)) # ∈ {0, 1, 2}
    # mask.shape = torch.Size([2, 4, 5])
    prediction = torch.arange(B*C*H*W, dtype=torch.float).reshape(B, C, H, W)
    # prediction.shape = torch.Size([2, 3, 4, 5])
    nllloss = nn.NLLLoss(reduction='none')
    loss = nllloss(prediction, mask)
    # loss.shape = torch.Size([2, 4, 5])

    for b in range(B):
    for h in range(H):
    for w in range(W):
    assert loss[b,h,w]==-prediction[b,mask[b,h,w],h,w], "Unequal"

NLLLoss + LogSoftmax

通常会首先对 $\rm input$ 做 $\rm torch.nn.LogSoftmax(dim=1)$, 再进行 $\rm NLLLoss$, 这样得到的就是交叉熵损失

  • 直接进行 $\rm NLLLoss$ 操作

    1
    2
    3
    4
    5
    6
    7
    8
    #设置NLLLoss, reduction="none"直接输出每个样本的结果,不作sum\mean操作
    loss = nn.NLLLoss(reduction="none")
    #设置input, 3个样本
    input = torch.arange(15,dtype=float).reshape(3, 5)
    #设置target, 3个标签, 每个标签值介于[0,5)之间
    target = torch.tensor([1, 0, 4])
    #直接计算input和target的loss
    output = loss(input, target)
    1
    2
    3
    4
    5
    6
    7
    8
    # 输出 input
    tensor([[ 0., 1., 2., 3., 4.],
    [ 5., 6., 7., 8., 9.],
    [10., 11., 12., 13., 14.]], dtype=torch.float64)
    # 输出 target
    tensor([1, 0, 4])
    # 输出 output
    tensor([ -1., -5., -14.], dtype=torch.float64)
  • 先取 $\rm LogSoftmax$ 再进行 $\rm NLLLoss$ 操作

    1
    2
    3
    4
    #设置LogSoftmax
    m = nn.LogSoftmax(dim=1)
    #对input取LogSoftmax操作再与target做Loss
    output = loss(m(input), target)
    1
    2
    3
    4
    5
    6
    7
    8
    # 输出 m(input)
    tensor([[-4.4519, -3.4519, -2.4519, -1.4519, -0.4519],
    [-4.4519, -3.4519, -2.4519, -1.4519, -0.4519],
    [-4.4519, -3.4519, -2.4519, -1.4519, -0.4519]], dtype=torch.float64)
    # 输出 target
    tensor([1, 0, 4])
    # 输出 output
    tensor([3.4519, 4.4519, 0.4519], dtype=torch.float64)

CrossEntropyLoss

  • Pytorch Docs
    torch.nn.CrossEntropyLoss

  • Formulas

    $$\rm CrossEntropyLoss(\textcolor{red}{input}, target) = NLLLoss(\textcolor{blue}{LogSoftmax(\textcolor{red}{input})},target)$$

  • Description
    对于输入的 $\rm input$, 其有 $(n+1)$ 个样本, $(c+1)$ 个类别

    $$\begin{split} \rm tensor([&[a_{00},a_{01},\cdots,a_{0c}],\\ &[a_{10},a_{11},\cdots,a_{1c}],\\ &\cdots\\ &[a_{n0},a_{n1},\cdots,a_{nc}] ]) \end{split}$$

    以及目标 $\rm target = [y_0,y_1,\cdots,y_n],\quad y_{0,\cdots, n}\in\{0,1,\cdots,c\}$

    进行 $\rm torch.nn.CrossEntropyLoss(reduction=\text{``none|sum|mean''})$ 操作

    • reduction="none", 输出

      $$\begin{split} &\rm [-log\circ softmax([a_{00},\cdots,a_{0c}])_{\color{red}y_0},\cdots,-log\circ softmax([a_{n0},\cdots,a_{nc}])_{\color{red}y_n}] \\ =&\rm [-\log\frac{\exp(a_{0\color{red}y_0})}{\exp(a_{00})+\cdots+\exp(a_{0c})},\cdots,-\log\frac{\exp(a_{n\color{red}y_n})}{\exp(a_{n0})+\cdots+\exp(a_{nc})}] \\ =& [l_0, \cdots, l_n] \end{split}$$

    • reduction="sum", 输出

      ${\rm sum}([l_0, \cdots, l_n])$

    • reduction="mean"(default), 输出

      ${\rm mean}([l_0, \cdots, l_n])$

  • Pytorch演示

    1
    2
    3
    4
    5
    6
    7
    8
    9
    # input和target
    input = torch.arange(15,dtype=float).reshape(3, 5)
    target = torch.tensor([1, 0, 4])

    m = nn.LogSoftmax(dim=1)
    NLloss = nn.NLLLoss(reduction="none")
    CRloss = nn.CrossEntropyLoss(reduction="none")
    output_NL = NLloss(m(input), target)
    output_CR = CRloss(input, target)
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    #输出m(input)
    tensor([[-4.4519, -3.4519, -2.4519, -1.4519, -0.4519],
    [-4.4519, -3.4519, -2.4519, -1.4519, -0.4519],
    [-4.4519, -3.4519, -2.4519, -1.4519, -0.4519]], dtype=torch.float64)
    #输出target
    tensor([1, 0, 4])
    #输出output_NL
    tensor([3.4519, 4.4519, 0.4519], dtype=torch.float64)
    #输出output_CR
    tensor([3.4519, 4.4519, 0.4519], dtype=torch.float64)

    注意上面的两个输出, $\rm NLloss(\textcolor{blue}{m(\textcolor{red}{input})}, target)$ 和 $\rm CRloss(\textcolor{red}{input}, target)$ 的计算结果是一样的.

CrossEntropyLoss on Classification

对于一个分类器如下, 这两种方法使用 loss 是等价的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import torch
from torch import nn

class Classifier(nn.Module):
def __init__(self, in_features: int, class_num: int) -> None:
super(Classifier, self).__init__()
self.mlp = nn.Linear(in_features=in_features, out_features=class_num)

def forward(self, input):
return self.mlp(input)

Batch, Channel = 8, 512
inputs = torch.rand(Batch, Channel) # shape: torch.Size([8, 512])
labels = torch.randint(low=0, high=10, size=(Batch,)) # shape: torch.Size([8])

classifier = Classifier(in_features=512, class_num=10)
outputs = classifier(inputs) # torch.Size([8, 10])

softmax = nn.Softmax(dim=-1)
NLloss = nn.NLLLoss(reduction="none")
CRloss = nn.CrossEntropyLoss(reduction="none")

loss_nl = NLloss(torch.log(softmax(outputs)), labels)
loss_cr = CRloss(outputs, labels)
print(loss_nl)
print(loss_cr)

CrossEntropyLoss 本身已经封装了一层 -Log(Softmax(x)), 所以如果模型本身已经对输出做了 Log(Softmax(x)) 操作, 可以考虑使用 NLLLoss 作为损失函数. 而对于没有使用 LogSoftmax 归一化的模型输出, 使用 CrossEntropyLoss 会更简洁, 但是注意在做 Evaluation 的时候需要额外对 outputs 做一次 Softmax(x) 操作作为概率输出, 再取 Softmax(x) 中最大值的 index 作为预测的 label.

对于 CrossEntropy 的公式应该如下描述

$$
CrossEntropy(x, label) = \sum_{i=0}^{B-1} \left(
-\log(\underline{\text{sft}(x[i])} [label[i]])
% +\sum_{j\neq label[i]} \log(\underline{\text{sft}(x[i])} [j])
\right)
$$

其中 x 是一个 Batch_size × Class_num 的 Tensor, 而 label 对应的是一个 int (非 one-hot 型) 的 indices. 对于 x 中的一个样本 x[i] ∈ [1 × C], 对其做 softmax 得到 softmax(x[i]) ∈ [1 × C] 表示每个类别的概率. 取这个样本的真实标签值 label[i] (这里不是 one-hot 编码, 只是 int 值). 使用 softmax(x[i]) 与 label[i] 做交叉熵, 得到的就是这个样本的交叉熵损失函数

$$
celoss[i] = -\log(\underline{sft(x[i])} [label[i]])
% +\sum_{j\neq label[i]} \log(\underline{sft(x[i])} [j])
$$

其中第一项是正确样本的预测概率取 -logsoftmax, 而第二项是所有分类错误的预测概率取 logsoftmax. 这里取正负是因为我们使用梯度下降法来降低 loss, 所以我们希望正样本的loss变高, 即取负号后的值变低, 而负样本的loss变低即本身(正号不取负)的值变低.

关于为什么要取 log?

CrossEntropyLoss on Segmentation

  • Descriptions
    For the inputs in segmentation, a Prediction with shape (Batch_size, Class_num, Height, Width) and a Mask or Ground Truth with shape (Batch_size, Height, Width), CrossEntropyLoss applies $\rm -Log\circ Softmax$ to every pixel of every batch of Prediction (each pixel has Class_num entries), chooses the i-th entry (as given by the mask), and finally gives the output with shape (Batch_size, Height, Width):

    $$\begin{gather*} \rm Output[b,h,w] = - \underline{LogSoftmax(dim=1)(Prediction)} \Big[b,\underset{\text{choose i-th class}}{\underline{Mask[b,h,w]}},h,w\Big]\\ b\in\{0,\cdots,B-1\},h\in\{0,\cdots,H-1\},w\in\{0,\cdots,W-1\} \end{gather*}$$

    The Prediction is actually the output of a model/network.

  • Demo

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    import torch
    from torch import nn

    # Batch_Size: 2, Channels(Classes): 3, Height: 4, Width: 5
    B, C, H, W = 2, 3, 4, 5
    mask = torch.randint(low=0, high=C, size=(B, H, W)) # ∈ {0, 1, 2}
    # mask.shape = torch.Size([2, 4, 5])
    prediction = torch.arange(B*C*H*W, dtype=torch.float).reshape(B, C, H, W)
    # prediction.shape = torch.Size([2, 3, 4, 5])
    crossentropyloss = nn.CrossEntropyLoss(reduction='none')
    loss = crossentropyloss(prediction, mask)
    # loss.shape = torch.Size([2, 4, 5])

    logsofmax_prediction = nn.LogSoftmax(dim=1)(prediction)
    for b in range(B):
    for h in range(H):
    for w in range(W):
    assert loss[b,h,w]==-logsofmax_prediction[b,mask[b,h,w],h,w], "Unequal"

RuntimeError: CUDA error: device-side assert triggered

This error happens when your mask(target) values are out of bounds. For example

  • Recurrence

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    import torch
    from torch import nn

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Batch_Size: 2, Height: 4, Width: 5
    mask = torch.randint(low=1, high=4, size=(2, 4, 5)).cuda()
    # Batch_Size: 2, Channels(Classes): 3, Height: 4, Width: 5
    output = torch.randn(2, 3, 4, 5).cuda()

    CE_Loss = nn.CrossEntropyLoss()
    loss = CE_Loss(output, mask)
    '''RuntimeError: CUDA error: device-side assert triggered'''

The mask contains values 1, 2 and 3, but output has only 3 classes which are “0”, “1” and “2”, so the value 3 is out of bounds.
When you run this code on the CPU instead, you get a more explicit error:

  • Run above code on cpu

    1
    2
    3
    4
    ...
    # loss = CE_Loss(output, mask)
    loss = CE_Loss(output.cpu(), mask.cpu())
    '''IndexError: Target 3 is out of bounds.'''

Conv2d

torch.nn.Conv2d

使用 Conv2d 方法实现卷积非常方便, 只需要设置 in_channels, out_channels, kernel_size 三个参数就可以使用了, 不需要关注输入输出图片的长和宽.
其它参数包括 stride, padding, bias 也都可以指定, 详细的参数可以查看官方文档.
下面是一个实例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import torch

# Random input batch (an "image"). Conv2d expects the layout (N, C, H, W),
# so the third dimension is the height and the fourth is the width.
input = torch.randn(
    1,    # batch_size
    5,    # in_channels
    100,  # height
    100,  # width
)

# Define a convolution made of 10 kernels, each of shape 5 x (3 x 3).
conv = torch.nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3)

# Apply the convolution to the input.
output = conv(input)

print(input.shape)        # torch.Size([1, 5, 100, 100])
print(conv.weight.shape)  # torch.Size([10, 5, 3, 3])
print(output.shape)       # torch.Size([1, 10, 98, 98])

转置卷积

移动窗口的卷积运算可以转换成矩阵乘法, 将输入图片中的像素按从左到右从上到下的顺序排列成一个列向量, 卷积核的每个窗口都可以同样排成一个行向量, 整个卷积核对应着一个矩阵, 其行数是卷积窗口的数量, 列数是输入图片的像素数.
例如, 对于 $4\times4$ 的输入图片和 $3\times3$ 的卷积核, 做 padding=0, stride=1 的卷积运算, 得到一个 $2\times2$ 的输出. 将这个过程用矩阵乘法表示如下:

$$\scriptsize{ \begin{bmatrix} y_{00} \\ y_{01} \\ y_{10} \\ y_{11} \end{bmatrix} = \left[\begin{array}{cccc|cccc|cccc|cccc} w_{0,0} & w_{0,1} & w_{0,2} & 0 & w_{1,0} & w_{1,1} & w_{1,2} & 0 & w_{2,0} & w_{2,1} & w_{2,2} & 0 & 0 & 0 & 0 & 0 \\ 0 & w_{0,0} & w_{0,1} & w_{0,2} & 0 & w_{1,0} & w_{1,1} & w_{1,2} & 0 & w_{2,0} & w_{2,1} & w_{2,2} & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & w_{0,0} & w_{0,1} & w_{0,2} & 0 & w_{1,0} & w_{1,1} & w_{1,2} & 0 & w_{2,0} & w_{2,1} & w_{2,2} & 0 \\ 0 & 0 & 0 & 0 & 0 & w_{0,0} & w_{0,1} & w_{0,2} & 0 & w_{1,0} & w_{1,1} & w_{1,2} & 0 & w_{2,0} & w_{2,1} & w_{2,2} \end{array}\right] \begin{bmatrix} x_{00} \\ x_{01} \\ x_{02} \\ x_{03} \\ \vdots \\ x_{30} \\ x_{31} \\ x_{32} \\ x_{33} \end{bmatrix} }$$

上面矩阵中的每一行都对应着一个卷积的窗口, 它们和输入图片做 element-wise 的乘积再求和, 即得到对应窗口位置的卷积输出, 如下示意:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
\scriptsize
\begin{bmatrix}
\textcolor{red}{y_{00} }\\ y_{01} \\ y_{10} \\ y_{11}
\end{bmatrix}
=
\begin{matrix}
\textcolor{red}{
\begin{bmatrix}
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0 \\
0 & 0 & 0 & 0
\end{bmatrix}}
\\
\begin{bmatrix}
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2} \\
0 & 0 & 0 & 0
\end{bmatrix}
\\
\begin{bmatrix}
0 & 0 & 0 & 0 \\
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0
\end{bmatrix}
\\
\begin{bmatrix}
0 & 0 & 0 & 0 \\
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2}
\end{bmatrix}
\end{matrix}
*
\textcolor{red}{
\begin{bmatrix}
x_{00} & x_{01} & x_{02} & x_{03} \\
x_{10} & x_{11} & x_{12} & x_{13} \\
x_{20} & x_{21} & x_{22} & x_{23} \\
x_{30} & x_{31} & x_{32} & x_{33}
\end{bmatrix}}

而转置卷积是在上述矩阵乘法形式的卷积的基础上, 将变换矩阵转置. 从而原本 $\overset{4\times4}{16}\rightarrow \overset{2\times2}{4}$ 的线性变换, 变成了 $\overset{2\times2}{4}\rightarrow \overset{4\times4}{16}$ 的线性变换. 如此可以对图片进行放大, 进行上采样.

$$\scriptsize{ \begin{bmatrix} \textcolor{red}{y'_{00}} \\ y'_{01} \\ y'_{02} \\ y'_{03} \\ \vdots \\ y'_{30} \\ y'_{31} \\ y'_{32} \\ y'_{33} \end{bmatrix} = \left[\begin{array}{cccc} \textcolor{red}{w_{0,0}} & \textcolor{red}{0} & \textcolor{red}{0} & \textcolor{red}{0} \\ w_{0,1} & w_{0,0} & 0 & 0 \\ w_{0,2} & w_{0,1} & 0 & 0 \\ 0 & w_{0,2} & 0 & 0 \\ \hline w_{1,0} & 0 & w_{0,0} & 0 \\ w_{1,1} & w_{1,0} & w_{0,1} & w_{0,0} \\ w_{1,2} & w_{1,1} & w_{0,2} & w_{0,1} \\ 0 & w_{1,2} & 0 & w_{0,2}\\ \hline w_{2,0} & 0 & w_{1,0} & 0 \\ w_{2,1} & w_{2,0} & w_{1,1} & w_{1,0} \\ w_{2,2} & w_{2,1} & w_{1,2} & w_{1,1} \\ 0 & w_{2,2} & 0 & w_{1,2} \\ \hline 0 & 0 & w_{2,0} & 0 \\ 0 & 0 & w_{2,1} & w_{2,0} \\ 0 & 0 & w_{2,2} & w_{2,1} \\ 0 & 0 & 0 & w_{2,2} \end{array}\right] \textcolor{red}{ \begin{bmatrix} x'_{00} \\ x'_{01} \\ x'_{10} \\ x'_{11} \end{bmatrix}} }$$

转置矩阵中的每一列都对应着一个卷积的窗口, 上面的过程对应着, 卷积窗口和输入图片作乘积再求和, 如下示意:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
\tiny
\begin{split}
\begin{bmatrix}
y'_{00} & y'_{01} & y'_{02} & y'_{03}\\
y'_{10} & y'_{11} & y'_{12} & y'_{13}\\
y'_{20} & y'_{21} & y'_{22} & y'_{23}\\
y'_{30} & y'_{31} & y'_{32} & y'_{33}
\end{bmatrix}
&=
\left\{
\textcolor{red}{
\begin{bmatrix}
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0 \\
0 & 0 & 0 & 0
\end{bmatrix}},
\begin{bmatrix}
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2} \\
0 & 0 & 0 & 0
\end{bmatrix},
\begin{bmatrix}
0 & 0 & 0 & 0 \\
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0
\end{bmatrix},
\begin{bmatrix}
0 & 0 & 0 & 0 \\
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2}
\end{bmatrix}
\right\}
\begin{bmatrix}
\textcolor{red}{x'_{00}}\\ x'_{01}\\ x'_{10}\\ x'_{11}
\end{bmatrix}\\
&=
\begin{bmatrix}
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0 \\
0 & 0 & 0 & 0
\end{bmatrix}x'_{00}+
\begin{bmatrix}
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2} \\
0 & 0 & 0 & 0
\end{bmatrix}x'_{01}+
\begin{bmatrix}
0 & 0 & 0 & 0 \\
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0
\end{bmatrix}x'_{10}+
\begin{bmatrix}
0 & 0 & 0 & 0 \\
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2}
\end{bmatrix}x'_{11}\\
&=
\left\{
\begin{matrix}
\textcolor{red}{
\begin{bmatrix}
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0 \\
0 & 0 & 0 & 0
\end{bmatrix}}&
\begin{bmatrix}
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2} \\
0 & 0 & 0 & 0
\end{bmatrix}\\
\begin{bmatrix}
0 & 0 & 0 & 0 \\
w_{0,0} & w_{0,1} & w_{0,2} & 0 \\
w_{1,0} & w_{1,1} & w_{1,2} & 0 \\
w_{2,0} & w_{2,1} & w_{2,2} & 0
\end{bmatrix}&
\begin{bmatrix}
0 & 0 & 0 & 0 \\
0 & w_{0,0} & w_{0,1} & w_{0,2} \\
0 & w_{1,0} & w_{1,1} & w_{1,2} \\
0 & w_{2,0} & w_{2,1} & w_{2,2}
\end{bmatrix}
\end{matrix}
\right\}*
\begin{bmatrix}
\textcolor{red}{x'_{00}} & x'_{01} \\ x'_{10} & x'_{11}
\end{bmatrix}
\end{split}

这样相当于 $\left[\begin{smallmatrix}x_{00}&x_{01}\\x_{10}&x_{11}\end{smallmatrix}\right]$ 作为权重作用在 $4\times4$ 的输出上

1
2
3
4
5
6
7
8
9
10
\scriptsize
\left[
\begin{array}{ccc}
x_{00} & \fbox{$\begin{matrix} x_{00}+x_{01} \end{matrix}$} & x_{01}\\\\
\fbox{$\begin{matrix} x_{00}\\+\\x_{10} \end{matrix}$} &
\fbox{$\begin{matrix} x_{00}+x_{01}\\+\\x_{10}+x_{11} \end{matrix}$} &
\fbox{$\begin{matrix} x_{01}\\+\\x_{11} \end{matrix}$}\\\\
x_{10} & \fbox{$\begin{matrix} x_{10}+x_{11} \end{matrix}$} & x_{11}\\
\end{array}
\right]

这个结果相当于用原卷积核左右镜像+上下镜像后的矩阵作为卷积核, 对两层 padding 的输入图像作卷积

$$\fbox{$\begin{matrix} w_{2,2} & w_{2,1} & w_{2,0} \\ w_{1,2} & w_{1,1} & w_{1,0} \\ w_{0,2} & w_{0,1} & w_{0,0} \end{matrix}$} \begin{bmatrix} 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & x'_{00} & x'_{01} & 0 & 0 \\ 0 & 0 & x'_{10} & x'_{11} & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 \\ \end{bmatrix}$$

ConvTranspose2d

torch.nn.ConvTranspose2d

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import torch

# Random input batch (an "image"), laid out as (N, C, H, W).
input = torch.randn(
    1,   # batch_size
    32,  # in_channels
    28,  # height
    28,  # width
)

# Define a transposed convolution.
# Case "4.5 No zero padding, non-unit strides, transposed":
# k=2, s=2, p=0  =>  o' = s(i'-1) + k = 2i'
# The output is twice the spatial size of the input: 28x28 -> 56x56.
transposed_conv = torch.nn.ConvTranspose2d(
    in_channels=32, out_channels=16,
    kernel_size=2, stride=2,
)

# Apply the transposed convolution to the input.
output = transposed_conv(input)

print(input.shape)                   # torch.Size([1, 32, 28, 28])
print(output.shape)                  # torch.Size([1, 16, 56, 56])
# Note: the weight is laid out as (in_channels, out_channels, kH, kW).
print(transposed_conv.weight.shape)  # torch.Size([32, 16, 2, 2])

ModuleList

torch.nn.ModuleList

  • 定义一个简单的 ModuleList, 其中包括 4 个 Module

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    import torch
    import torch.nn as nn

    # A ModuleList holding four modules.
    modulelist = nn.ModuleList([
        nn.Conv2d(1, 20, 5),
        nn.ReLU(),
        nn.Conv2d(20, 64, 5),
        nn.ReLU(),
    ])

    # Print the ModuleList itself.
    print(modulelist)
    # ModuleList(
    #   (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    #   (1): ReLU()
    #   (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    #   (3): ReLU()
    # )

    # Print the type of the ModuleList. Use type(obj): `modulelist.type` is
    # the Module.type() method, so printing it shows a bound method instead
    # of the class.
    print(type(modulelist))
    # <class 'torch.nn.modules.container.ModuleList'>
  • 可以迭代

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    # Iterate over the modules of the ModuleList with a plain for loop.
    for module in modulelist:
        print(module)
    # Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    # ReLU()
    # Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    # ReLU()

    # Use enumerate to get each module together with its index.
    for i, module in enumerate(modulelist):
        print(i, module)
    # 0 Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    # 1 ReLU()
    # 2 Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    # 3 ReLU()
  • 可以使用 [] 中括号来访问 ModuleList 中的元素, 下标从 0 开始, 注意不要越界

    1
    2
    3
    4
    5
    6
    7
    # Access the modules of the ModuleList by index with [].
    # Bounding the loop by len(modulelist) keeps the index in range.
    for i in range(len(modulelist)):
        print(modulelist[i])
    # Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    # ReLU()
    # Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    # ReLU()
  • 借助 ModuleList 来定义一个简单的网络模型

    1
    2
    3
    4
    5
    6
    7
    8
    class Net(nn.Module):
        """Small CNN whose layers are stored in an ``nn.ModuleList``.

        The layers are created and registered in ``__init__`` (instead of
        being read from a module-level variable), so ``net.parameters()``,
        ``net.to(device)`` and ``net.state_dict()`` all see them.
        """

        def __init__(self) -> None:
            super().__init__()
            self.layers = nn.ModuleList([
                nn.Conv2d(1, 20, 5),
                nn.ReLU(),
                nn.Conv2d(20, 64, 5),
                nn.ReLU(),
            ])

        def forward(self, x):
            """Feed ``x`` through every layer in order and return the result."""
            # A ModuleList is not callable, so the chaining is done by hand.
            for layer in self.layers:
                x = layer(x)
            return x

    测试这个模型

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    # Input: batch_size=1, channels=1, height=100, width=100
    input = torch.randn(1, 1, 100, 100)

    # Instantiate the network.
    net = Net()

    # Call the module itself rather than net.forward(): Module.__call__ also
    # runs any registered hooks around forward().
    output = net(input)

    print(input.shape)   # torch.Size([1, 1, 100, 100])
    print(output.shape)  # torch.Size([1, 64, 92, 92])

Sequential

torch.nn.Sequential

Sequential 和 ModuleList 用法非常相似, 不同的地方在于 ModuleList 只是单纯的一个 Module 的 List, 而 Sequential 则将其中的 Module 按顺序串联成一个模型. PyTorch 中可以直接调用定义好的 Sequential, 而 ModuleList 则不行.

  • 定义一个简单的 Sequential Container
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import torch
import torch.nn as nn

# A Sequential container chaining four modules.
sequential = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
)

# Print the Sequential itself.
print(sequential)
# Sequential(
#   (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#   (1): ReLU()
#   (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#   (3): ReLU()
# )

# Print the type of the Sequential. Use type(obj): `sequential.type` is the
# Module.type() method, so printing it shows a bound method instead of the
# class.
print(type(sequential))
# <class 'torch.nn.modules.container.Sequential'>
  • 可以迭代
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Iterate over the modules of the Sequential with a plain for loop.
for module in sequential:
    print(module)
# Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
# ReLU()
# Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
# ReLU()

# Use enumerate to get each module together with its index.
for i, module in enumerate(sequential):
    print(i, module)
# 0 Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
# 1 ReLU()
# 2 Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
# 3 ReLU()
  • 可以使用 [] 中括号来访问 Sequential 中的元素, 下标从 0 开始, 注意不要越界
1
2
3
4
5
6
# Access the modules of the Sequential by index with [].
# Bounding the loop by len(sequential) keeps the index in range.
for i in range(len(sequential)):
    print(sequential[i])
# Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
# ReLU()
# Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
# ReLU()
  • Sequential 本身已经是一个模型, 可以直接调用
1
2
3
4
5
6
# Input: batch_size=1, channels=1, height=100, width=100
input = torch.randn(1, 1, 100, 100)
# A Sequential is itself a model, so it can be called directly on the input.
output = sequential(input)

print(input.shape) #torch.Size([1, 1, 100, 100])
print(output.shape) #torch.Size([1, 64, 92, 92])

functional

torch.nn.functional

one_hot

torch.nn.functional.one_hot

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import torch
from torch.nn import functional as F

# Class indices cycling through 0, 1, 2.
x = torch.arange(5).remainder(3)
# tensor([0, 1, 2, 0, 1])

# Without num_classes, the number of classes is inferred as max(x) + 1.
F.one_hot(x)
# tensor([[1, 0, 0],
#         [0, 1, 0],
#         [0, 0, 1],
#         [1, 0, 0],
#         [0, 1, 0]])

# An explicit num_classes widens the encoding with all-zero columns.
F.one_hot(x, num_classes=5)
# tensor([[1, 0, 0, 0, 0],
#         [0, 1, 0, 0, 0],
#         [0, 0, 1, 0, 0],
#         [1, 0, 0, 0, 0],
#         [0, 1, 0, 0, 0]])

# one_hot also accepts multi-dimensional index tensors: the class axis is
# appended as a new last dimension.
y = torch.arange(6).view(3, 2).remainder(4)
print(y.shape, '\n', y)
# torch.Size([3, 2])
# tensor([[0, 1],
#         [2, 3],
#         [0, 1]])

Y = F.one_hot(y, num_classes=4)
print(Y.shape, '\n', Y)
# torch.Size([3, 2, 4])
# tensor([[[1, 0, 0, 0],
#          [0, 1, 0, 0]],
#
#         [[0, 0, 1, 0],
#          [0, 0, 0, 1]],
#
#         [[1, 0, 0, 0],
#          [0, 1, 0, 0]]])

nn.Dropout

torch.nn.Dropout

torch.autocast

Automatic Mixed Precision package - torch.amp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

# Load the MNIST training split (download=True fetches it into dataset/).
trainset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
trainloader = DataLoader(dataset=trainset, batch_size=16)

# Linear classifier: 28*28 -> 10
model = torch.nn.Linear(in_features=28 * 28, out_features=10)

# Loss function and optimizer.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop.
for epoch in range(3):
    for images, labels in tqdm(trainloader, desc=f"epoch {epoch}"):
        # images.shape: [16, 1, 28, 28], labels.shape: [16]
        inputs = images.view(images.size(0), -1)
        # inputs.shape: [16, 784]

        optimizer.zero_grad()

        # Enable autocasting for the forward pass only (model + loss).
        with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
            output = model(inputs)
            loss = criterion(output, labels)

        # The same forward pass without autocasting would be:
        # output = model(inputs)
        # loss = criterion(output, labels)

        # The backward pass and optimizer step run outside the autocast context.
        loss.backward()
        optimizer.step()

Torchvision

torchvision.transforms

  • Documents
    Transforming and augmenting images
    Getting started with transforms v2

  • Demonstration

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    import torch
    from torchvision.transforms import v2
    from PIL import Image
    import matplotlib.pyplot as plt

    # You can download the astronaut.jpg image from
    # https://github.com/pytorch/vision/blob/main/gallery/
    # and convert the annotation.json file to a mask yourself.
    path2image = r"./coco/images/astronaut.jpg"
    path2mask = r"./mask.png"

    TransformsList = [
        v2.RandomResizedCrop(size=(224, 224), antialias=True),
        v2.RandomHorizontalFlip(p=0.5),  # probability = 0.5
        # 3 means and 3 stds, one pair per channel
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]

    ToFloatImage = v2.Compose([
        v2.ToImage(),                           # Convert PIL Image to Torchvision Image
        v2.ToDtype(torch.float32, scale=True),  # Convert uint8 to float32
    ])

    # Read image and mask.
    image = Image.open(path2image)  # PIL Image
    mask = Image.open(path2mask)    # PIL Image
    # Convert PIL Image to Torchvision Image.
    image = ToFloatImage(image)  # Float Torchvision Image
    mask = ToFloatImage(mask)    # Float Torchvision Image

    plt.close()
    _, axes = plt.subplots(
        nrows=2, ncols=len(TransformsList) + 1,
        figsize=(3 * (len(TransformsList) + 1), 3 * 2),  # (size*cols, size*rows)
    )

    # imshow expects channels last: C×H×W -permute-> H×W×C
    axes[0][0].imshow(image.permute(1, 2, 0))
    axes[0][0].set_title("Original")
    axes[1][0].imshow(mask.permute(1, 2, 0))

    torch.manual_seed(1)
    # Each transform is applied to the ORIGINAL image/mask pair (the
    # transforms are not chained), which is why only the crop changes the size.
    for i, transform in enumerate(TransformsList):
        # Don't transform image and mask separately when the transform
        # contains random operators: each call would draw its own parameters.
        # trans_image = transform(image)  # <-- Don't
        # trans_mask = transform(mask)    # <-- Don't
        trans_image, trans_mask = transform(image, mask)  # <-- Instead
        print(f"{type(transform).__name__}: {trans_image.shape = }")
        print(f"{type(transform).__name__}: {trans_mask.shape = }")
        # RandomResizedCrop: trans_image.shape = torch.Size([3, 224, 224])
        # RandomResizedCrop: trans_mask.shape = torch.Size([1, 224, 224])
        # RandomHorizontalFlip: trans_image.shape = torch.Size([3, 512, 512])
        # RandomHorizontalFlip: trans_mask.shape = torch.Size([1, 512, 512])
        # Normalize: trans_image.shape = torch.Size([3, 512, 512])
        # Normalize: trans_mask.shape = torch.Size([3, 512, 512]) <-- Note the channels of mask

        axes[0][i + 1].imshow(trans_image.permute(1, 2, 0))  # C×H×W -permute-> H×W×C
        axes[0][i + 1].set_title(type(transform).__name__)
        axes[1][i + 1].imshow(trans_mask.permute(1, 2, 0))   # C×H×W -permute-> H×W×C
    plt.show()

    astronaut-mask-transforms

transforms.v2.Compose

  • Documents
    torchvision.transforms.v2.Compose

  • Demonstration

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    import torch  # needed for the torch.uint8 / torch.float32 dtypes below
    from torchvision.transforms import v2

    transforms = v2.Compose([
        # Convert to tensor, only needed if you had a PIL image
        v2.ToImage(),
        # Optional, most inputs are already uint8 at this point
        v2.ToDtype(torch.uint8, scale=True),
        v2.RandomResizedCrop(size=(224, 224), antialias=True),
        # Or v2.Resize(antialias=True),
        # Normalize expects float input
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
  • Trick

    1
    2
    3
    4
    5
    6
    7
    8
    from torch import nn
    from torchvision.transforms import v2

    # Keep the individual transforms in a ModuleList so they can be used both
    # one at a time and as a single pipeline. The two transforms below are
    # only examples; put your own transforms here.
    translist = nn.ModuleList([
        v2.RandomResizedCrop(size=(224, 224), antialias=True),
        v2.RandomHorizontalFlip(p=0.5),
    ])

    # Build one composed pipeline from the same list ...
    transform = v2.Compose(list(translist))

    # ... and still iterate over the individual transforms when needed.
    for trans in translist:
        pass

v2-api-reference

transforms.v2.ToTensor

  • Documents
    torchvision.transforms.v2.ToTensor

    Use the command below instead

    1
    2
    3
    4
    v2.Compose([
    v2.ToImage(), # Convert PIL Image to Torchvision Image
    v2.ToDtype(torch.float32, scale=True) # Convert uint8 to float32
    ])

transforms.v2.ToImage

transforms.v2.Normalize

  • Documents
    torchvision.transforms.v2.Normalize

  • Some understanding
    If the input image has n channels and v2.Normalize has n means and stds

    1
    v2.Normalize(mean=[m1, ..., mn], std=[s1, ..., sn])

    v2.Normalize normalizes every channel of the input image (or array) with the corresponding mean and std:

    $$channel(i) = (channel(i) - mean(i)) / std(i), \quad i = 1, \cdots, n$$

    If the input image has only 1 channel and v2.Normalize has n (more than 1) means and stds,
    v2.Normalize will return n channels:

    $$channel(i) = (channel - mean(i)) / std(i), \quad i = 1, \cdots, n$$

  • Demonstration

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    import torch
    from torchvision.transforms import v2

    # A 2-channel, 3x3 "image": channel 1 is the identity pattern and
    # channel 2 is the anti-diagonal pattern.
    channel1 = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    channel2 = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    t = torch.tensor([channel1, channel2], dtype=torch.float32)
    print(t)
    # tensor([[[1., 0., 0.],
    #          [0., 1., 0.],
    #          [0., 0., 1.]],
    #
    #         [[0., 0., 1.],
    #          [0., 1., 0.],
    #          [1., 0., 0.]]])

    # Different means, same stds: mean=[0, 1], std=[1, 1]
    #   channel 1 (mean 0, std 1): 1 -> (1 - 0) / 1 = 1,   0 -> (0 - 0) / 1 = 0
    #   channel 2 (mean 1, std 1): 1 -> (1 - 1) / 1 = 0,   0 -> (0 - 1) / 1 = -1
    transform = v2.Normalize(mean=[0, 1], std=[1, 1])
    trans_t = transform(t)
    print(trans_t)
    # tensor([[[ 1.,  0.,  0.],
    #          [ 0.,  1.,  0.],
    #          [ 0.,  0.,  1.]],
    #
    #         [[-1., -1.,  0.],
    #          [-1.,  0., -1.],
    #          [ 0., -1., -1.]]])

    # Same means, different stds: mean=[0, 0], std=[0.5, 2]
    #   channel 1 (mean 0, std 0.5): 1 -> (1 - 0) / 0.5 = 2,   0 -> (0 - 0) / 0.5 = 0
    #   channel 2 (mean 0, std 2):   1 -> (1 - 0) / 2 = 0.5,   0 -> (0 - 0) / 2 = 0
    transform = v2.Normalize(mean=[0, 0], std=[0.5, 2])
    trans_t = transform(t)
    # Print floating-point values with 1 digit after the decimal point.
    torch.set_printoptions(precision=1)
    print(trans_t)
    # tensor([[[2.0, 0.0, 0.0],
    #          [0.0, 2.0, 0.0],
    #          [0.0, 0.0, 2.0]],
    #
    #         [[0.0, 0.0, 0.5],
    #          [0.0, 0.5, 0.0],
    #          [0.5, 0.0, 0.0]]])
  • Paper that proposed Batch Normalization
    Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift

torchvision.datasets

MNIST

  • Documents
    torchvision.datasets.MNIST

  • Extended reading
    MNIST的均值和方差(0.1307,), (0.3081,)是怎么计算出来的?

  • Note
    MNIST所继承的VisionDataset是制作与torchvison相兼容数据的基类, VisionDataset依然是Dataset的子类

  • Demonstration

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    import random

    from torchvision import datasets

    # Both splits live under the same root directory. Sharing one constant
    # keeps the two paths in sync (the test-set path used to be written as
    # '..dataset/mnist/', missing the '/' after '..').
    MNIST_ROOT = '../dataset/mnist/'

    # train=True loads the training set.
    train_dataset = datasets.MNIST(
        root=MNIST_ROOT,
        train=True,
        download=False,  # set to True on the first run to download the data
        transform=None,
    )
    # train=False loads the test set.
    test_dataset = datasets.MNIST(
        root=MNIST_ROOT,
        train=False,
        download=False,  # set to True on the first run to download the data
        transform=None,
    )

    print(type(train_dataset))  # <class 'torchvision.datasets.mnist.MNIST'>
    print(len(train_dataset))   # 60000
    print(len(test_dataset))    # 10000

    # The dataset supports len() and integer indexing, so random.choice works.
    item = random.choice(train_dataset)
    print(type(item))  # <class 'tuple'>
    print(item)        # (<PIL.Image.Image image mode=L size=28x28 at 0x1CF74469760>, 5)
  • Note
    train_dataset 和 test_dataset 是两个可以按下标访问、也可以迭代的数据集 (Dataset), 其中的元素为元组;
    train_dataset包含60000个元组; test_dataset包含10000个元组;
    每个元组都包含2个元素, 其中第一个元素是图片, 第二个元素是图片的标签.

CIFAR10

该数据集共有 60000 张彩色图像,这些图像是 32*32,分为 10 个类,每类 6000 张图。
这里面有 50000 张用于训练,构成了 5 个训练批,每一批 10000 张图;另外 10000 用于测试,单独构成一批。
测试批的数据里,取自 10 类中的每一类,每一类随机取 1000 张,一共 10000 张,剩下的 50000 张图像就随机排列组成了训练批。
注意一个训练批中的各类图像并不一定数量相同,但总训练集的 50000 张图像中每一类都有 5000 张图。

torch.optim

torch.optim | PyTorch Docs

torch.optim 是pytorch的一个工具包, 里面包含了常见的神经网络的优化算法

下面以torch.optim.SGD为例, 介绍如何使用这个工具包

torch.optim.SGD

SGD为随机梯度下降法(stochastic gradient descent)

torch.optim.SGD | PyTorch Docs

  • 自定义一个数据集

这个数据集有108个样本, 每个样本都是 $[0,1)$ 上均匀生成的一个64维向量, 每个样本对应一个16维的标签

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import torch
from torch.utils.data import DataLoader, Dataset


class CustomDataset(Dataset):
    """Map-style dataset pairing each input sample with its target.

    Args:
        input: indexable collection of samples.
        target: indexable collection of labels, aligned with ``input``.
    """

    def __init__(self, input, target):
        self.input, self.target = input, target

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.input[index], self.target[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.input)


# Build some simple random data.
input = torch.rand([108, 64])   # 108 samples, each a 64-dim vector
target = torch.rand([108, 16])  # 108 labels, each a 16-dim vector

# Wrap the tensors in a CustomDataset.
dataset = CustomDataset(input, target)

# Load the dataset in batches of 18 samples (108 / 18 = 6 batches).
dataset_loaded = DataLoader(dataset, batch_size=18)
  • 自定义一个模型
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    """Simple two-layer MLP: 64 -> 32 -> 16, with ReLU after each layer."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(64, 32)
        self.layer2 = nn.Linear(32, 16)

    def forward(self, x):
        """Map a ``(..., 64)`` input to a ``(..., 16)`` non-negative output."""
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return x


# Model instance
model = Model()
# Loss function: mean squared error averaged over all elements
loss_fn = nn.MSELoss(reduction="mean")
  • 使用 torch.optim.SGD 对上面自定义的数据和模型进行训练
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import torch.optim as optim

# Build the optimizer from the model's parameters.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# To give different layers different learning rates, pass parameter groups
# instead; groups without their own 'lr' use the default given below:
# optimizer = optim.SGD([
#     {'params': model.layer1.parameters()},
#     {'params': model.layer2.parameters(), 'lr': 1e-3}
# ], lr=1e-2, momentum=0.9)

# One pass over the data: one optimizer step per mini-batch.
for batch_input, batch_target in dataset_loaded:
    optimizer.zero_grad()                   # clear gradients from the previous step
    output = model(batch_input)             # forward pass
    loss = loss_fn(output, batch_target)
    loss.backward()                         # compute gradients
    optimizer.step()                        # update the parameters

zero_grad
torch.optim.Optimizer.zero_grad

step
torch.optim.Optimizer.step

TensorBoard

PyTorch Profiler With TensorBoard