H×W×C or C×H×W

from PIL import Image
import numpy as np
from torchvision import transforms

# Placeholder paths: point these at a real RGB image and its mask.
image_path = r"path/to/image"
mask_path = r"path/to/mask"

# PIL reports an image's size as (width, height).
img = Image.open(image_path)  # size: (width, height)
mask = Image.open(mask_path)  # size: (width, height)

# NumPy arrays built from PIL images are channel-last (H×W×C).
np_img = np.array(img)  # shape: (height, width, 3)
# Build the mask array from `mask`, not `img`. Assuming a single-channel mask
# (as the [1, height, width] tensor shape below indicates), the array has no
# channel axis.
np_mask = np.array(mask)  # shape: (height, width)

# PILToTensor returns channel-first (C×H×W) tensors.
ts_img = transforms.PILToTensor()(img)  # shape: torch.Size([3, height, width])
ts_mask = transforms.PILToTensor()(mask)  # shape: torch.Size([1, height, width])

在打印输出时,[C, H, W] 格式的图像数据比 [H, W, C] 格式更直观:每个矩阵对应一个通道(Channel),三个矩阵依次对应 R、G、B 三个通道的值。

# Channel-last clip: the first 5 rows and first 4 columns, all channels.
# Each innermost triple is one pixel's channel values.
print(np_img[:5, :4, :])  # clip shape: 5×4×3
# [[[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]]

# Channel-first clip of the same region as np_img[0:5, 0:4, :]: axis 0 is the
# channel, axis 1 the height (rows), axis 2 the width (columns). Rows are
# therefore sliced 0:5 and columns 0:4, giving one 5×4 matrix per channel.
print(ts_img[:, 0:5, 0:4])  # clip shape: 3×5×4
# tensor([[[244, 244, 244, 244],
#          [244, 244, 244, 244],
#          [244, 244, 244, 244],
#          [244, 244, 244, 244],
#          [244, 244, 244, 244]],

#         [[243, 243, 243, 243],
#          [243, 243, 243, 243],
#          [243, 243, 243, 243],
#          [243, 243, 243, 243],
#          [243, 243, 243, 243]],

#         [[248, 248, 248, 248],
#          [248, 248, 248, 248],
#          [248, 248, 248, 248],
#          [248, 248, 248, 248],
#          [248, 248, 248, 248]]], dtype=torch.uint8)