关于图像的 shape 是 C×H×W 还是 H×W×C

H×W×C Or C×H×W

from PIL import Image
import numpy as np
from torchvision import transforms

image_path = r"path/to/image"
mask_path = r"path/to/mask"

# PIL reports size as (width, height), the reverse of the row/column order
# used by the array and tensor shapes below.
img = Image.open(image_path)  # size: (width, height)
mask = Image.open(mask_path)  # size: (width, height)

# NumPy conversion is channels-last (H×W×C). The mask must be built from
# `mask`, not `img`; assuming it is single-channel (as the [1, H, W] tensor
# shape below indicates), the resulting array has no channel axis.
np_img = np.array(img)  # shape: (height, width, 3)
np_mask = np.array(mask)  # shape: (height, width)

# PILToTensor is channels-first (C×H×W) and keeps the uint8 values unscaled.
ts_img = transforms.PILToTensor()(img)  # shape: torch.Size([3, height, width])
ts_mask = transforms.PILToTensor()(mask)  # shape: torch.Size([1, height, width])

打印时，[C, H, W] 布局的图像数据比 [H, W, C] 布局更加直观：每个 H×W 矩阵表示一个通道（Channel），三个矩阵依次对应 R、G、B 三个通道的值。

# Top-left patch in H×W×C layout: first 5 rows, first 4 columns, all channels.
# Each innermost triple printed below is one pixel's (R, G, B) values.
patch_hwc = np_img[:5, :4, :]
print(patch_hwc)  # clip shape: 5×4×3
# [[[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]]

# Top-left patch in C×H×W layout: all channels, first 5 rows, first 4 columns.
# The axis order is (channel, row, column), so the row slice comes before the
# column slice; each printed 5×4 matrix is one colour channel of the patch.
print(ts_img[:, 0:5, 0:4]) # clip shape: 3×5×4
# tensor([[[244, 244, 244, 244],
#          [244, 244, 244, 244],
#          [244, 244, 244, 244],
#          [244, 244, 244, 244],
#          [244, 244, 244, 244]],

#         [[243, 243, 243, 243],
#          [243, 243, 243, 243],
#          [243, 243, 243, 243],
#          [243, 243, 243, 243],
#          [243, 243, 243, 243]],

#         [[248, 248, 248, 248],
#          [248, 248, 248, 248],
#          [248, 248, 248, 248],
#          [248, 248, 248, 248],
#          [248, 248, 248, 248]]], dtype=torch.uint8)