# Image preprocessing pipeline: PIL -> uint8 tensor -> scaled float32 -> normalized.
# PILToTensor(): PIL Image [H, W, C] uint8 -> torch tensor [C, H, W] torch.uint8.
_to_tensor = v2.PILToTensor()
# ToDtype(..., scale=True): uint8 tensor in [0, 255] -> float32 tensor in [0.0, 1.0].
_to_float = v2.ToDtype(torch.float32, scale=True)
# Normalize(): output[channel] = (input[channel] - mean[channel]) / std[channel].
_normalize = v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
transform = v2.Compose([_to_tensor, _to_float, _normalize])
# ToDtype() and Normalize() will not be applied to `ann`;
# only PILToTensor() is applied to the annotation.
ann_as_tensor = v2.PILToTensor()(ann)
print(torch.equal(ann_as_tensor, ann_trans))  # True
# Drop the channel dimension ([1, H, W] -> [H, W]) and cast the annotation
# from uint8 to long (int64), the dtype expected by loss functions like CrossEntropyLoss.
ann = ann.squeeze(0).long()
from torchvision.transforms import v2 import torch import cv2
# Filesystem locations of the input image and its segmentation annotation
# (placeholders — replace with real paths before running).
path2img = "path/to/img"
path2ann = "path/to/ann"
# Load the image in color and the annotation as a single-channel grayscale mask;
# both come back as uint8 numpy arrays (img: [H, W, 3], ann: [H, W]).
# NOTE(review): cv2.IMREAD_COLOR_RGB presumably yields RGB channel order directly
# (newer OpenCV releases), so no manual BGR->RGB conversion is needed — confirm
# against the installed OpenCV version.
img = cv2.imread(path2img, cv2.IMREAD_COLOR_RGB)
ann = cv2.imread(path2ann, cv2.IMREAD_GRAYSCALE)
# Convert the numpy arrays to torch tensors.
# The image goes from HWC to CHW layout, the channel-first convention torch expects.
img = torch.from_numpy(img).permute(2, 0, 1)
# The annotation is cast from uint8 to long (int64).
ann = torch.from_numpy(ann).long()
# Preprocessing pipeline for tensor input (no PIL conversion step needed).
# ToDtype(..., scale=True): uint8 tensor in [0, 255] -> float32 tensor in [0.0, 1.0].
_to_float = v2.ToDtype(torch.float32, scale=True)
# Normalize(): output[channel] = (input[channel] - mean[channel]) / std[channel].
_normalize = v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
transform = v2.Compose([_to_float, _normalize])
UserWarning: The transform ToTensor() is deprecated and will be removed in a future release. Instead, please use v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]). Output is equivalent up to float precision.