ExifTool

ExifTool Command-Line Examples
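For example (assuming exiftool is on your PATH), running it on an image prints all of its metadata, including the Color Type tag referenced later in these notes; the filename here is the Potsdam mask used below:

exiftool top_potsdam_2_10_0_0.png
# ...
# Color Type: Grayscale
# ...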

Pillow

Pillow documentation

PIL.Image.open

PIL.Image.save
PIL.Image.convert

conda install pillow
import PIL
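
A minimal sketch of the three linked calls (open, convert, save); the file names here are placeholders:

from PIL import Image

img = Image.open("input.jpg")   # returns a PIL Image; img.mode is e.g. 'RGB'
gray = img.convert("L")         # convert to 8-bit grayscale
gray.save("output.png")         # output format is inferred from the extension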

Open images in a folder

import os
import glob
from PIL import Image
import matplotlib.pyplot as plt

path2image = r"path/to/images"
path2mask = r"path/to/masks"

image_paths = glob.glob(os.path.join(path2image, '*.png'))
image_names = [os.path.basename(path) for path in image_paths]
# Take the first 10 images
image_names = image_names[0:10]

plt.close()
nrows = len(image_names); ncols = 2
fig, axes = plt.subplots(nrows, ncols, figsize=(5*ncols, 5*nrows))
axes[0,0].set_title('Original Image')
axes[0,1].set_title('Original Mask')
for i, name in enumerate(image_names):
    image_path = os.path.join(path2image, name)
    mask_path = os.path.join(path2mask, name)

    image = Image.open(image_path)
    mask = Image.open(mask_path)

    axes[i,0].imshow(image)
    axes[i,1].imshow(mask, cmap='viridis')
plt.show()

H×W×3 or 3×H×W

from PIL import Image
import numpy as np
from torchvision import transforms

image_path = r"path/to/image"
mask_path = r"path/to/mask"

img = Image.open(image_path) # size: (width, height)
mask = Image.open(mask_path) # size: (width, height)

np_img = np.array(img) # shape: (height, width, 3)
np_mask = np.array(mask) # shape: (height, width)

ts_img = transforms.PILToTensor()(img) # shape: torch.Size([3, height, width])
ts_mask = transforms.PILToTensor()(mask) # shape: torch.Size([1, height, width])
print(np_img[0:5, 0:4, :]) # clip shape: 5×4×3
# [[[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]]

print(ts_img[:, 0:4, 0:5]) # clip shape: 3×4×5
# tensor([[[244, 244, 244, 244, 244],
#          [244, 244, 244, 244, 244],
#          [244, 244, 244, 244, 244],
#          [244, 244, 244, 244, 244]],

#         [[243, 243, 243, 243, 243],
#          [243, 243, 243, 243, 243],
#          [243, 243, 243, 243, 243],
#          [243, 243, 243, 243, 243]],

#         [[248, 248, 248, 248, 248],
#          [248, 248, 248, 248, 248],
#          [248, 248, 248, 248, 248],
#          [248, 248, 248, 248, 248]]], dtype=torch.uint8)
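
As a related note, torchvision's ToTensor() also returns a C×H×W tensor, but it additionally scales uint8 values to float32 in [0, 1], whereas PILToTensor() keeps the raw uint8 values. A minimal sketch, reusing the img opened above:

from torchvision import transforms

ts_float = transforms.ToTensor()(img)    # torch.float32, values in [0, 1]
ts_uint8 = transforms.PILToTensor()(img) # torch.uint8, values in [0, 255]
# both have shape [3, height, width]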

PIL Image to numpy array and torch tensor

from PIL import Image
import numpy as np
import torch as tc
from torchvision import transforms
import matplotlib.pyplot as plt

path2image = r"./coco/images/astronaut.jpg"
path2mask = r"./mask.png"

image = Image.open(path2image)
mask = Image.open(path2mask)
print(f"{image.size = }, {image.mode = }")
print(f"{mask.size = }, {mask.mode = }")
# image.size = (512, 512), image.mode = 'RGB'
# mask.size = (512, 512), mask.mode = 'L'

'''PIL image to numpy array'''
npimg = np.array(image)
npmask = np.array(mask)
print(f"{npimg.shape = }\n{npmask.shape = }")
# npimg.shape = (512, 512, 3)
# npmask.shape = (512, 512)

'''PIL image to torch tensor'''
tcimg = transforms.PILToTensor()(image)
tcmask = transforms.PILToTensor()(mask)
print(f"{tcimg.shape = }\n{tcmask.shape = }")
# tcimg.shape = torch.Size([3, 512, 512])
# tcmask.shape = torch.Size([1, 512, 512])

'''torch tensor to numpy array'''
# tc2npimg = np.array(tcimg.numpy())
# tc2npmask = np.array(tcmask.numpy())
tc2npimg = np.array(tcimg)
tc2npmask = np.array(tcmask)
print(f"{tc2npimg.shape = }\n{tc2npmask.shape = }")
# tc2npimg.shape = (3, 512, 512)
# tc2npmask.shape = (1, 512, 512)

'''numpy array to torch tensor'''
np2tcimg = tc.from_numpy(npimg)
np2tcmask = tc.from_numpy(npmask)
print(f"{np2tcimg.shape = }\n{np2tcmask.shape = }")
# np2tcimg.shape = torch.Size([512, 512, 3])
# np2tcmask.shape = torch.Size([512, 512])

'''Plot HxWxC and CxHxW'''
_, axes = plt.subplots(nrows=2, ncols=4, figsize=(12,6))
'''
When plotting, whether it's a numpy array or a torch tensor,
make sure its shape is HxW or HxWxC (C=1 also works)
'''
axes[0,0].imshow(npimg) # (512, 512, 3)
axes[0,1].imshow(tcimg.permute(1,2,0)) # [3, 512, 512] -> [512, 512, 3]
axes[0,2].imshow(np2tcimg) # [512, 512, 3]
axes[0,3].imshow(tc2npimg.transpose(1,2,0)) # (3, 512, 512) -> (512, 512, 3)
axes[1,0].imshow(npmask) # (512, 512)
axes[1,1].imshow(tcmask.permute(1,2,0)) # [1, 512, 512] -> [512, 512, 1]
axes[1,2].imshow(np2tcmask) # [512, 512]
axes[1,3].imshow(tc2npmask.transpose(1,2,0)) # (1, 512, 512) -> (512, 512, 1)
plt.show()

OpenCV/cv2

OpenCV docs

conda install opencv
import cv2

Why does cv2 return a 3-channel image from a single-channel image?

'''0 for grayscale (cv2.IMREAD_GRAYSCALE), 1 for BGR color (cv2.IMREAD_COLOR)'''
mask = cv2.imread(mask_path, 0) # shape: (1024, 1024)
mask = cv2.imread(mask_path, 1) # shape: (1024, 1024, 3)
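
Note that cv2.imread returns color images in BGR channel order, while PIL and matplotlib assume RGB. A minimal conversion sketch (the path is a placeholder):

import cv2

img_bgr = cv2.imread("path/to/image.png", 1)        # shape: (H, W, 3), BGR
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # shape: (H, W, 3), RGB
# img_rgb now displays with correct colors in plt.imshow()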

PIL vs. matplotlib vs. cv2

What is the difference of opening a mask image with PIL and cv2?

import numpy as np
import matplotlib.image
import PIL.Image
import cv2

# exiftool> Color Type: Grayscale
mask_path = r"Path\to\top_potsdam_2_10_0_0.png"

'''PIL (recommended)'''
mask = PIL.Image.open(mask_path) # size: (1024, 1024)
colors, counts = np.unique(mask, return_counts=True)
print(colors, counts)
# [0 1 2 3 4 5 6]
# [468491 157674 94961 255461 10278 783 60928]

'''matplotlib'''
mask = matplotlib.image.imread(mask_path) # shape: (1024, 1024)
colors, counts = np.unique(mask, return_counts=True)
print([float(f'{color:0.4f}') for color in colors], counts)
# [0.0, 0.0039, 0.0078, 0.0118, 0.0157, 0.0196, 0.0235]
# [0, 1/255, 2/255, 3/255, 4/255, 5/255, 6/255] <--
# [468491 157674 94961 255461 10278 783 60928]
# PNG images are returned as float arrays (0-1). All other formats are returned as int arrays.

'''cv2'''
mask = cv2.imread(mask_path, 0) # shape: (1024, 1024)
colors, counts = np.unique(mask, return_counts=True)
print(colors, counts)
# [0 1 2 3 4 5 6]
# [468491 157674 94961 255461 10278 783 60928]

Image saving

Saving PNG images with PIL is 4 times slower than saving them with OpenCV
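
The linked post benchmarks this; a minimal sketch to reproduce the comparison yourself (the synthetic image and output paths are placeholders):

import time
import cv2
import numpy as np
from PIL import Image

img = np.random.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)

t0 = time.perf_counter()
Image.fromarray(img).save("pil_out.png")
t1 = time.perf_counter()
cv2.imwrite("cv2_out.png", img)  # note: cv2 treats the array as BGR
t2 = time.perf_counter()
print(f"PIL: {t1 - t0:.3f}s, cv2: {t2 - t1:.3f}s")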

Albumentations

  • Docs
    albumentations
    Mask augmentation for segmentation

  • Demo

    import numpy as np
    import albumentations as A
    from PIL import Image
    import matplotlib.pyplot as plt

    # You can download the astronaut.jpg image from
    # https://github.com/pytorch/vision/blob/main/gallery/
    # and convert the annotation.json file to a mask yourself
    path2image = r"./coco/images/astronaut.jpg"
    path2mask = r"./mask.png"

    TransformsList = [
        A.RandomBrightnessContrast(brightness_limit=0.25, contrast_limit=0.25, p=1),
        A.RandomRotate90(p=1),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255)
    ]

    # read image and mask
    image = Image.open(path2image) # PIL Image
    mask = Image.open(path2mask) # PIL Image
    # convert PIL Image to numpy array
    image = np.array(image) # shape: HxWx3
    mask = np.array(mask) # shape: HxW

    plt.close()
    _, axes = plt.subplots(
        nrows=2, ncols=len(TransformsList)+1,
        figsize=(3*(len(TransformsList)+1), 3*2) # (size*cols, size*rows)
    )

    axes[0][0].imshow(image)
    axes[0][0].set_title("Original")
    axes[1][0].imshow(mask)

    for i, transform in enumerate(TransformsList):
        '''Don't transform image and mask separately
        when transforms contain random operators'''
        # trans_image = transform(image) # <-- Don't
        # trans_mask = transform(mask) # <-- Don't
        transformed = transform(image=image.copy(), mask=mask.copy()) # <-- Instead
        trans_image = transformed['image']
        trans_mask = transformed['mask']
        print(f"{type(transform).__name__}: {trans_image.shape = }")
        print(f"{type(transform).__name__}: {trans_mask.shape = }")
        # RandomBrightnessContrast: trans_image.shape = (512, 512, 3)
        # RandomBrightnessContrast: trans_mask.shape = (512, 512)
        # RandomRotate90: trans_image.shape = (512, 512, 3)
        # RandomRotate90: trans_mask.shape = (512, 512)
        # Normalize: trans_image.shape = (512, 512, 3)
        # Normalize: trans_mask.shape = (512, 512) <-- Note the channels of mask

        axes[0][i+1].imshow(trans_image)
        axes[0][i+1].set_title(type(transform).__name__)
        axes[1][i+1].imshow(trans_mask)
    plt.show()

    (Figure: astronaut image and mask after each Albumentations transform)

compose

  • Docs

    Mask augmentation for segmentation

  • Demo

    import numpy as np
    import albumentations as A
    from PIL import Image
    import matplotlib.pyplot as plt

    # You can download the astronaut.jpg image from
    # https://github.com/pytorch/vision/blob/main/gallery/
    # and convert the annotation.json file to a mask yourself
    path2image = r"./coco/images/astronaut.jpg"
    path2mask = r"./mask.png"

    # read image and mask
    image = Image.open(path2image) # PIL Image
    mask = Image.open(path2mask) # PIL Image
    # convert PIL Image to numpy array
    image = np.array(image) # shape: HxWx3
    mask = np.array(mask) # shape: HxW

    transform = A.Compose([
        A.RandomCrop(width=224, height=224),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ])

    transformed = transform(image=image, mask=mask)
    trans_image = transformed['image']
    trans_mask = transformed['mask']

    plt.close()
    _, axes = plt.subplots(
        nrows=1, ncols=4,
        figsize=(3*4, 3*1) # (size*cols, size*rows)
    )
    axes[0].imshow(image)
    axes[0].set_title("original image")
    axes[1].imshow(trans_image)
    axes[1].set_title("transformed image")
    axes[2].imshow(mask)
    axes[2].set_title("original mask")
    axes[3].imshow(trans_mask)
    axes[3].set_title("transformed mask")
    plt.show()

    (Figure: original and A.Compose-transformed astronaut image and mask)

Normalization

  • Intro

    import albumentations as A
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        max_pixel_value=255.0,
        always_apply=False,
        p=1.0
    )
  • Docs
    augmentations.Normalize

  • Understanding
    Normalization is applied by the formula:

    $$img = \left(\frac{img}{max\_pixel\_value} - mean\right) / std$$

  • Demo

    # import torch
    import numpy as np
    import albumentations as A

    channel1 = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    channel2 = [[0, 0, 2], [0, 2, 0], [2, 0, 0]]
    arr = np.array([channel1, channel2]) # C×H×W: (2, 3, 3)
    print(arr)
    # [[[1 0 0]
    # [0 1 0]
    # [0 0 1]]

    # [[0 0 2]
    # [0 2 0]
    # [2 0 0]]]
    arr = arr.transpose(1, 2, 0) # H×W×C: (3, 3, 2)
    print(arr)
    # [[[1 0]
    # [0 0]
    # [0 2]]

    # [[0 0]
    # [1 2]
    # [0 0]]

    # [[0 2]
    # [0 0]
    # [1 0]]]

    '''
    channel 1:
    1: (1/max(255) - mean(0))/std(1) = 0.0039
    0: (0/max(255) - mean(0))/std(1) = 0
    channel 2:
    2: (2/max(255) - mean(1))/std(2) = -0.4961
    0: (0/max(255) - mean(1))/std(2) = -0.5
    '''
    transform = A.Normalize(mean=[0, 1], std=[1, 2])
    # Show printed floats with 4 decimal places
    np.set_printoptions(precision=4)
    trans_arr = transform(image=arr)
    print(trans_arr)
    # {'image': array([[[ 0.0039, -0.5 ],
    # [ 0. , -0.5 ],
    # [ 0. , -0.4961]],

    # [[ 0. , -0.5 ],
    # [ 0.0039, -0.4961],
    # [ 0. , -0.5 ]],

    # [[ 0. , -0.4961],
    # [ 0. , -0.5 ],
    # [ 0.0039, -0.5 ]]], dtype=float32)}

Detectron2

detectron2

timm

How to fix pretrained=True download errors

LocalEntryNotFoundError when loading downloaded pretrained model using timm.create_model

import timm
# model = timm.create_model('resnet34', pretrained=True)
model = timm.create_model(
    'resnet34',
    pretrained=True,
    pretrained_cfg_overlay=dict(file=r'path\to\checkpoint'),
)
model.eval()

The checkpoint file can be *.safetensors, *.bin, *.pth, *.pt, *.ckpt, etc.

How to get the location where pretrained models were downloaded

  • Print the constant HUGGINGFACE_HUB_CACHE

    from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
    print(HUGGINGFACE_HUB_CACHE)
    # windows: C:\Users\*\.cache\huggingface\hub
    # colab: /root/.cache/huggingface/hub
  • Print a detailed report of the cache
    You can also print a detailed report directly from the huggingface-cli (a programmatic alternative is sketched at the end of this list):

    huggingface-cli scan-cache
    (conda_env) ~>huggingface-cli scan-cache
    REPO ID               REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
    --------------------- --------- ------------ -------- ------------- ------------- ---- -------------------------------------------------------
    timm/resnet34.a1_in1k model     87.3M        2        4 hours ago   4 hours ago   main ~\.cache\huggingface\hub\models--timm--resnet34.a1_in1k

    Done in 0.0s. Scanned 1 repo(s) for a total of 87.3M.
  • The folder structure of the cache, taking timm/resnet34.a1_in1k as an example
    Manage huggingface_hub cache-system

    C:\Users\*\.cache\huggingface\hub
    ├── models--timm--resnet34.a1_in1k
    |   ├── blobs
    |   ├── refs
    |   |   └── main
    |   └── snapshots
    |       └── 19cbb59c79ac0ee9142c596cdf336d3beb9683d2
    |           ├── config.json
    |           └── model.safetensors
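
    The same information is available programmatically; a minimal sketch using huggingface_hub's scan_cache_dir (available in recent huggingface_hub releases):

    from huggingface_hub import scan_cache_dir

    cache_info = scan_cache_dir()  # scans the default HF cache directory
    for repo in cache_info.repos:
        print(repo.repo_id, repo.repo_type, repo.size_on_disk_str)
    # e.g. timm/resnet34.a1_in1k model 87.3M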

Usage

requests.exceptions.SSLError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443)

import os
# Route traffic through a local proxy; adjust the address/port to your setup
os.environ['HTTP_PROXY'] = 'http://127.0.0.1:7890'
os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:7890'

import timm
model = timm.create_model('hf-hub:timm/resnet34.a1_in1k', pretrained=True)
model.eval()

einops

einops

rearrange

einops.rearrange

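A minimal sketch of rearrange, using the H×W×C ↔ C×H×W conversions discussed above (the shapes are illustrative):

import numpy as np
from einops import rearrange

img = np.zeros((512, 512, 3))           # H×W×C
chw = rearrange(img, 'h w c -> c h w')  # (3, 512, 512)
hwc = rearrange(chw, 'c h w -> h w c')  # back to (512, 512, 3)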