ExifTool

ExifTool Command-Line Examples
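For example (assuming exiftool is on your PATH), running it on an image prints all of its metadata, including the Color Type tag referenced later in these notes; the filename here is the Potsdam mask used below:

exiftool top_potsdam_2_10_0_0.png
# ...
# Color Type: Grayscale
# ...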

Pillow

Pillow documentation

PIL.Image.open

PIL.Image.save
PIL.Image.convert

conda install pillow
import PIL
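
A minimal sketch of the three linked calls (open, convert, save); the file names here are placeholders:

from PIL import Image

img = Image.open("input.jpg")   # returns a PIL Image; img.mode is e.g. 'RGB'
gray = img.convert("L")         # convert to 8-bit grayscale
gray.save("output.png")         # output format is inferred from the extension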

Open images in a folder

import os
import glob
from PIL import Image
import matplotlib.pyplot as plt

path2image = r"path/to/images"
path2mask = r"path/to/masks"

image_paths = glob.glob(os.path.join(path2image, '*.png'))
image_names = [os.path.basename(path) for path in image_paths]
# Take the first 10 images
image_names = image_names[0:10]

plt.close()
nrows = len(image_names); ncols = 2
fig, axes = plt.subplots(nrows, ncols, figsize=(5*ncols, 5*nrows))
axes[0,0].set_title('Original Image')
axes[0,1].set_title('Original Mask')
for i, name in enumerate(image_names):
    image_path = os.path.join(path2image, name)
    mask_path = os.path.join(path2mask, name)

    image = Image.open(image_path)
    mask = Image.open(mask_path)

    axes[i,0].imshow(image)
    axes[i,1].imshow(mask, cmap='viridis')
plt.show()

H×W×3 or 3×H×W

from PIL import Image
import numpy as np
from torchvision import transforms

image_path = r"path/to/image"
mask_path = r"path/to/mask"

img = Image.open(image_path) # size: (width, height)
mask = Image.open(mask_path) # size: (width, height)

np_img = np.array(img) # shape: (height, width, 3)
np_mask = np.array(mask) # shape: (height, width)

ts_img = transforms.PILToTensor()(img) # shape: torch.Size([3, height, width])
ts_mask = transforms.PILToTensor()(mask) # shape: torch.Size([1, height, width])
print(np_img[0:5, 0:4, :]) # clip shape: 5×4×3
# [[[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]

#  [[244 243 248]
#   [244 243 248]
#   [244 243 248]
#   [244 243 248]]]

print(ts_img[:, 0:4, 0:5]) # clip shape: 3×4×5
# tensor([[[244, 244, 244, 244, 244],
#          [244, 244, 244, 244, 244],
#          [244, 244, 244, 244, 244],
#          [244, 244, 244, 244, 244]],

#         [[243, 243, 243, 243, 243],
#          [243, 243, 243, 243, 243],
#          [243, 243, 243, 243, 243],
#          [243, 243, 243, 243, 243]],

#         [[248, 248, 248, 248, 248],
#          [248, 248, 248, 248, 248],
#          [248, 248, 248, 248, 248],
#          [248, 248, 248, 248, 248]]], dtype=torch.uint8)
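
As a related note, torchvision's ToTensor() also returns a C×H×W tensor, but it additionally scales uint8 values to float32 in [0, 1], whereas PILToTensor() keeps the raw uint8 values. A minimal sketch, reusing the img opened above:

from torchvision import transforms

ts_float = transforms.ToTensor()(img)    # torch.float32, values in [0, 1]
ts_uint8 = transforms.PILToTensor()(img) # torch.uint8, values in [0, 255]
# both have shape [3, height, width]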

PIL Image to numpy array and torch tensor

from PIL import Image
import numpy as np
import torch as tc
from torchvision import transforms
import matplotlib.pyplot as plt

path2image = r"./coco/images/astronaut.jpg"
path2mask = r"./mask.png"

image = Image.open(path2image)
mask = Image.open(path2mask)
print(f"{image.size = }, {image.mode = }")
print(f"{mask.size = }, {mask.mode = }")
# image.size = (512, 512), image.mode = 'RGB'
# mask.size = (512, 512), mask.mode = 'L'

'''PIL image to numpy array'''
npimg = np.array(image)
npmask = np.array(mask)
print(f"{npimg.shape = }\n{npmask.shape = }")
# npimg.shape = (512, 512, 3)
# npmask.shape = (512, 512)

'''PIL image to torch tensor'''
tcimg = transforms.PILToTensor()(image)
tcmask = transforms.PILToTensor()(mask)
print(f"{tcimg.shape = }\n{tcmask.shape = }")
# tcimg.shape = torch.Size([3, 512, 512])
# tcmask.shape = torch.Size([1, 512, 512])

'''torch tensor to numpy array'''
# tc2npimg = np.array(tcimg.numpy())
# tc2npmask = np.array(tcmask.numpy())
tc2npimg = np.array(tcimg)
tc2npmask = np.array(tcmask)
print(f"{tc2npimg.shape = }\n{tc2npmask.shape = }")
# tc2npimg.shape = (3, 512, 512)
# tc2npmask.shape = (1, 512, 512)

'''numpy array to torch tensor'''
np2tcimg = tc.from_numpy(npimg)
np2tcmask = tc.from_numpy(npmask)
print(f"{np2tcimg.shape = }\n{np2tcmask.shape = }")
# np2tcimg.shape = torch.Size([512, 512, 3])
# np2tcmask.shape = torch.Size([512, 512])

'''Plot HxWxC and CxHxW'''
_, axes = plt.subplots(nrows=2, ncols=4, figsize=(12,6))
'''
When plotting, whether it's a numpy array or a torch tensor,
make sure its shape is HxW or HxWxC (C=1 also works)
'''
axes[0,0].imshow(npimg) # (512, 512, 3)
axes[0,1].imshow(tcimg.permute(1,2,0)) # [3, 512, 512] -> [512, 512, 3]
axes[0,2].imshow(np2tcimg) # [512, 512, 3]
axes[0,3].imshow(tc2npimg.transpose(1,2,0)) # (3, 512, 512) -> (512, 512, 3)
axes[1,0].imshow(npmask) # (512, 512)
axes[1,1].imshow(tcmask.permute(1,2,0)) # [1, 512, 512] -> [512, 512, 1]
axes[1,2].imshow(np2tcmask) # [512, 512]
axes[1,3].imshow(tc2npmask.transpose(1,2,0)) # (1, 512, 512) -> (512, 512, 1)
plt.show()

OpenCV/cv2

OpenCV docs

conda install opencv
import cv2

Why does cv2 return a 3-channel image from a single-channel image?

'''0 for grayscale (cv2.IMREAD_GRAYSCALE), 1 for BGR color (cv2.IMREAD_COLOR)'''
mask = cv2.imread(mask_path, 0) # shape: (1024, 1024)
mask = cv2.imread(mask_path, 1) # shape: (1024, 1024, 3)
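
Note that cv2.imread returns color images in BGR channel order, while PIL and matplotlib assume RGB. A minimal conversion sketch (the path is a placeholder):

import cv2

img_bgr = cv2.imread("path/to/image.png", 1)        # shape: (H, W, 3), BGR
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # shape: (H, W, 3), RGB
# img_rgb now displays with correct colors in plt.imshow()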

PIL vs. matplotlib vs. cv2

What is the difference of opening a mask image with PIL and cv2?

import numpy as np
import matplotlib.image
import PIL.Image
import cv2

# exiftool> Color Type: Grayscale
mask_path = r"Path\to\top_potsdam_2_10_0_0.png"

'''PIL (recommended)'''
mask = PIL.Image.open(mask_path) # size: (1024, 1024)
colors, counts = np.unique(mask, return_counts=True)
print(colors, counts)
# [0 1 2 3 4 5 6]
# [468491 157674 94961 255461 10278 783 60928]

'''matplotlib'''
mask = matplotlib.image.imread(mask_path) # shape: (1024, 1024)
colors, counts = np.unique(mask, return_counts=True)
print([float(f'{color:0.4f}') for color in colors], counts)
# [0.0, 0.0039, 0.0078, 0.0118, 0.0157, 0.0196, 0.0235]
# [0, 1/255, 2/255, 3/255, 4/255, 5/255, 6/255] <--
# [468491 157674 94961 255461 10278 783 60928]
# PNG images are returned as float arrays (0-1). All other formats are returned as int arrays.

'''cv2'''
mask = cv2.imread(mask_path, 0) # shape: (1024, 1024)
colors, counts = np.unique(mask, return_counts=True)
print(colors, counts)
# [0 1 2 3 4 5 6]
# [468491 157674 94961 255461 10278 783 60928]

Image saving

Saving PNG images with PIL is 4 times slower than saving them with OpenCV
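
The linked post benchmarks this; a minimal sketch to reproduce the comparison yourself (the synthetic image and output paths are placeholders):

import time
import cv2
import numpy as np
from PIL import Image

img = np.random.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)

t0 = time.perf_counter()
Image.fromarray(img).save("pil_out.png")
t1 = time.perf_counter()
cv2.imwrite("cv2_out.png", img)  # note: cv2 treats the array as BGR
t2 = time.perf_counter()
print(f"PIL: {t1 - t0:.3f}s, cv2: {t2 - t1:.3f}s")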

Albumentations

  • Docs
    albumentations
    Mask augmentation for segmentation

  • Demo

    import numpy as np
    import albumentations as A
    from PIL import Image
    import matplotlib.pyplot as plt

    # You can download the astronaut.jpg image from
    # https://github.com/pytorch/vision/blob/main/gallery/
    # and convert the annotation.json file to a mask yourself
    path2image = r"./coco/images/astronaut.jpg"
    path2mask = r"./mask.png"

    TransformsList = [
        A.RandomBrightnessContrast(brightness_limit=0.25, contrast_limit=0.25, p=1),
        A.RandomRotate90(p=1),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255)
    ]

    # read image and mask
    image = Image.open(path2image) # PIL Image
    mask = Image.open(path2mask) # PIL Image
    # convert PIL Image to numpy array
    image = np.array(image) # shape: HxWx3
    mask = np.array(mask) # shape: HxW

    plt.close()
    _, axes = plt.subplots(
        nrows=2, ncols=len(TransformsList)+1,
        figsize=(3*(len(TransformsList)+1), 3*2) # (size*cols, size*rows)
    )

    axes[0][0].imshow(image)
    axes[0][0].set_title("Original")
    axes[1][0].imshow(mask)

    for i, transform in enumerate(TransformsList):
        '''Don't transform image and mask separately
        when transforms contain random operators'''
        # trans_image = transform(image) # <-- Don't
        # trans_mask = transform(mask) # <-- Don't
        transformed = transform(image=image.copy(), mask=mask.copy()) # <-- Instead
        trans_image = transformed['image']
        trans_mask = transformed['mask']
        print(f"{type(transform).__name__}: {trans_image.shape = }")
        print(f"{type(transform).__name__}: {trans_mask.shape = }")
        # RandomBrightnessContrast: trans_image.shape = (512, 512, 3)
        # RandomBrightnessContrast: trans_mask.shape = (512, 512)
        # RandomRotate90: trans_image.shape = (512, 512, 3)
        # RandomRotate90: trans_mask.shape = (512, 512)
        # Normalize: trans_image.shape = (512, 512, 3)
        # Normalize: trans_mask.shape = (512, 512) <-- Note the channels of mask

        axes[0][i+1].imshow(trans_image)
        axes[0][i+1].set_title(type(transform).__name__)
        axes[1][i+1].imshow(trans_mask)
    plt.show()

    (Figure: astronaut image and mask after each Albumentations transform)

compose

  • Docs

    Mask augmentation for segmentation

  • Demo

    import numpy as np
    import albumentations as A
    from PIL import Image
    import matplotlib.pyplot as plt

    # You can download the astronaut.jpg image from
    # https://github.com/pytorch/vision/blob/main/gallery/
    # and convert the annotation.json file to a mask yourself
    path2image = r"./coco/images/astronaut.jpg"
    path2mask = r"./mask.png"

    # read image and mask
    image = Image.open(path2image) # PIL Image
    mask = Image.open(path2mask) # PIL Image
    # convert PIL Image to numpy array
    image = np.array(image) # shape: HxWx3
    mask = np.array(mask) # shape: HxW

    transform = A.Compose([
        A.RandomCrop(width=224, height=224),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ])

    transformed = transform(image=image, mask=mask)
    trans_image = transformed['image']
    trans_mask = transformed['mask']

    plt.close()
    _, axes = plt.subplots(
        nrows=1, ncols=4,
        figsize=(3*4, 3*1) # (size*cols, size*rows)
    )
    axes[0].imshow(image)
    axes[0].set_title("original image")
    axes[1].imshow(trans_image)
    axes[1].set_title("transformed image")
    axes[2].imshow(mask)
    axes[2].set_title("original mask")
    axes[3].imshow(trans_mask)
    axes[3].set_title("transformed mask")
    plt.show()

    (Figure: original and A.Compose-transformed astronaut image and mask)

Normalization

  • Intro

    import albumentations as A
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        max_pixel_value=255.0,
        always_apply=False,
        p=1.0
    )
  • Docs
    augmentations.Normalize

  • Understanding
    Normalization is applied by the formula:

    $$img = \left(\frac{img}{max\_pixel\_value} - mean\right) / std$$

  • Demo

    # import torch
    import numpy as np
    import albumentations as A

    channel1 = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    channel2 = [[0, 0, 2], [0, 2, 0], [2, 0, 0]]
    arr = np.array([channel1, channel2]) # C×H×W: (2, 3, 3)
    print(arr)
    # [[[1 0 0]
    # [0 1 0]
    # [0 0 1]]

    # [[0 0 2]
    # [0 2 0]
    # [2 0 0]]]
    arr = arr.transpose(1, 2, 0) # H×W×C: (3, 3, 2)
    print(arr)
    # [[[1 0]
    # [0 0]
    # [0 2]]

    # [[0 0]
    # [1 2]
    # [0 0]]

    # [[0 2]
    # [0 0]
    # [1 0]]]

    '''
    channel 1:
    1: (1/max(255) - mean(0))/std(1) = 0.0039
    0: (0/max(255) - mean(0))/std(1) = 0
    channel 2:
    2: (2/max(255) - mean(1))/std(2) = -0.4961
    0: (0/max(255) - mean(1))/std(2) = -0.5
    '''
    transform = A.Normalize(mean=[0, 1], std=[1, 2])
    # Show printed floats with 4 decimal places
    np.set_printoptions(precision=4)
    trans_arr = transform(image=arr)
    print(trans_arr)
    # {'image': array([[[ 0.0039, -0.5 ],
    # [ 0. , -0.5 ],
    # [ 0. , -0.4961]],

    # [[ 0. , -0.5 ],
    # [ 0.0039, -0.4961],
    # [ 0. , -0.5 ]],

    # [[ 0. , -0.4961],
    # [ 0. , -0.5 ],
    # [ 0.0039, -0.5 ]]], dtype=float32)}

Detectron2

detectron2

timm

How to fix pretrained=True download errors

LocalEntryNotFoundError when loading downloaded pretrained model using timm.create_model

import timm
# model = timm.create_model('resnet34', pretrained=True)
model = timm.create_model(
    'resnet34',
    pretrained=True,
    pretrained_cfg_overlay=dict(file=r'path\to\checkpoint'),
)
model.eval()

The checkpoint file can be *.safetensors, *.bin, *.pth, *.pt, *.ckpt, etc.

How to get the location where pretrained models were downloaded

  • Print the constant HUGGINGFACE_HUB_CACHE

    from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
    print(HUGGINGFACE_HUB_CACHE)
    # windows: C:\Users\*\.cache\huggingface\hub
    # colab: /root/.cache/huggingface/hub
  • Print a detailed report of the cache
    You can also print a detailed report directly from the huggingface-cli (a programmatic alternative is sketched at the end of this list):

    huggingface-cli scan-cache
    (conda_env) ~>huggingface-cli scan-cache
    REPO ID               REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
    --------------------- --------- ------------ -------- ------------- ------------- ---- -------------------------------------------------------
    timm/resnet34.a1_in1k model     87.3M        2        4 hours ago   4 hours ago   main ~\.cache\huggingface\hub\models--timm--resnet34.a1_in1k

    Done in 0.0s. Scanned 1 repo(s) for a total of 87.3M.
  • The folder structure of the cache, taking timm/resnet34.a1_in1k as an example
    Manage huggingface_hub cache-system

    C:\Users\*\.cache\huggingface\hub
    ├── models--timm--resnet34.a1_in1k
    |   ├── blobs
    |   ├── refs
    |   |   └── main
    |   └── snapshots
    |       └── 19cbb59c79ac0ee9142c596cdf336d3beb9683d2
    |           ├── config.json
    |           └── model.safetensors
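
    The same information is available programmatically; a minimal sketch using huggingface_hub's scan_cache_dir (available in recent huggingface_hub releases):

    from huggingface_hub import scan_cache_dir

    cache_info = scan_cache_dir()  # scans the default HF cache directory
    for repo in cache_info.repos:
        print(repo.repo_id, repo.repo_type, repo.size_on_disk_str)
    # e.g. timm/resnet34.a1_in1k model 87.3M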

Usage

requests.exceptions.SSLError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443)

import os
# Route traffic through a local proxy; adjust the address/port to your setup
os.environ['HTTP_PROXY'] = 'http://127.0.0.1:7890'
os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:7890'

import timm
model = timm.create_model('hf-hub:timm/resnet34.a1_in1k', pretrained=True)
model.eval()

einops

einops

rearrange

einops.rearrange

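A minimal sketch of rearrange, using the H×W×C ↔ C×H×W conversions discussed above (the shapes are illustrative):

import numpy as np
from einops import rearrange

img = np.zeros((512, 512, 3))           # H×W×C
chw = rearrange(img, 'h w c -> c h w')  # (3, 512, 512)
hwc = rearrange(chw, 'c h w -> h w c')  # back to (512, 512, 3)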