Commonly Used Modules in Deep Learning
ExifTool
ExifTool Command-Line Examples
Pillow
PIL.Image.save
PIL.Image.convert
conda install pillow
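A minimal sketch of the two methods named above, Image.convert and Image.save (demo.jpg is a placeholder file name):
from PIL import Image
img = Image.open("demo.jpg")  # placeholder path
gray = img.convert("L")  # "L" = 8-bit grayscale; "RGB"/"RGBA" are also common modes
gray.save("demo_gray.png")  # output format is inferred from the extension
img.convert("RGB").save("demo_out.jpg", quality=95)  # format-specific keywords also work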
open images in a folder
import os
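The rest of this snippet is collapsed; a minimal sketch of the idea, assuming the images live in ./images (a placeholder folder):
import os
from PIL import Image

folder = "./images"  # placeholder folder
for fname in sorted(os.listdir(folder)):
    if fname.lower().endswith((".jpg", ".jpeg", ".png")):
        img = Image.open(os.path.join(folder, fname))
        print(fname, img.size, img.mode)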
H×W×3 or 3×H×W
from PIL import Image
print(np_img[0:5, 0:4, :])  # clip shape: 5×4×3
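A short sketch of the two layouts: numpy/PIL/cv2/matplotlib use channels-last H×W×3 arrays, while PyTorch expects channels-first 3×H×W tensors:
import numpy as np
import torch

np_img = np.zeros((512, 512, 3), dtype=np.uint8)  # H×W×3 (numpy/PIL/cv2 convention)
t_img = torch.from_numpy(np_img).permute(2, 0, 1)  # 3×H×W (PyTorch convention)
print(np_img.shape, t_img.shape)  # (512, 512, 3) torch.Size([3, 512, 512])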
PIL Image to numpy array and torch tensor
from PIL import Image
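The rest of this snippet is collapsed; a sketch of the usual conversions (the image path is a placeholder, reused from the demos below):
from PIL import Image
import numpy as np
import torch
from torchvision import transforms

pil_img = Image.open("./coco/images/astronaut.jpg")  # placeholder path
np_img = np.array(pil_img)  # uint8 array, H×W×3
t_hwc = torch.from_numpy(np_img)  # still H×W×3, uint8
t_chw = transforms.ToTensor()(pil_img)  # float32 in [0, 1], 3×H×W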
OpenCV/cv2
conda install opencv
Why does cv2 return a 3-channel image from a single-channel image?
'''0 for grayscale, 1 for BGR image'''
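cv2.imread defaults to cv2.IMREAD_COLOR (flag 1), so even a grayscale file comes back as a 3-channel BGR array; pass 0 (cv2.IMREAD_GRAYSCALE) or cv2.IMREAD_UNCHANGED to keep a single channel. A sketch (the mask path is a placeholder):
import cv2

img_bgr = cv2.imread("./mask.png")  # default flag 1: H×W×3 even for grayscale files
img_gray = cv2.imread("./mask.png", 0)  # flag 0: H×W, single channel
img_raw = cv2.imread("./mask.png", cv2.IMREAD_UNCHANGED)  # keep the file's own channels
print(img_bgr.shape, img_gray.shape, img_raw.shape)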
PIL vs. matplotlib vs. cv2
What is the difference between opening a mask image with PIL and with cv2?
import numpy as np
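The rest of this snippet is collapsed; a sketch of the difference, assuming mask.png is a palette (mode "P") label mask: PIL returns the raw palette indices (the class ids), while cv2 decodes them to 3-channel BGR colors by default:
import numpy as np
from PIL import Image
import cv2

pil_mask = np.array(Image.open("./mask.png"))  # H×W, palette indices (class ids) preserved
cv2_mask = cv2.imread("./mask.png")  # H×W×3, indices decoded to BGR colors
print(pil_mask.shape, cv2_mask.shape)
print(np.unique(pil_mask))  # class ids, e.g. [0 1 2 ...]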
Image saving
Saving PNG images with PIL is 4 times slower than saving them with OpenCV
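A rough timing sketch to check the claim (numbers depend on the machine, image content, and compression level, so this is not a rigorous benchmark):
import time
import numpy as np
from PIL import Image
import cv2

img = np.random.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)

start = time.perf_counter()
Image.fromarray(img).save("pil_out.png")
print("PIL:", time.perf_counter() - start)

start = time.perf_counter()
cv2.imwrite("cv2_out.png", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))  # cv2 expects BGR
print("cv2:", time.perf_counter() - start)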
Albumentations
- Demo
import numpy as np
import albumentations as A
from PIL import Image
import matplotlib.pyplot as plt
# You can download the astronaut.jpg image from
# https://github.com/pytorch/vision/blob/main/gallery/
# and convert the annotation.json file to a mask yourself
path2image = r"./coco/images/astronaut.jpg"
path2mask = r"./mask.png"
TransformsList = [
    A.RandomBrightnessContrast(brightness_limit=0.25, contrast_limit=0.25, p=1),
    A.RandomRotate90(p=1),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255)
]
# read image and mask
image = Image.open(path2image) # PIL Image
mask = Image.open(path2mask) # PIL Image
# convert PIL Image to numpy array
image = np.array(image)  # shape: HxWx3
mask = np.array(mask)  # shape: HxW
plt.close()
_, axes = plt.subplots(
    nrows=2, ncols=len(TransformsList)+1,
    figsize=(3*(len(TransformsList)+1), 3*2)  # (size*cols, size*rows)
)
axes[0][0].imshow(image)
axes[0][0].set_title("Original")
axes[1][0].imshow(mask)
for i, transform in enumerate(TransformsList):
    '''Don't transform the image and mask separately
    when transforms contain random operators'''
    # trans_image = transform(image)  # <-- Don't
    # trans_mask = transform(mask)  # <-- Don't
    transformed = transform(image=image.copy(), mask=mask.copy())  # <-- Instead
    trans_image = transformed['image']
    trans_mask = transformed['mask']
    print(f"{type(transform).__name__}: {trans_image.shape = }")
    print(f"{type(transform).__name__}: {trans_mask.shape = }")
    # RandomBrightnessContrast: trans_image.shape = (512, 512, 3)
    # RandomBrightnessContrast: trans_mask.shape = (512, 512)
    # RandomRotate90: trans_image.shape = (512, 512, 3)
    # RandomRotate90: trans_mask.shape = (512, 512)
    # Normalize: trans_image.shape = (512, 512, 3)
    # Normalize: trans_mask.shape = (512, 512)  # <-- Note the channels of the mask
    axes[0][i+1].imshow(trans_image)
    axes[0][i+1].set_title(type(transform).__name__)
    axes[1][i+1].imshow(trans_mask)
plt.show()
Compose
- Docs
- Demo
import numpy as np
import albumentations as A
from PIL import Image
import matplotlib.pyplot as plt
# You can download the astronaut.jpg image from
# https://github.com/pytorch/vision/blob/main/gallery/
# and convert the annotation.json file to a mask yourself
path2image = r"./coco/images/astronaut.jpg"
path2mask = r"./mask.png"
# read image and mask
image = Image.open(path2image) # PIL Image
mask = Image.open(path2mask) # PIL Image
# convert PIL Image to numpy array
image = np.array(image)  # shape: HxWx3
mask = np.array(mask)  # shape: HxW
transform = A.Compose([
    A.RandomCrop(width=224, height=224),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
])
transformed = transform(image=image, mask=mask)
trans_image = transformed['image']
trans_mask = transformed['mask']
plt.close()
_, axes = plt.subplots(
    nrows=1, ncols=4,
    figsize=(3*4, 3*1)  # (size*cols, size*rows)
)
axes[0].imshow(image)
axes[0].set_title("original image")
axes[1].imshow(trans_image)
axes[1].set_title("transformed image")
axes[2].imshow(mask)
axes[2].set_title("original mask")
axes[3].imshow(trans_mask)
axes[3].set_title("transformed mask")
Normalization
- Intro
import albumentations as A
A.Normalize(
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0,
    always_apply=False,
    p=1.0
)
- Understanding
Normalization is applied by the formula: img = (img / max_pixel_value - mean) / std
- Demo
# import torch
import numpy as np
import albumentations as A
channel1 = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
channel2 = [[0, 0, 2], [0, 2, 0], [2, 0, 0]]
arr = np.array([channel1, channel2]) # C×H×W: (2, 3, 3)
print(arr)
# [[[1 0 0]
# [0 1 0]
# [0 0 1]]
# [[0 0 2]
# [0 2 0]
# [2 0 0]]]
arr = arr.transpose(1, 2, 0) # H×W×C: (3, 3, 2)
print(arr)
# [[[1 0]
# [0 0]
# [0 2]]
# [[0 0]
# [1 2]
# [0 0]]
# [[0 2]
# [0 0]
# [1 0]]]
'''
channel 1:
    1: (1/max(255) - mean(0))/std(1) = 0.0039
    0: (0/max(255) - mean(0))/std(1) = 0
channel 2:
    2: (2/max(255) - mean(1))/std(2) = -0.4961
    0: (0/max(255) - mean(1))/std(2) = -0.5
'''
transform = A.Normalize(mean=[0, 1], std=[1, 2])
# Print floats with 4 decimal places
np.set_printoptions(precision=4)
trans_arr = transform(image=arr)
print(trans_arr)
# {'image': array([[[ 0.0039, -0.5 ],
# [ 0. , -0.5 ],
# [ 0. , -0.4961]],
# [[ 0. , -0.5 ],
# [ 0.0039, -0.4961],
# [ 0. , -0.5 ]],
# [[ 0. , -0.4961],
# [ 0. , -0.5 ],
# [ 0.0039, -0.5 ]]], dtype=float32)}
Detectron2
timm
- Path to website: PyTorch Image Models (timm)
How to fix LocalEntryNotFoundError when loading a downloaded pretrained model with timm.create_model(pretrained=True)
import timm
where the checkpoint can be a *.safetensors, *.bin, *.pth, *.pt, *.ckpt, etc. file.
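The rest of this snippet is collapsed; a sketch of one way to do it with timm's pretrained_cfg_overlay argument, which points create_model at a local checkpoint instead of the Hub (the model name and file path here are placeholders):
import timm

model = timm.create_model(
    "resnet34.a1_in1k",  # placeholder model name
    pretrained=True,
    # load weights from a local file instead of downloading from the Hub
    pretrained_cfg_overlay=dict(file="./checkpoints/model.safetensors"),  # placeholder path
)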
How to get the location where pretrained models were downloaded
- Print the constant HUGGINGFACE_HUB_CACHE
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
print(HUGGINGFACE_HUB_CACHE)
# windows: C:\Users\*\.cache\huggingface\hub
# colab: /root/.cache/huggingface/hub
- Print the detailed report of caches
You can also print a detailed report directly from the huggingface-cli using:
huggingface-cli scan-cache
(conda_env) ~>huggingface-cli scan-cache
REPO ID REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
--------------------- --------- ------------ -------- ------------- ------------- ---- -------------------------------------------------------
timm/resnet34.a1_in1k model 87.3M 2 4 hours ago 4 hours ago main ~\.cache\huggingface\hub\models--timm--resnet34.a1_in1k
Done in 0.0s. Scanned 1 repo(s) for a total of 87.3M.
- The folder structure of the cache, taking timm/resnet34.a1_in1k for example
Manage huggingface_hub cache-system
C:\Users\*\.cache\huggingface\hub
├── models--timm--resnet34.a1_in1k
| ├── blobs
| ├── refs
| | └── main
| └── snapshots
| └── 19cbb59c79ac0ee9142c596cdf336d3beb9683d2
| ├── config.json
| └── model.safetensors
Usage
requests.exceptions.SSLError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443)
import os
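The rest of this snippet is collapsed; since it starts with import os, the fix is presumably an environment-variable workaround. One sketch, assuming the checkpoint is already in the local cache, is to enable huggingface_hub's offline mode so the HTTPS connection is never opened (set the variable before importing timm/huggingface_hub):
import os
os.environ["HF_HUB_OFFLINE"] = "1"  # use the local cache, skip the request to huggingface.co

import timm
model = timm.create_model("resnet34.a1_in1k", pretrained=True)  # placeholder model name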
einops
rearrange
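A minimal sketch of rearrange, reusing the H×W×C vs. 3×H×W layouts discussed above (the 16×16 patch size is arbitrary):
import numpy as np
from einops import rearrange

img = np.zeros((512, 512, 3))
chw = rearrange(img, "h w c -> c h w")  # H×W×C -> C×H×W
patches = rearrange(img, "(h p1) (w p2) c -> (h w) p1 p2 c", p1=16, p2=16)  # split into patches
print(chw.shape, patches.shape)  # (3, 512, 512) (1024, 16, 16, 3)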