COCO format

Reference

Transforms v2: End-to-end object detection/segmentation example
Transforms v2: End-to-end object detection/segmentation example
YouTube video introducing COCO
COCO Dataset Format - Complete Walkthrough
Blog that introduces COCO format
COCO format , what and how

What is the COCO format?

Introduction
The COCO (Common Objects in Context) format is a standard format for storing and sharing annotations for images and videos. It was developed for the COCO image and video recognition challenge, which is a large-scale benchmark for object detection and image segmentation.

In the COCO format, annotations are stored in a JSON file, which contains information about the image or video, including the file path, size, and a list of annotated objects. Each object is represented by a bounding box, which specifies the location and size of the object in the image, as well as a label indicating the class of the object.

Code Demo

How to create mask images from COCO annotations?

# %%
# import package
import os
from PIL import Image
from matplotlib import pyplot as plt
from pycocotools.coco import COCO

# %%
# some usage about coco
'''
You can download the astronaut.jpg image from
https://github.com/pytorch/vision/blob/main/gallery/assets/astronaut.jpg
and instances.json annotation from
https://github.com/pytorch/vision/blob/main/gallery/assets/coco/instances.json
Besides, you need to change the "file_name" value from "000000000001.jpg" to "astronaut.jpg" in the json file.
Folder Structure:
coco
├── images
│   └── astronaut.jpg
├── instances.json
'''
IMAGES_PATH = r"./coco/images"
ANNOTATIONS_PATH = r"./coco/instances.json"

# Parse the annotation file and build the lookup tables (imgs, anns, ...).
coco = COCO(ANNOTATIONS_PATH)
# type(coco) = <class 'pycocotools.coco.COCO'>

img_id = 1
img = coco.imgs[img_id] # <class 'dict'>, coco.imgs is keyed by image id

# coco.anns is keyed by *annotation* id, not by image id, so indexing it with
# img_id only works when the two ids happen to coincide. Ask for the
# annotation ids that belong to this image and take the first annotation.
ann_ids = coco.getAnnIds(imgIds=img_id)
ann = coco.loadAnns(ann_ids)[0] # <class 'dict'>

print(f"{img.keys() = }")
# img.keys() = dict_keys(['file_name', 'height', 'width', 'id'])
print(f"{ann.keys() = }")
# ann.keys() = dict_keys(['segmentation', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id'])

# %%
# show image
# The file name comes from the image record of the annotation file and is
# resolved against the images folder.
image = Image.open(os.path.join(IMAGES_PATH, img['file_name']))
plt.imshow(X=image)
plt.show()

# %%
# show mask
# Rasterize the annotation into a binary mask. For this sample:
#   type(mask)      -> <class 'numpy.ndarray'>
#   mask.shape      -> (512, 512)
#   np.unique(mask) -> [0, 1]
mask = coco.annToMask(ann)
plt.imshow(X=mask)
plt.show()

# %%
# draw scatter plots on the mask
# The polygon is stored as one flat list [x0, y0, x1, y1, ...]: even
# positions hold x values and odd positions hold y values, so two stride-2
# slices split it without an explicit index loop.
coordinate_seg = ann['segmentation'][0]
xs = coordinate_seg[0::2] # pixels to the left edge (start at 0)
ys = coordinate_seg[1::2] # pixels to the top edge (start at 0)

plt.imshow(mask)
plt.scatter(x=xs, y=ys, s=15, marker='x', color='red')
plt.show()

# %%
# draw rectangle on the mask
from copy import copy
from matplotlib.patches import Rectangle

# The bbox is used as [x, y, width, height], with (x, y) as the anchor point:
#
#  (xy)---- width -----+
#   |                  |
# height               |
#   |                  |
#   +------------------+
coordinate_box = ann['bbox']
box = Rectangle(
    coordinate_box[0:2],
    coordinate_box[2],
    coordinate_box[3],
    facecolor='none',
    edgecolor='red',
)

# gca = "get the current axes".
# A copy of the patch is added (not the patch itself) to avoid
# "RuntimeError: Can not put single artist in more than one figure".
current_axes = plt.gca()
current_axes.add_patch(copy(box))
plt.imshow(mask)
plt.show()

# %%
# group photo
# One row with four panels: image | mask | mask + polygon points | mask + bbox
plt.close()
n_panels = 4
panel_size = 5
_, axes = plt.subplots(
    1, n_panels,
    figsize=(panel_size * n_panels, panel_size) # (size*cols, size*rows)
)
ax_image, ax_mask, ax_points, ax_box = axes

ax_image.imshow(image)
# the three remaining panels all use the mask as background
for ax in (ax_mask, ax_points, ax_box):
    ax.imshow(mask)
ax_points.scatter(xs, ys, s=15, marker='x', color='red')
ax_box.add_patch(copy(box))
plt.show()

astronaut-mask-scatter-rectangle

How to save the mask

# %%
# import package
from PIL import Image
from matplotlib import pyplot as plt
from pycocotools.coco import COCO

# %%
# some usage about coco
'''
You can download the astronaut.jpg image from
https://github.com/pytorch/vision/blob/main/gallery/assets/astronaut.jpg
and instances.json annotation from
https://github.com/pytorch/vision/blob/main/gallery/assets/coco/instances.json
Besides, you need to change the "file_name" value from "000000000001.jpg" to "astronaut.jpg" in the json file.
Folder Structure:
coco
├── images
│   └── astronaut.jpg
├── instances.json
'''
IMAGES_PATH = r"./coco/images"
ANNOTATIONS_PATH = r"./coco/instances.json"

# Parse the annotation file and build the lookup tables (imgs, anns, ...).
coco = COCO(ANNOTATIONS_PATH)

img_id = 1
img = coco.imgs[img_id] # <class 'dict'>, coco.imgs is keyed by image id

# coco.anns is keyed by *annotation* id, not by image id, so indexing it with
# img_id only works when the two ids happen to coincide. Ask for the
# annotation ids that belong to this image and take the first annotation.
ann_ids = coco.getAnnIds(imgIds=img_id)
ann = coco.loadAnns(ann_ids)[0] # <class 'dict'>

# %%
# save mask
# annToMask gives a numpy.ndarray; for this sample:
#   mask.shape      -> (512, 512)
#   np.unique(mask) -> [0, 1]
# It is wrapped in a PIL.Image (numpy.ndarray -> PIL.Image) so it can be
# written to disk.
mask = Image.fromarray(coco.annToMask(ann))
mask.save('mask.png')

# %%
# read saved mask
# Reload the file written by the previous cell; the pixel values are
# unchanged: np.unique(mask) = [0, 1]
mask = Image.open(fp="mask.png")
plt.imshow(X=mask)
plt.show()