Source code for optical.converter.createml

"""
__author__: HashTagML
license: MIT
Created: Wednesday, 31st March 2021
"""

import json
import os
import warnings
from typing import Union

import imagesize
import pandas as pd

from .base import FormatSpec
from .utils import exists, get_annotation_dir, get_image_dir


class CreateML(FormatSpec):
    """Class to handle createML json annotation transformations

    Each annotation file is read as a JSON array whose entries look like
    ``{"image": "<file name>", "annotations": [{"label": ..., "coordinates":
    {"x": ..., "y": ..., "width": ..., "height": ...}}, ...]}``. The parsed
    annotations are exposed as a flat dataframe in ``master_df`` (one row per
    annotation).

    Args:
        root (Union[str, os.PathLike]): path to root directory. Expects the ``root`` directory to have either
            of the following layouts:

            .. code-block:: bash

                root
                ├── images
                │   ├── train
                │   │   ├── 1.jpg
                │   │   ├── 2.jpg
                │   │   │   ...
                │   │   └── n.jpg
                │   ├── valid (...)
                │   └── test (...)
                │
                └── annotations
                    ├── train.json
                    ├── valid.json
                    └── test.json

            or,

            .. code-block:: bash

                root
                ├── images
                │   ├── 1.jpg
                │   ├── 2.jpg
                │   │   ...
                │   └── n.jpg
                │
                └── annotations
                    └── label.json

    Raises:
        AssertionError: if ``root`` is missing the ``images`` or ``annotations`` directory.
        ValueError: if an annotation file contains no entries.
    """

    def __init__(self, root: Union[str, os.PathLike]):
        """Validate the layout under ``root`` and build ``master_df``."""
        super().__init__(root)
        self._image_dir = get_image_dir(root)
        self._annotation_dir = get_annotation_dir(root)
        self._has_image_split = False
        # Raised explicitly instead of via ``assert`` so the checks still run
        # under ``python -O``; the exception type is kept as AssertionError so
        # existing callers observe the same error as before.
        if not exists(self._image_dir):
            raise AssertionError("root is missing `images` directory.")
        if not exists(self._annotation_dir):
            raise AssertionError("root is missing `annotations` directory.")
        # presumably populates ``self._splits`` (and may update
        # ``self._has_image_split``); it is defined outside this class - verify
        # against the base class.
        self._find_splits()
        self._resolve_dataframe()

    def _resolve_dataframe(self):
        """Read the annotation file of every split and assemble ``self.master_df``.

        One row is produced per annotation, carrying the image name, image
        path, image size, box values, category and split. Entries whose image
        file is not found in the image directory are skipped with a warning.
        A ``class_id`` column is added, numbered in order of first appearance
        of each category.

        Raises:
            ValueError: if an annotation file contains no entries.
        """
        master_data = {
            "image_id": [],
            "image_path": [],
            "image_width": [],
            "image_height": [],
            "x_min": [],
            "y_min": [],
            "width": [],
            "height": [],
            "category": [],
            "split": [],
        }

        for split in self._splits:
            # Without per-split image folders all images live directly in the
            # image directory and every row is tagged with the "main" split.
            image_dir = self._image_dir / split if self._has_image_split else self._image_dir
            split_value = split if self._has_image_split else "main"
            annotation_file = self._annotation_dir / f"{split}.json"

            # JSON is UTF-8 by specification; do not rely on the platform default.
            with open(annotation_file, "r", encoding="utf-8") as f:
                json_data = json.load(f)

            if not json_data:
                # A bare string cannot be raised in Python 3 (it fails with a
                # TypeError), so raise a real exception naming the file.
                raise ValueError(f"annotation file is empty: {annotation_file}")

            for data in json_data:
                image_name = data["image"]
                image_path = image_dir / image_name
                # check if image file exists in the image directory
                if not image_path.is_file():
                    warnings.warn(f"Not able to find image {image_name} in path {image_dir}.")
                    continue
                # The size is read once per image and shared by all its annotations.
                image_width, image_height = imagesize.get(image_path)
                for annotation in data["annotations"]:
                    coordinates = annotation["coordinates"]
                    master_data["image_id"].append(image_name)
                    master_data["image_path"].append(image_path)
                    master_data["width"].append(coordinates["width"])
                    master_data["height"].append(coordinates["height"])
                    # NOTE(review): Apple's Create ML format is documented as
                    # using the box centre for x/y; the values are stored here
                    # unchanged as x_min/y_min - confirm this matches the
                    # convention used by the rest of the package.
                    master_data["x_min"].append(coordinates["x"])
                    master_data["y_min"].append(coordinates["y"])
                    master_data["category"].append(annotation["label"])
                    master_data["image_height"].append(image_height)
                    master_data["image_width"].append(image_width)
                    master_data["split"].append(split_value)

        df = pd.DataFrame(master_data)
        # creating class ids based on unique categories
        class_map_df = df["category"].drop_duplicates().reset_index(drop=True).to_frame()
        class_map_df["class_id"] = class_map_df.index.values
        self.master_df = pd.merge(df, class_map_df, on="category")