Source code for optical.converter.simple_json

"""
__author__: HashTagML
license: MIT
Created: Monday, 29th March 2021
"""


import json
import os
from pathlib import Path
from typing import Union

import imagesize
import pandas as pd

from .base import FormatSpec
from .utils import exists, get_annotation_dir, get_image_dir


class SimpleJson(FormatSpec):
    """Represents a SimpleJson annotation object.

    Each annotation file is a JSON object mapping an image file name to a list
    of annotations. Every annotation is read as a mapping with the keys
    ``bbox`` (``[x_min, y_min, x_max, y_max]``), ``classname`` and, optionally,
    ``confidence``. An image mapped to an empty list is kept as a single row
    whose box, category and score fields are ``None``.

    Args:
        root (Union[str, os.PathLike]): path to root directory. Expects the
            ``root`` directory to have either of the following layouts:

            .. code-block:: bash

                root
                ├── images
                │   ├── train
                │   │   ├── 1.jpg
                │   │   ├── 2.jpg
                │   │   │   ...
                │   │   └── n.jpg
                │   ├── valid (...)
                │   └── test (...)
                └── annotations
                    ├── train.json
                    ├── valid.json
                    └── test.json

            or,

            .. code-block:: bash

                root
                ├── images
                │   ├── 1.jpg
                │   ├── 2.jpg
                │   │   ...
                │   └── n.jpg
                └── annotations
                    └── label.json
    """

    def __init__(self, root: Union[str, os.PathLike]):
        """Locate the image/annotation directories and build ``master_df``.

        Args:
            root (Union[str, os.PathLike]): path to the dataset root directory.

        Raises:
            AssertionError: if the ``images`` or ``annotations`` directory
                is missing under ``root``.
        """
        super().__init__(root)
        self._image_dir = get_image_dir(root)
        self._annotation_dir = get_annotation_dir(root)
        self._has_image_split = False
        # NOTE(review): ``assert`` is stripped under ``python -O``. Kept as-is so
        # callers that expect ``AssertionError`` keep working.
        assert exists(self._image_dir), "root is missing `images` directory."
        assert exists(self._annotation_dir), "root is missing `annotations` directory."
        # NOTE(review): ``_find_splits`` is not defined in this class; presumably
        # inherited from ``FormatSpec`` and expected to populate ``self._splits``
        # (and possibly flip ``self._has_image_split``) -- confirm in base class.
        self._find_splits()
        self._resolve_dataframe()

    def _resolve_dataframe(self) -> None:
        """Read every split's annotation file into ``self.master_df``.

        The resulting frame has one row per bounding box (or one placeholder
        row for an image without annotations). Boxes are stored as
        ``x_min, y_min, width, height``; width and height are derived from the
        ``[x_min, y_min, x_max, y_max]`` values found in the file.

        Raises:
            RuntimeWarning: if an annotation file contains no images.
            FileNotFoundError: if an image listed in an annotation file cannot
                be found in the image directory.
        """
        columns = [
            "image_id",
            "image_path",
            "image_width",
            "image_height",
            "x_min",
            "y_min",
            "width",
            "height",
            "category",
            "class_id",
            "split",
            "score",
        ]
        # One list per output column, filled row by row (replaces the former
        # ``eval(col + "s")`` lookup of local variable names).
        data = {col: [] for col in columns}

        def add_row(**values):
            for col in columns:
                data[col].append(values[col])

        # Shared across splits so that a category receives the same class id
        # regardless of which split it appears in.
        class_map = {}

        for split in self._splits:
            simple_json = self._annotation_dir / f"{split}.json"
            with open(simple_json, encoding="utf-8") as f:
                annotations = json.load(f)

            if not annotations:
                raise RuntimeWarning(f"Annotation file {simple_json} is empty. Please check.")

            # Images live in a per-split sub-directory only when the dataset
            # has image splits; otherwise they sit directly in the image dir.
            split_path = split if self._has_image_split else ""
            image_root = Path(self._image_dir).joinpath(split_path)

            for im_id, anns in annotations.items():
                im_path = next(image_root.glob(f"{im_id}"), None)
                if im_path is None:
                    raise FileNotFoundError(
                        f"Image {im_id!r} listed in {simple_json} was not found in {image_root}."
                    )
                im_width, im_height = imagesize.get(im_path)
                image_info = {
                    "image_id": im_id,
                    "image_path": im_path,
                    "image_width": im_width,
                    "image_height": im_height,
                    "split": split,
                }

                if not anns:
                    # Keep the image in the frame even though it has no boxes.
                    add_row(
                        **image_info,
                        x_min=None,
                        y_min=None,
                        width=None,
                        height=None,
                        category=None,
                        class_id=None,
                        score=None,
                    )
                    continue

                for ann in anns:
                    bbox = ann["bbox"]
                    x_min, y_min = bbox[0], bbox[1]
                    category = ann["classname"]
                    add_row(
                        **image_info,
                        x_min=x_min,
                        y_min=y_min,
                        # Corner format -> width/height, without mutating ``bbox``.
                        width=bbox[2] - x_min,
                        height=bbox[3] - y_min,
                        category=category,
                        # First occurrence of a category gets the next free id.
                        class_id=class_map.setdefault(category, len(class_map)),
                        score=ann.get("confidence", None),
                    )

        self.master_df = pd.DataFrame(data=data)