Source code for optical.converter.yolo

"""
__author__: HashTagML
license: MIT
Created: Monday, 29th March 2021
"""


import os
import warnings
from pathlib import Path
from typing import Union

import imagesize
import yaml
import numpy as np
import pandas as pd

from .base import FormatSpec
from .utils import exists, get_image_dir, get_annotation_dir


class Yolo(FormatSpec):
    """Represents a YOLO annotation object.

    On construction the annotation ``.txt`` files are parsed and combined with
    the dimensions of their matching images into ``self.master_df``, holding
    one row per bounding box with absolute (pixel) ``x_min``, ``y_min``,
    ``width`` and ``height`` values.

    Args:
        root (Union[str, os.PathLike]): path to root directory. Expects the
            ``root`` directory to have either of the following layouts:

            .. code-block:: bash

                root
                ├── images
                │   ├── train
                │   │   ├── 1.jpg
                │   │   ├── 2.jpg
                │   │   │   ...
                │   │   └── n.jpg
                │   ├── valid (...)
                │   └── test (...)
                │
                └── annotations
                    ├── train
                    │   ├── 1.txt
                    │   ├── 2.txt
                    │   │   ...
                    │   └── n.txt
                    ├── valid (...)
                    ├── test (...)
                    └── dataset.yaml [Optional]

            or,

            .. code-block:: bash

                root
                ├── images
                │   ├── 1.jpg
                │   ├── 2.jpg
                │   │   ...
                │   └── n.jpg
                │
                └── annotations
                    ├── 1.txt
                    ├── 2.txt
                    │   ...
                    ├── n.txt
                    └── dataset.yaml [Optional]

    Raises:
        AssertionError: if the ``images`` or ``annotations`` directory is
            missing under ``root``.
    """

    def __init__(self, root: Union[str, os.PathLike]):
        super().__init__(root)
        self.class_file = list(Path(self.root).glob("*.yaml"))
        self._image_dir = get_image_dir(root)
        self._annotation_dir = get_annotation_dir(root)
        self._has_image_split = False
        # NOTE(review): asserts are stripped under ``python -O``; they are kept
        # so that the exception type seen by existing callers does not change.
        assert exists(self._image_dir), "root is missing 'images' directory."
        assert exists(self._annotation_dir), "root is missing 'annotations' directory."
        self._find_splits()
        self._resolve_dataframe()

    def _resolve_dataframe(self) -> None:
        """Parse every annotation file and store the result in ``self.master_df``.

        For each split, every ``*.txt`` file in the annotation directory is
        paired with the image that has exactly the same file stem. Each
        non-blank line is read as ``class_id x_center y_center width height``
        (normalised) and converted to a pixel-space box using the image size.
        Annotation files without a matching image are skipped, and blank lines
        (including completely empty label files) contribute no rows.

        Category names are read from ``dataset.yaml`` in the annotation
        directory when present; otherwise a warning is issued and the class id
        (as a string) is used as the category.

        Raises:
            ValueError: if a non-blank annotation line does not consist of
                exactly five numeric fields.
        """
        master_df = pd.DataFrame(
            columns=[
                "split",
                "image_id",
                "image_width",
                "image_height",
                "x_min",
                "y_min",
                "width",
                "height",
                "category",
                "image_path",
            ],
        )
        annotation_columns = [
            "image_id",
            "image_path",
            "image_width",
            "image_height",
            "class_id",
            "x_min",
            "y_min",
            "width",
            "height",
        ]

        for split in self._splits:
            # Without per-split sub-directories everything lives directly under
            # the images/annotations directories.
            split = split if self._has_image_split else ""
            image_dir = Path(self._image_dir).joinpath(split)
            annotation_dir = Path(self._annotation_dir).joinpath(split)

            # Index images by their exact stem once per split. A prefix glob
            # such as "1*" would also match "10.jpg" or "1_aug.jpg" and could
            # pair an annotation with the wrong image.
            images_by_stem = {}
            if image_dir.is_dir():
                for candidate in sorted(image_dir.iterdir()):
                    if candidate.is_file():
                        images_by_stem.setdefault(candidate.stem, candidate)

            rows = []
            for txt in annotation_dir.glob("*.txt"):
                img_file = images_by_stem.get(txt.stem)
                if img_file is None:
                    # No image for this annotation file: skip it (best effort).
                    continue

                im_width, im_height = imagesize.get(img_file)
                with open(txt, "r") as f:
                    for line in f:
                        fields = line.split()
                        if not fields:
                            # Blank line or empty label file (image without objects).
                            continue
                        class_id, x, y, w, h = map(float, fields)
                        rows.append(
                            (
                                img_file.name,
                                img_file,
                                im_width,
                                im_height,
                                int(class_id),
                                # Centre/size -> top-left corner, clamped at the image border.
                                max(float((x - w / 2) * im_width), 0),
                                max(float((y - h / 2) * im_height), 0),
                                float(w * im_width),
                                float(h * im_height),
                            )
                        )

            annots_df = pd.DataFrame(rows, columns=annotation_columns)
            annots_df["split"] = split if split else "main"
            master_df = pd.concat([master_df, annots_df], ignore_index=True)

        # get category names from `dataset.yaml`
        try:
            with open(Path(self._annotation_dir).joinpath("dataset.yaml")) as f:
                # NOTE(review): FullLoader can construct more than plain data
                # types; consider ``yaml.safe_load`` if this file may come from
                # an untrusted source.
                label_desc = yaml.load(f, Loader=yaml.FullLoader)
            categories = label_desc["names"]
            if isinstance(categories, dict):
                # ``names`` given as an explicit ``{class_id: name}`` mapping.
                label_map = {int(key): name for key, name in categories.items()}
            else:
                label_map = dict(enumerate(categories))
        except FileNotFoundError:
            label_map = dict()
            warnings.warn(f"No `dataset.yaml` file found in {self._annotation_dir}")

        if "class_id" not in master_df.columns:
            # No split was processed, so no annotation frame added the column.
            master_df["class_id"] = pd.Series(dtype=np.int32)
        master_df["class_id"] = master_df["class_id"].astype(np.int32)

        if label_map:
            master_df["category"] = master_df["class_id"].map(label_map)
        else:
            master_df["category"] = master_df["class_id"].astype(str)
        self.master_df = master_df