"""
__author__: HashTagML
license: MIT
Created: Monday, 29th March 2021
"""
import os
import warnings
from pathlib import Path
from typing import Union
import imagesize
import yaml
import numpy as np
import pandas as pd
from .base import FormatSpec
from .utils import exists, get_image_dir, get_annotation_dir
class Yolo(FormatSpec):
    """Represents a YOLO annotation object.

    Label files are plain text with one object per line in the form
    ``class_id x_center y_center width height``; the box values are multiplied by the
    image width/height to obtain pixel ``x_min``, ``y_min``, ``width`` and ``height``.

    Args:
        root (Union[str, os.PathLike]): path to root directory. Expects the ``root`` directory to have either
            of the following layouts:

            .. code-block:: bash

                root
                ├── images
                │   ├── train
                │   │   ├── 1.jpg
                │   │   ├── 2.jpg
                │   │   │   ...
                │   │   └── n.jpg
                │   ├── valid (...)
                │   └── test (...)
                │
                └── annotations
                    ├── train
                    │   ├── 1.txt
                    │   ├── 2.txt
                    │   │   ...
                    │   └── n.txt
                    ├── valid (...)
                    ├── test (...)
                    └── dataset.yaml [Optional]

            or,

            .. code-block:: bash

                root
                ├── images
                │   ├── 1.jpg
                │   ├── 2.jpg
                │   │   ...
                │   └── n.jpg
                │
                └── annotations
                    ├── 1.txt
                    ├── 2.txt
                    │   ...
                    ├── n.txt
                    └── dataset.yaml [Optional]
    """

    def __init__(self, root: Union[str, os.PathLike]):
        super().__init__(root)
        self.class_file = list(Path(self.root).glob("*.yaml"))
        self._image_dir = get_image_dir(root)
        self._annotation_dir = get_annotation_dir(root)
        self._has_image_split = False
        # NOTE: these checks are `assert` statements and are therefore skipped when Python
        # runs with -O; kept as-is so callers catching AssertionError are unaffected.
        assert exists(self._image_dir), "root is missing 'images' directory."
        assert exists(self._annotation_dir), "root is missing 'annotations' directory."
        # `_find_splits` is not defined in this class (presumably inherited from FormatSpec);
        # `_resolve_dataframe` below relies on `self._splits` and `self._has_image_split`.
        self._find_splits()
        self._resolve_dataframe()

    @staticmethod
    def _index_images(image_dir) -> dict:
        """Map file stem -> image path for every regular file directly inside ``image_dir``.

        Returns an empty mapping when the directory does not exist. If several files share
        a stem, the first one in sorted order is kept so the choice is deterministic.
        """
        image_dir = Path(image_dir)
        if not image_dir.is_dir():
            return {}
        index = {}
        for path in sorted(image_dir.iterdir()):
            if path.is_file():
                index.setdefault(path.stem, path)
        return index

    @staticmethod
    def _parse_label_file(txt, im_width, im_height) -> list:
        """Parse one YOLO label file into ``(class_id, x_min, y_min, width, height)`` tuples.

        Blank lines are skipped, so an empty label file simply yields no rows.

        Raises:
            ValueError: if a non-blank line does not hold exactly five numeric fields.
        """
        rows = []
        with open(txt, "r") as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    continue
                try:
                    class_id, x, y, w, h = map(float, fields)
                except ValueError as err:
                    raise ValueError(
                        f"{txt}:{line_no}: expected 'class_id x_center y_center width height', "
                        f"got {line.strip()!r}"
                    ) from err
                rows.append(
                    (
                        int(class_id),
                        # centre/size -> top-left corner in pixels, clamped at the image border
                        max(float((x - w / 2) * im_width), 0),
                        max(float((y - h / 2) * im_height), 0),
                        float(w * im_width),
                        float(h * im_height),
                    )
                )
        return rows

    def _read_split(self, split: str) -> pd.DataFrame:
        """Build the annotation dataframe for one split ("" means no split sub-directory).

        Annotation files whose stem has no matching image file are skipped.
        """
        # Index the images once per split and match on the exact stem; a prefix glob such
        # as "1*" would also match "10.jpg" and re-scan the directory for every label file.
        images = self._index_images(Path(self._image_dir).joinpath(split))
        records = []
        for txt in sorted(Path(self._annotation_dir).joinpath(split).glob("*.txt")):
            img_file = images.get(txt.stem)
            if img_file is None:  # the image file does not exist
                continue
            im_width, im_height = imagesize.get(img_file)
            for class_id, x_min, y_min, width, height in self._parse_label_file(txt, im_width, im_height):
                records.append(
                    (img_file.name, img_file, im_width, im_height, class_id, x_min, y_min, width, height)
                )
        annots_df = pd.DataFrame(
            records,
            columns=[
                "image_id",
                "image_path",
                "image_width",
                "image_height",
                "class_id",
                "x_min",
                "y_min",
                "width",
                "height",
            ],
        )
        annots_df["split"] = split if split else "main"
        return annots_df

    def _load_label_map(self) -> dict:
        """Read ``dataset.yaml`` from the annotation directory and return ``{class_id: name}``.

        Accepts ``names`` either as a sequence (index = class id) or as a mapping of
        class id to name. Returns an empty dict, with a warning, if the file is absent.
        """
        try:
            with open(Path(self._annotation_dir).joinpath("dataset.yaml")) as f:
                # NOTE(review): FullLoader can construct more than plain data; consider
                # yaml.safe_load if dataset.yaml may come from an untrusted source.
                label_desc = yaml.load(f, Loader=yaml.FullLoader)
        except FileNotFoundError:
            warnings.warn(f"No `dataset.yaml` file found in {self._annotation_dir}")
            return dict()
        categories = label_desc["names"]
        if isinstance(categories, dict):
            return {int(idx): name for idx, name in categories.items()}
        return dict(enumerate(categories))

    def _resolve_dataframe(self):
        """Collect the annotations of every split into ``self.master_df``.

        The resulting frame has one row per bounding box. ``category`` holds the name
        from ``dataset.yaml`` when that file is available, otherwise the class id as a string.
        """
        columns = [
            "split",
            "image_id",
            "image_width",
            "image_height",
            "x_min",
            "y_min",
            "width",
            "height",
            "category",
            "image_path",
            "class_id",
        ]
        frames = []
        for split in self._splits:
            split = split if self._has_image_split else ""
            annots_df = self._read_split(split)
            # Empty frames are left out so they cannot influence the concatenated dtypes.
            if not annots_df.empty:
                frames.append(annots_df)

        if frames:
            master_df = pd.concat(frames, ignore_index=True)
        else:
            master_df = pd.DataFrame(columns=columns)
        # Fix the column order and add the (yet unfilled) `category` column.
        master_df = master_df.reindex(columns=columns)

        # get category names from `dataset.yaml`
        label_map = self._load_label_map()
        master_df["class_id"] = master_df["class_id"].astype(np.int32)
        if label_map:
            master_df["category"] = master_df["class_id"].map(label_map)
        else:
            master_df["category"] = master_df["class_id"].astype(str)
        self.master_df = master_df