|
@@ -4,18 +4,165 @@ import numpy as np
|
|
|
|
|
|
from collections import OrderedDict
|
|
|
from collections import defaultdict
|
|
|
-from os.path import isdir
|
|
|
-from os.path import isfile
|
|
|
-from os.path import join
|
|
|
+from pathlib import Path
|
|
|
+from typing import Tuple
|
|
|
|
|
|
from cvdatasets.dataset import Dataset
|
|
|
from cvdatasets.utils import feature_file_name
|
|
|
-from cvdatasets.utils import read_info_file
|
|
|
from cvdatasets.utils import pretty_print_dict
|
|
|
+from cvdatasets.utils import read_info_file
|
|
|
from cvdatasets.utils.decorators import only_with_info
|
|
|
|
|
|
+
|
|
|
class BaseAnnotations(abc.ABC):
	"""Abstract base for dataset annotations.

	Resolves an annotation root either from a plain directory or from an
	info file (via ``read_info_file``), reads the annotation files and lets
	subclasses parse uuids, labels and the train/test split.

	Subclasses must implement :meth:`read_annotation_files`,
	:meth:`_parse_uuids`, :meth:`_parse_labels` and :meth:`_parse_split`,
	which are expected to populate ``self.uuids``, ``self.uuid_to_idx``,
	``self.image_names``, ``self.labels``, ``self.train_split`` and
	``self.test_split`` (read by the accessors below).
	"""

	# FIX: check_dataset_kwargs() references BaseAnnotations.FEATURE_PHONY,
	# which was only defined on the (legacy) _BaseAnnotations class — define
	# it here so the lookup works. For the "test" subset, "val" features are
	# accepted as a fallback.
	FEATURE_PHONY = dict(train=["train"], test=["test", "val"])

	def __init__(self, *, root_or_infofile, dataset_key=None, images_folder="images", **kwargs):
		"""Initialize annotations from a folder or an info file.

		Args:
			root_or_infofile: either the annotation directory itself, or an
				info file describing where the data lives.
			dataset_key: key into ``info.DATASETS`` when no class-level
				``name`` attribute is set (only used with an info file).
			images_folder: sub-folder of the root holding the images.

		Raises:
			ValueError: if ``root_or_infofile`` is neither a file nor a folder.
		"""
		self.dataset_key = dataset_key
		self.images_folder = images_folder

		root_or_infofile = Path(root_or_infofile)
		if root_or_infofile.is_dir():
			# plain annotation folder: no info file available
			self.info = None
			self.root = root_or_infofile

		elif root_or_infofile.is_file():
			self.info = read_info_file(root_or_infofile)
			ds_info = self.dataset_info
			self.root = self.data_root / ds_info.folder / ds_info.annotations

		else:
			msg = f"Root folder or info file does not exist: \"{root_or_infofile}\""
			raise ValueError(msg)

		assert self.root.is_dir(), \
			f"Annotation directory does not exist: \"{self.root}\"!"

		self.files = self.read_annotation_files()
		self.parse_annotations()

	@property
	@only_with_info
	def data_root(self):
		# base data directory taken from the info file
		return Path(self.info.BASE_DIR) / self.info.DATA_DIR

	@property
	@only_with_info
	def dataset_info(self):
		"""Entry of ``info.DATASETS`` selected by the class-level ``name``
		attribute or, as a fallback, by ``self.dataset_key``.

		Raises:
			ValueError: if the resulting key is not present in the info file.
		"""
		key = getattr(self.__class__, "name", None)

		if key is None:
			key = self.dataset_key

		if key not in self.info.DATASETS:
			raise ValueError(f"Cannot find dataset with key \"{key}\"")

		return self.info.DATASETS[key]

	def parse_annotations(self):
		"""Dispatch to the subclass-provided parsing hooks."""
		logging.debug("Parsing read annotations (uuids, labels and train-test splits)")
		self._parse_uuids()
		self._parse_labels()
		self._parse_split()

	def __getitem__(self, uuid) -> Tuple[str, int]:
		# (image path, label) pair for a single sample
		return self.image(uuid), self.label(uuid)

	def image_path(self, image) -> str:
		"""Absolute path of an image file name under the images folder."""
		return str(self.root / self.images_folder / image)

	def image(self, uuid) -> str:
		"""Image path for the given uuid."""
		fname = self.image_names[self.uuid_to_idx[uuid]]
		return self.image_path(fname)

	def label(self, uuid) -> int:
		# .copy() so callers cannot mutate the stored labels array
		# (assumes self.labels entries support .copy(), e.g. numpy — confirm)
		return self.labels[self.uuid_to_idx[uuid]].copy()

	def parts(self, uuid) -> object:
		# no part annotations by default; subclasses may override
		return None

	def bounding_box(self, uuid) -> object:
		# no bounding boxes by default; subclasses may override
		return None

	def _uuids(self, split) -> np.ndarray:
		return self.uuids[split]

	@property
	def train_uuids(self):
		return self._uuids(self.train_split)

	@property
	def test_uuids(self):
		return self._uuids(self.test_split)

	def new_train_test_datasets(self, dataset_cls=Dataset, **kwargs):
		"""Lazily create a (train, test) dataset pair.

		FIX: ``**kwargs`` were accepted but silently dropped — they are now
		forwarded to :meth:`new_dataset` (backward-compatible: callers that
		passed none are unaffected).
		"""
		return (self.new_dataset(subset, dataset_cls, **kwargs) for subset in ["train", "test"])

	def new_dataset(self, subset=None, dataset_cls=Dataset, **kwargs):
		"""Create a dataset for the given subset (``"train"``/``"test"``),
		or for all uuids when ``subset`` is None."""
		if subset is not None:
			uuids = getattr(self, "{}_uuids".format(subset))
		else:
			uuids = self.uuids

		kwargs = self.check_dataset_kwargs(subset, **kwargs)
		return dataset_cls(uuids=uuids, annotations=self, **kwargs)

	def check_dataset_kwargs(self, subset, **kwargs):
		"""Derive dataset constructor kwargs from the info file entry.

		Explicit ``kwargs`` passed by the caller take precedence over the
		derived values. Returns the caller kwargs unchanged when no info
		file is loaded.

		Raises:
			ValueError: if a feature model is set but no matching feature
				file can be found for the subset.
		"""
		dataset_info = self.dataset_info
		if dataset_info is None:
			return kwargs

		logging.debug("Dataset info: {}".format(pretty_print_dict(dataset_info)))

		new_kwargs = {}

		if "scales" in dataset_info:
			new_kwargs["ratio"] = dataset_info.scales[0]

		if "is_uniform" in dataset_info:
			new_kwargs["uniform_parts"] = dataset_info.is_uniform

		feature_model = getattr(self, "feature_model", None)
		if None not in [subset, feature_model]:
			tried = []
			model_info = self.info.MODELS[feature_model]
			for subset_phony in BaseAnnotations.FEATURE_PHONY[subset]:
				features = feature_file_name(subset_phony, dataset_info, model_info)
				# FIX: the os.path imports (join/isfile) were removed from this
				# module, but this code still called them (NameError at runtime).
				# Use the pathlib API the module now imports instead.
				feature_path = self.root / "features" / features
				if feature_path.is_file(): break
				tried.append(feature_path)
			else:
				raise ValueError(
					"Could not find any features in \"{}\" for {} subset. Tried features: {}".format(
						self.root / "features", subset, tried))

			logging.info("Using features file from \"{}\"".format(feature_path))
			new_kwargs["features"] = feature_path
		new_kwargs.update(kwargs)

		logging.debug("Final kwargs: {}".format(pretty_print_dict(new_kwargs)))
		return new_kwargs

	@abc.abstractmethod
	def read_annotation_files(self):
		raise NotImplementedError

	@abc.abstractmethod
	def _parse_uuids(self):
		raise NotImplementedError

	@abc.abstractmethod
	def _parse_labels(self):
		raise NotImplementedError

	@abc.abstractmethod
	def _parse_split(self):
		raise NotImplementedError
|
|
|
+class _BaseAnnotations(abc.ABC):
|
|
|
+
|
|
|
FEATURE_PHONY = dict(train=["train"], test=["test", "val"])
|
|
|
|
|
|
@classmethod
|
|
@@ -54,7 +201,6 @@ class BaseAnnotations(abc.ABC):
|
|
|
|
|
|
self.load()
|
|
|
|
|
|
-
|
|
|
@property
|
|
|
@only_with_info
|
|
|
def data_root(self):
|