5 years ago · 177127f1ce
--- a/cvdatasets/__init__.py
+++ b/cvdatasets/__init__.py
@@ -1,13 +1,10 @@
 
				-from .dataset import Dataset, ImageWrapperDataset
			
 
				+from cvdatasets.dataset import Dataset
			
 
				+from cvdatasets.dataset import ImageWrapperDataset
			
 
				 
			
 
				-from .annotations import BaseAnnotations
			
 
				-from .annotations import CUB_Annotations
			
 
				-from .annotations import NAB_Annotations
			
 
				-from .annotations import CARS_Annotations
			
 
				-from .annotations import INAT19_Annotations
			
 
				-from .annotations import FLOWERS_Annotations
			
 
				-from .annotations import HED_Annotations
			
 
				-from .annotations import AnnotationType
			
 
				+from cvdatasets.annotation import BaseAnnotations
			
 
				+from cvdatasets.annotation.types import FileListAnnotations
			
 
				+from cvdatasets.annotation.types import FolderAnnotations
			
 
				+from cvdatasets.annotation.types import JSONAnnotations
			
 
				 
			
 
				-from .utils import _MetaInfo
			
 
				+from cvdatasets.utils import _MetaInfo
			
 
				 
			
--- a/cvdatasets/annotation/__init__.py
+++ b/cvdatasets/annotation/__init__.py
@@ -1,37 +1,38 @@
 
				-from cvdatasets.annotations.annotation_types import AnnotationType
			
 
				-from cvdatasets.annotations.impl.birdsnap import BSNAP_Annotations
			
 
				-from cvdatasets.annotations.impl.cars import CARS_Annotations
			
 
				-from cvdatasets.annotations.impl.cub import CUB_Annotations
			
 
				-from cvdatasets.annotations.impl.dogs import DOGS_Annotations
			
 
				-from cvdatasets.annotations.impl.flowers import FLOWERS_Annotations
			
 
				-from cvdatasets.annotations.impl.hed import HED_Annotations
			
 
				-from cvdatasets.annotations.impl.imagenet import INET_Annotations
			
 
				-from cvdatasets.annotations.impl.inat import INAT18_Annotations
			
 
				-from cvdatasets.annotations.impl.inat import INAT19_Annotations
			
 
				-from cvdatasets.annotations.impl.inat import INAT20_Annotations
			
 
				-from cvdatasets.annotations.impl.nab import NAB_Annotations
			
 
				-from cvdatasets.annotations.impl.tigers import TIGERS_Annotations
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.types import AnnotationType
			
 
				+
			
 
				+# from cvdatasets.annotation.types.birdsnap import BSNAP_Annotations
			
 
				+# from cvdatasets.annotation.types.cars import CARS_Annotations
			
 
				+# from cvdatasets.annotation.types.cub import CUB_Annotations
			
 
				+# from cvdatasets.annotation.types.dogs import DOGS_Annotations
			
 
				+# from cvdatasets.annotation.types.flowers import FLOWERS_Annotations
			
 
				+# from cvdatasets.annotation.types.hed import HED_Annotations
			
 
				+# from cvdatasets.annotation.types.imagenet import INET_Annotations
			
 
				+# from cvdatasets.annotation.types.inat import INAT18_Annotations
			
 
				+# from cvdatasets.annotation.types.inat import INAT19_Annotations
			
 
				+# from cvdatasets.annotation.types.inat import INAT20_Annotations
			
 
				+# from cvdatasets.annotation.types.nab import NAB_Annotations
			
 
				+# from cvdatasets.annotation.types.tigers import TIGERS_Annotations
			
 
				 
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				 
			
 
				 __all__ = [
			
 
				 	"AnnotationType",
			
 
				 	"BaseAnnotations",
			
 
				 	"BBoxMixin",
			
 
				-	"BSNAP_Annotations",
			
 
				-	"CARS_Annotations",
			
 
				-	"CUB_Annotations",
			
 
				-	"DOGS_Annotations",
			
 
				-	"FLOWERS_Annotations",
			
 
				-	"HED_Annotations",
			
 
				-	"INAT18_Annotations",
			
 
				-	"INAT19_Annotations",
			
 
				-	"INAT20_Annotations",
			
 
				-	"INET_Annotations",
			
 
				-	"NAB_Annotations",
			
 
				+	# "BSNAP_Annotations",
			
 
				+	# "CARS_Annotations",
			
 
				+	# "CUB_Annotations",
			
 
				+	# "DOGS_Annotations",
			
 
				+	# "FLOWERS_Annotations",
			
 
				+	# "HED_Annotations",
			
 
				+	# "INAT18_Annotations",
			
 
				+	# "INAT19_Annotations",
			
 
				+	# "INAT20_Annotations",
			
 
				+	# "INET_Annotations",
			
 
				+	# "NAB_Annotations",
			
 
				 	"PartsMixin",
			
 
				-	"TIGERS_Annotations",
			
 
				+	# "TIGERS_Annotations",
			
 
				 ]
			
--- a/cvdatasets/annotation/base.py
+++ b/cvdatasets/annotation/base.py
@@ -4,18 +4,165 @@ import numpy as np
 
				 
			
 
				 from collections import OrderedDict
			
 
				 from collections import defaultdict
			
 
				-from os.path import isdir
			
 
				-from os.path import isfile
			
 
				-from os.path import join
			
 
				+from pathlib import Path
			
 
				+from typing import Tuple
			
 
				 
			
 
				 from cvdatasets.dataset import Dataset
			
 
				 from cvdatasets.utils import feature_file_name
			
 
				-from cvdatasets.utils import read_info_file
			
 
				 from cvdatasets.utils import pretty_print_dict
			
 
				+from cvdatasets.utils import read_info_file
			
 
				 from cvdatasets.utils.decorators import only_with_info
			
 
				 
			
 
				+
			
 
				 class BaseAnnotations(abc.ABC):
				+	"""Abstract base class for dataset annotations.
				+
				+	Resolves the annotation root (either a folder, or a folder derived from
				+	an info file), reads the annotation files and parses UUIDs, labels and
				+	the train/test split through hooks implemented by concrete subclasses.
				+	"""
				+
				+	# Feature-file subset names that are tried, in order, for each subset.
				+	FEATURE_PHONY = dict(train=["train"], test=["test", "val"])
				+
				+	def __init__(self, *, root_or_infofile, dataset_key=None, images_folder="images", **kwargs):
				+		"""Locate the annotation root, then read and parse the annotations.
				+
				+		Args:
				+			root_or_infofile: Path of the annotation folder itself, or of an
				+				info file from which the folder is derived.
				+			dataset_key: Key into ``info.DATASETS``; only consulted when the
				+				class has no ``name`` attribute.
				+			images_folder: Sub-folder of the root that contains the images.
				+			**kwargs: Accepted, but not used by this constructor.
				+
				+		Raises:
				+			ValueError: If ``root_or_infofile`` is neither an existing folder
				+				nor an existing file.
				+		"""
				+
				+		self.dataset_key = dataset_key
				+		self.images_folder = images_folder
				+
				+		root_or_infofile = Path(root_or_infofile)
				+		if root_or_infofile.is_dir():
				+			# A plain folder: there is no info file to consult.
				+			self.info = None
				+			self.root = root_or_infofile
				+
				+		elif root_or_infofile.is_file():
				+			# An info file: the root is composed from its entries.
				+			self.info = read_info_file(root_or_infofile)
				+			ds_info = self.dataset_info
				+			self.root = self.data_root / ds_info.folder / ds_info.annotations
				+
				+		else:
				+			msg = f"Root folder or info file does not exist: \"{root_or_infofile}\""
				+			raise ValueError(msg)
				+
				+		assert self.root.is_dir(), \
				+			f"Annotation directory does not exist: \"{self.root}\"!"
				+
				+		self.files = self.read_annotation_files()
				+		self.parse_annotations()
				+
				+	@property
				+	@only_with_info
				+	def data_root(self):
				+		"""Base data directory: ``info.BASE_DIR / info.DATA_DIR``."""
				+		return Path(self.info.BASE_DIR) / self.info.DATA_DIR
				+
				+	@property
				+	@only_with_info
				+	def dataset_info(self):
				+		"""Entry of ``info.DATASETS`` that belongs to this annotation object.
				+
				+		The key is the class attribute ``name`` if it is set, otherwise
				+		``self.dataset_key``.
				+
				+		Raises:
				+			ValueError: If the key is not present in ``info.DATASETS``.
				+		"""
				+		key = getattr(self.__class__, "name", None)
				+
				+		if key is None:
				+			key = self.dataset_key
				+
				+		if key not in self.info.DATASETS:
				+			raise ValueError(f"Cannot find dataset with key \"{key}\"")
				+
				+		return self.info.DATASETS[key]
				+
				+	def parse_annotations(self):
				+		"""Run the parsing hooks in order: UUIDs, labels, then the split."""
				+		logging.debug("Parsing read annotations (uuids, labels and train-test splits)")
				+		self._parse_uuids()
				+		self._parse_labels()
				+		self._parse_split()
				+
				+	def __getitem__(self, uuid) -> Tuple[str, int]:
				+		"""Return ``(image path, label)`` for the given UUID."""
				+		return self.image(uuid), self.label(uuid)
				+
				+	def image_path(self, image) -> str:
				+		"""Return ``root / images_folder / image`` as a string."""
				+		return str(self.root / self.images_folder / image)
				+
				+	def image(self, uuid) -> str:
				+		"""Return the image path that belongs to the given UUID."""
				+		fname = self.image_names[self.uuid_to_idx[uuid]]
				+		return self.image_path(fname)
				+
				+	def label(self, uuid) -> int:
				+		"""Return a copy of the label stored for the given UUID."""
				+		return self.labels[self.uuid_to_idx[uuid]].copy()
				+
				+	def parts(self, uuid) -> object:
				+		"""Part annotations of an image; this base implementation returns None."""
				+		return None
				+
				+	def bounding_box(self, uuid) -> object:
				+		"""Bounding box of an image; this base implementation returns None."""
				+		return None
				+
				+	def _uuids(self, split) -> np.ndarray:
				+		"""Index ``self.uuids`` with the given split."""
				+		return self.uuids[split]
				+
				+	@property
				+	def train_uuids(self):
				+		"""UUIDs selected by ``self.train_split``."""
				+		return self._uuids(self.train_split)
				+
				+	@property
				+	def test_uuids(self):
				+		"""UUIDs selected by ``self.test_split``."""
				+		return self._uuids(self.test_split)
				+
				+	def new_train_test_datasets(self, dataset_cls=Dataset, **kwargs):
				+		"""Return a generator yielding the "train" and then the "test" dataset.
				+
				+		Extra keyword arguments are forwarded to ``new_dataset``.
				+		"""
				+		return (self.new_dataset(subset, dataset_cls, **kwargs) for subset in ["train", "test"])
				+
				+	def new_dataset(self, subset=None, dataset_cls=Dataset, **kwargs):
				+		"""Create a ``dataset_cls`` instance for a subset, or for all UUIDs.
				+
				+		Args:
				+			subset: Name of the subset; ``<subset>_uuids`` is looked up on
				+				this object. ``None`` selects all UUIDs.
				+			dataset_cls: Class that is instantiated.
				+			**kwargs: Passed through ``check_dataset_kwargs`` and then on to
				+				``dataset_cls``.
				+		"""
				+		if subset is not None:
				+			uuids = getattr(self, f"{subset}_uuids")
				+		else:
				+			uuids = self.uuids
				+
				+		kwargs = self.check_dataset_kwargs(subset, **kwargs)
				+		return dataset_cls(uuids=uuids, annotations=self, **kwargs)
				+
				+	def check_dataset_kwargs(self, subset, **kwargs):
				+		"""Merge defaults derived from the dataset info with the given kwargs.
				+
				+		Explicitly passed kwargs override the derived defaults.
				+
				+		Raises:
				+			ValueError: If a feature model is set but no feature file exists
				+				for the requested subset.
				+		"""
				+		# NOTE(review): relies on ``dataset_info`` evaluating to None when no
				+		# info file was loaded -- confirm against ``only_with_info``.
				+		dataset_info = self.dataset_info
				+		if dataset_info is None:
				+			return kwargs
				+
				+		logging.debug("Dataset info: {}".format(pretty_print_dict(dataset_info)))
				+
				+		# TODO: pass all scales
				+		new_kwargs = {}
				+
				+		if "scales" in dataset_info:
				+			new_kwargs["ratio"] = dataset_info.scales[0]
				+
				+		if "is_uniform" in dataset_info:
				+			new_kwargs["uniform_parts"] = dataset_info.is_uniform
				+
				+		feature_model = getattr(self, "feature_model", None)
				+		if subset is not None and feature_model is not None:
				+			features_dir = self.root / "features"
				+			tried = []
				+			model_info = self.info.MODELS[feature_model]
				+			for subset_phony in self.FEATURE_PHONY[subset]:
				+				features = feature_file_name(subset_phony, dataset_info, model_info)
				+				feature_path = features_dir / features
				+				if feature_path.is_file():
				+					break
				+				tried.append(str(feature_path))
				+			else:
				+				# None of the candidate names exists on disk.
				+				raise ValueError(
				+					"Could not find any features in \"{}\" for {} subset. Tried features: {}".format(
				+					features_dir, subset, tried))
				+
				+			logging.info("Using features file from \"{}\"".format(feature_path))
				+			# Handed on as a plain string path.
				+			new_kwargs["features"] = str(feature_path)
				+		new_kwargs.update(kwargs)
				+
				+		logging.debug("Final kwargs: {}".format(pretty_print_dict(new_kwargs)))
				+		return new_kwargs
				+
				+	@abc.abstractmethod
				+	def read_annotation_files(self):
				+		"""Read the raw annotation files; the result is stored in ``self.files``."""
				+		raise NotImplementedError
				+
				+	@abc.abstractmethod
				+	def _parse_uuids(self):
				+		"""Parse the UUIDs from the annotation files that were read."""
				+		raise NotImplementedError
				+
				+	@abc.abstractmethod
				+	def _parse_labels(self):
				+		"""Parse the labels from the annotation files that were read."""
				+		raise NotImplementedError
				+
				+	@abc.abstractmethod
				+	def _parse_split(self):
				+		"""Parse the train/test split from the annotation files that were read."""
				+		raise NotImplementedError
			
 
				+
			
 
				+
			
 
				+class _BaseAnnotations(abc.ABC):
			
 
				+
			
 
				 	FEATURE_PHONY = dict(train=["train"], test=["test", "val"])
			
 
				 
			
 
				 	@classmethod
			
@@ -54,7 +201,6 @@ class BaseAnnotations(abc.ABC):
 
				 
			
 
				 		self.load()
			
 
				 
			
 
				-
			
 
				 	@property
			
 
				 	@only_with_info
			
 
				 	def data_root(self):
			
--- a/cvdatasets/annotation/files.py
+++ b/cvdatasets/annotation/files.py
@@ -1,4 +1,5 @@
 
				-import abc
			
 
				+# import abc
			
 
				+import os
			
 
				 import logging
			
 
				 import simplejson as json
			
 
				 import warnings
			
@@ -9,7 +10,7 @@ from typing import Callable
 
				 from typing import Dict
			
 
				 from typing import List
			
 
				 
			
 
				-class BaseAnnotationFiles(abc.ABC):
			
 
				+class AnnotationFiles(object):
			
 
				 
			
 
				 	@staticmethod
			
 
				 	def _parse_opts(fpath_and_opts):
			
@@ -22,9 +23,10 @@ class BaseAnnotationFiles(abc.ABC):
 
				 
			
 
				 
			
 
				 	def __init__(self, *files, root=".", load_strict=False, **named_files):
			
 
				-		super(BaseAnnotationFiles, self).__init__()
			
 
				+		super(AnnotationFiles, self).__init__()
			
 
				 		self.load_strict = load_strict
			
 
				 		self.root = Path(root)
			
 
				+		self._files = []
			
 
				 
			
 
				 		for fpath in files:
			
 
				 			fpath, opts = self._parse_opts(fpath)
			
@@ -61,8 +63,18 @@ class BaseAnnotationFiles(abc.ABC):
 
				 
			
 
				 			return reader(f)
			
 
				 
			
 
				-	def read_directory(self, fpath):
			
 
				-		raise NotImplementedError("IMPLEMENT ME!")
			
 
				+	def read_directory(self, folder_path):
				+		"""Recursively collect every file below ``folder_path``.
				+
				+		Returns a list of ``Path`` objects (walked directory joined with the
				+		file name) in the order in which ``os.walk`` yields them. The start of
				+		the scan and the number of files found are logged at INFO level.
				+		"""
				+		logging.info(f"Loading files from folder \"{folder_path}\" ...")
				+
				+		found = []
				+		for current_dir, _subdirs, file_names in os.walk(folder_path):
				+			base = Path(current_dir)
				+			found.extend(base / file_name for file_name in file_names)
				+
				+		logging.info(f"Found {len(found):,d} files in \"{folder_path}\"")
				+		return found
			
 
				+		# setattr(self, attr, _content)
			
 
				 
			
 
				 	def add_file_content(self, fpath, optional=False, *args, attr=None, **kwargs):
			
 
				 		fpath = self._path(fpath)
			
@@ -84,10 +96,11 @@ class BaseAnnotationFiles(abc.ABC):
 
				 		else:
			
 
				 			logging.debug(f"\"{fpath}\" was not found and was ignored, since it was marked as optional")
			
 
				 
			
 
				+		self._files.append(attr)
			
 
				 		setattr(self, attr, content)
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-	files = BaseAnnotationFiles(
			
 
				+	files = AnnotationFiles(
			
 
				 		"foo.txt",
			
 
				 		tad="bar.txt",
			
 
				 		bar=("fobar.txt", True),
			
--- a/cvdatasets/annotation/mixins/__init__.py
+++ b/cvdatasets/annotation/mixins/__init__.py
@@ -1,3 +1,10 @@
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.features_mixin import FeaturesMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				+
			
 
				+__all__ = [
			
 
				+	"BBoxMixin",
			
 
				+	"FeaturesMixin",
			
 
				+	"PartsMixin",
			
 
				+]
			
 
				 
			
 
				-from cvdatasets.annotations.mixins.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.mixins.parts_mixin import PartsMixin
			
--- a/cvdatasets/annotation/mixins/features_mixin.py
+++ b/cvdatasets/annotation/mixins/features_mixin.py
@@ -0,0 +1,3 @@
 
				+
			
 
				+class FeaturesMixin:
				+	"""Placeholder mixin; it currently defines no attributes or methods."""
			
--- a/cvdatasets/annotation/types.py
+++ b/cvdatasets/annotation/types.py
@@ -1,68 +0,0 @@
 
				-from cvdatasets.annotations.impl.birdsnap import BSNAP_Annotations
			
 
				-from cvdatasets.annotations.impl.cars import CARS_Annotations
			
 
				-from cvdatasets.annotations.impl.cub import CUB_Annotations
			
 
				-from cvdatasets.annotations.impl.dogs import DOGS_Annotations
			
 
				-from cvdatasets.annotations.impl.flowers import FLOWERS_Annotations
			
 
				-from cvdatasets.annotations.impl.hed import HED_Annotations
			
 
				-from cvdatasets.annotations.impl.imagenet import INET_Annotations
			
 
				-from cvdatasets.annotations.impl.inat import INAT18_Annotations
			
 
				-from cvdatasets.annotations.impl.inat import INAT19_Annotations
			
 
				-from cvdatasets.annotations.impl.inat import INAT20_Annotations
			
 
				-from cvdatasets.annotations.impl.nab import NAB_Annotations
			
 
				-from cvdatasets.annotations.impl.tigers import TIGERS_Annotations
			
 
				-
			
 
				-from cvargparse.utils import BaseChoiceType
			
 
				-from functools import partial
			
 
				-
			
 
				-class AnnotationType(BaseChoiceType):
			
 
				-	IMAGENET = INET_Annotations
			
 
				-
			
 
				-	CUB200 = CUB_Annotations
			
 
				-	BIRDSNAP = BSNAP_Annotations
			
 
				-	NAB = NAB_Annotations
			
 
				-
			
 
				-	CARS = CARS_Annotations
			
 
				-	DOGS = DOGS_Annotations
			
 
				-
			
 
				-	FLOWERS = FLOWERS_Annotations
			
 
				-
			
 
				-	HED = HED_Annotations
			
 
				-	TIGERS = TIGERS_Annotations
			
 
				-
			
 
				-	INAT18 = INAT18_Annotations
			
 
				-	INAT19 = INAT19_Annotations
			
 
				-	INAT20 = INAT20_Annotations
			
 
				-
			
 
				-	Default = CUB200
			
 
				-
			
 
				-	@classmethod
			
 
				-	def phony(cls, key):
			
 
				-		""" returns for a key a list of datasets,
			
 
				-			that use the same annotation class """
			
 
				-
			
 
				-		return {
			
 
				-			cls.CUB200 : [ "CUB200_2FOLD", "CUB200_GOOGLE", "CUB200_GOOGLE_SEM" ],
			
 
				-			cls.TIGERS : [ "TIGERS_TEST" ],
			
 
				-			cls.INAT19 : [ "INAT19_TEST", "INAT19_MINI" ],
			
 
				-			cls.INAT20 : [ "INAT20_TEST",
			
 
				-				"INAT20_IN_CLASS",
			
 
				-				"INAT20_OUT_CLASS",
			
 
				-				"INAT20_NOISY_IN_CLASS",
			
 
				-				"INAT20_NOISY_OUT_CLASS",
			
 
				-				"INAT20_U_IN_CLASS",
			
 
				-				"INAT20_U_OUT_CLASS",
			
 
				-			],
			
 
				-			cls.IMAGENET : [ "IMAGENET_TOP_INAT20" ],
			
 
				-		}.get(key, [])
			
 
				-
			
 
				-	@classmethod
			
 
				-	def as_choices(cls, add_phony=True):
			
 
				-		choices = super(AnnotationType, cls).as_choices()
			
 
				-		if not add_phony:
			
 
				-			return choices
			
 
				-
			
 
				-		for key in cls:
			
 
				-			for phony in cls.phony(key):
			
 
				-				choices[phony.lower()] = choices[key.name.lower()]
			
 
				-
			
 
				-		return choices
			
--- a/cvdatasets/annotation/types/__init__.py
+++ b/cvdatasets/annotation/types/__init__.py
@@ -0,0 +1,66 @@
 
				+from cvdatasets.annotation.types.file_list import FileListAnnotations
			
 
				+from cvdatasets.annotation.types.folder_annotations import FolderAnnotations
			
 
				+from cvdatasets.annotation.types.json_annotations import JSONAnnotations
			
 
				+
			
 
				+from cvargparse.utils import BaseChoiceType
			
 
				+from cvargparse.utils.enumerations import MetaBaseType
			
 
				+
			
 
				+class AnnotationMetaType(MetaBaseType):
				+	"""Metaclass whose item lookup stamps the lookup key onto the member's value."""
				+
				+	def __getitem__(cls, key):
				+		"""Look up ``key`` as the parent metaclass does and tag the result.
				+
				+		The ``value`` of the returned member gets its ``name`` attribute set
				+		to ``key`` before the member is returned.
				+		"""
				+		member = super().__getitem__(key)
				+		member.value.name = key
				+		return member
			
 
				+
			
 
				+class AnnotationType(BaseChoiceType, metaclass=AnnotationMetaType):
				+	"""Choices of annotation formats, each mapped to its annotation class."""
				+
				+	FOLDER = FolderAnnotations
				+	FILE_LIST = FileListAnnotations
				+	JSON = JSONAnnotations
				+
				+	Default = FILE_LIST
				+
				+	@classmethod
				+	def phony(cls, key):
				+		""" returns for a key a list of datasets,
				+			that use the same annotation class """
				+
				+		# Every entry needs its own comma: adjacent string literals without
				+		# one are silently concatenated into a single (wrong) name.
				+		return {
				+			cls.FOLDER : [
				+				"IMAGENET", "IMAGENET_TOP_INAT20",
				+			],
				+
				+			cls.FILE_LIST : [
				+				"CUB200", "CUB200_2FOLD", "CUB200_GOOGLE", "CUB200_GOOGLE_SEM",
				+				"NAB", "BIRDSNAP",
				+				"CARS", "DOGS", "FLOWERS",
				+				"HED", "TIGERS", "TIGERS_TEST",
				+			],
				+
				+			cls.JSON : [
				+				"INAT18",
				+				"INAT19", "INAT19_TEST", "INAT19_MINI",
				+				"INAT20", "INAT20_TEST",
				+				"INAT20_IN_CLASS",
				+				"INAT20_OUT_CLASS",
				+				"INAT20_NOISY_IN_CLASS",
				+				"INAT20_NOISY_OUT_CLASS",
				+				"INAT20_U_IN_CLASS",
				+				"INAT20_U_OUT_CLASS",
				+			],
				+
				+		}.get(key, [])
				+
				+	@classmethod
				+	def as_choices(cls, add_phony=True):
				+		"""Return the choices of the parent class, optionally with phony names.
				+
				+		With ``add_phony`` set, every lower-cased phony dataset name is added
				+		as a key that points to the same entry as its annotation type.
				+		"""
				+		choices = super(AnnotationType, cls).as_choices()
				+		if not add_phony:
				+			return choices
				+
				+		for key in cls:
				+			for phony in cls.phony(key):
				+				choices[phony.lower()] = choices[key.name.lower()]
				+
				+		return choices
			
 
				+
			
 
				+if __name__ == '__main__':
				+	# Call the classmethod so the choices themselves are printed; without
				+	# the call only the repr of the bound method would be shown.
				+	print(AnnotationType.as_choices())
			
--- a/cvdatasets/annotation/types/birdsnap.py
+++ b/cvdatasets/annotation/types/birdsnap.py
@@ -2,9 +2,9 @@ import numpy as np
 
				 
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/cars.py
+++ b/cvdatasets/annotation/types/cars.py
@@ -2,9 +2,9 @@ import numpy as np
 
				 
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/cub.py
+++ b/cvdatasets/annotation/types/cub.py
@@ -2,9 +2,9 @@ import numpy as np
 
				 
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/dogs.py
+++ b/cvdatasets/annotation/types/dogs.py
@@ -2,9 +2,9 @@ import numpy as np
 
				 
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/file_list.py
+++ b/cvdatasets/annotation/types/file_list.py
@@ -0,0 +1,49 @@
 
				+import numpy as np
			
 
				+
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.files import AnnotationFiles
			
 
				+
			
 
				+class FileListAnnotations(BaseAnnotations):
				+	"""Annotations given as three text lists: images.txt, labels.txt and tr_ID.txt."""
				+
				+	def read_annotation_files(self) -> AnnotationFiles:
				+		"""Load the three list files from the annotation root."""
				+		file_names = ("images.txt", "labels.txt", "tr_ID.txt")
				+		return AnnotationFiles(*file_names, root=self.root, load_strict=True)
				+
				+	def _parse_uuids(self) -> None:
				+		"""Split every images entry on whitespace into UUID and image name."""
				+		assert self.files.images is not None, \
				+			"Images were not loaded!"
				+		pairs = [entry.split() for entry in self.files.images]
				+		uuid_column, name_column = zip(*pairs)
				+		self.uuids = np.array(uuid_column)
				+		self.image_names = np.array(name_column)
				+		# Position of every UUID inside the parsed arrays.
				+		self.uuid_to_idx = dict(zip(self.uuids, range(len(self.uuids))))
				+
				+	def _parse_labels(self) -> None:
				+		"""Convert every labels entry to an int and store them as int32 array."""
				+		assert self.files.labels is not None, \
				+			"Labels were not loaded!"
				+		self.labels = np.array(
				+			[int(entry) for entry in self.files.labels], dtype=np.int32)
				+
				+	def _parse_split(self) -> None:
				+		"""Build boolean train and test masks from the tr_ID entries.
				+
				+		The entries are paired with the UUIDs by position; an entry that
				+		converts to a non-zero int marks a training sample.
				+		"""
				+		assert self.files.tr_ID is not None, \
				+			"Train-test split was not loaded!"
				+		assert hasattr(self, "uuids"), \
				+			"UUIDs were not parsed yet! Please call _parse_uuids before this method!"
				+		split_of = {}
				+		for uuid, flag in zip(self.uuids, self.files.tr_ID):
				+			split_of[uuid] = int(flag)
				+		is_train = [split_of[uuid] for uuid in self.uuids]
				+		self.train_split = np.array(is_train, dtype=bool)
				+		self.test_split = ~self.train_split
			
 
				+
			
 
				+if __name__ == '__main__':
				+	# Manual smoke test against a local copy of the dataset.
				+	annot = FileListAnnotations(
				+		root_or_infofile="/home/korsch_data/datasets/birds/cub200/ORIGINAL")
				+
				+	# Show the first eleven entries (indices 0 to 10).
				+	for uuid in annot.uuids[:11]:
				+		print(uuid, annot[uuid])
				+
				+	train, test = annot.new_train_test_datasets()
				+	print(len(train), len(test))
			
--- a/cvdatasets/annotation/types/flowers.py
+++ b/cvdatasets/annotation/types/flowers.py
@@ -2,9 +2,9 @@ import numpy as np
 
				 
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/folder_annotations.py
+++ b/cvdatasets/annotation/types/folder_annotations.py
@@ -0,0 +1,70 @@
 
				+import numpy as np
			
 
				+
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.files import AnnotationFiles
			
 
				+
			
 
				+class FolderAnnotations(BaseAnnotations):
				+	"""Annotations derived from a folder layout.
				+
				+	The name of an image's parent folder is used as its class, and the file
				+	name of the image is used as its UUID.
				+	"""
				+
				+	def read_annotation_files(self) -> AnnotationFiles:
				+		"""List the train, validation and test image folders below the root."""
				+		# NOTE(review): the ``True`` flag presumably marks the test folder as
				+		# optional -- confirm against ``AnnotationFiles``.
				+		return AnnotationFiles(
				+			train_images="ILSVRC2012_img_train",
				+			val_images="ILSVRC2012_img_val",
				+			test_images=("ILSVRC2012_img_test", True),
				+			root=self.root,
				+			load_strict=True,
				+		)
				+
				+	@property
				+	def _has_test_set(self) -> bool:
				+		"""Whether any test images were loaded."""
				+		return self.files.test_images is not None
				+
				+	def _uuid_fname_pairs(self, fpaths) -> list:
				+		"""Return ``(file name, path relative to the root)`` for every path."""
				+		return [(fpath.name, str(fpath.relative_to(self.root))) for fpath in fpaths]
				+
				+	def _parse_uuids(self) -> None:
				+		"""Collect UUIDs and image names of the train and validation images."""
				+		# Image names are stored relative to the root already, so no extra
				+		# images sub-folder is used.
				+		self.images_folder = ""
				+
				+		# Train entries come first; ``_parse_split`` relies on this order.
				+		uuid_fnames = (
				+			self._uuid_fname_pairs(self.files.train_images)
				+			+ self._uuid_fname_pairs(self.files.val_images))
				+
				+		self.uuids, self.image_names = map(np.array, zip(*uuid_fnames))
				+		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
				+
				+	def _parse_labels(self) -> None:
				+		"""Derive the labels from the parent folder names of the images.
				+
				+		The folder names of the test images, if present, are kept as they
				+		are in ``self.test_labels``.
				+		"""
				+		train_labs = [fpath.parent.name for fpath in self.files.train_images]
				+		val_labs = [fpath.parent.name for fpath in self.files.val_images]
				+		labs = train_labs + val_labs
				+
				+		if self._has_test_set:
				+			self.test_labels = [fpath.parent.name for fpath in self.files.test_images]
				+
				+		# ``labels`` holds, per image, the index of its folder name inside
				+		# the sorted unique names in ``_classes``.
				+		self._classes, self.labels = np.unique(labs, return_inverse=True)
				+
				+	def _parse_split(self) -> None:
				+		"""Mark the leading train images as training, all others as test."""
				+		self.train_split = np.ones(len(self.uuids), dtype=bool)
				+		self.train_split[len(self.files.train_images):] = False
				+
				+		self.test_split = np.logical_not(self.train_split)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
				+	# Manual smoke test against a local copy of the dataset.
				+	annot = FolderAnnotations(
				+		root_or_infofile="/home/korsch_data/datasets/ImageNet/TOP_INAT20")
				+
				+	# Show the first eleven entries (indices 0 to 10).
				+	for uuid in annot.uuids[:11]:
				+		print(uuid, annot[uuid])
				+
				+	train, test = annot.new_train_test_datasets()
				+	print(len(train), len(test))
			
--- a/cvdatasets/annotation/types/hed.py
+++ b/cvdatasets/annotation/types/hed.py
@@ -3,9 +3,9 @@ import simplejson as json
 
				 
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/imagenet.py
+++ b/cvdatasets/annotation/types/imagenet.py
@@ -4,8 +4,8 @@ import logging
 
				 
			
 
				 from pathlib import Path
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 class INET_Annotations(PartsMixin, BaseAnnotations):
			
--- a/cvdatasets/annotation/types/inat.py
+++ b/cvdatasets/annotation/types/inat.py
@@ -4,12 +4,11 @@ import logging
 
				 import numpy as np
			
 
				 import simplejson as json
			
 
				 
			
 
				-from os.path import isfile
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/json_annotations.py
+++ b/cvdatasets/annotation/types/json_annotations.py
@@ -0,0 +1,98 @@
 
				+import copy
			
 
				+import hashlib
			
 
				+import logging
			
 
				+import numpy as np
			
 
				+
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.files import AnnotationFiles
			
 
				+
			
 
				+def _uuid_check(uuids):
			
 
				+	return len(np.unique(uuids)) == len(uuids)
			
 
				+
			
 
				+def _uuid_entry(im_info):
			
 
				+	return hashlib.md5(im_info["file_name"].encode()).hexdigest()
			
 
				+
			
 
				+class JSONAnnotations(BaseAnnotations):
			
 
				+
			
 
				+	def read_annotation_files(self) -> AnnotationFiles:
			
 
				+		return AnnotationFiles(
			
 
				+			"trainval.json", "val.json",
			
 
				+			("unlabeled_train.json", True),
			
 
				+			root=self.root,
			
 
				+			load_strict=True,
			
 
				+		)
			
 
				+
			
 
				+	@property
			
 
				+	def has_unlabeled_data(self) -> bool:
			
 
				+		return self.files.unlabeled_train is not None
			
 
				+
			
 
				+	def _parse_uuids(self) -> None:
			
 
				+
			
 
				+		uuid_fnames = [(str(im["id"]), im["file_name"]) for im in self.files.trainval["images"]]
			
 
				+		self.uuids, self.image_names = map(np.array, zip(*uuid_fnames))
			
 
				+
			
 
				+		assert _uuid_check(self.uuids) , \
			
 
				+			"UUIDs are not unique!"
			
 
				+
			
 
				+		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
			
 
				+
			
 
				+		if self.has_unlabeled_data:
			
 
				+			logging.info("Loading unlabeled data...")
			
 
				+			self._parse_unlabeled()
			
 
				+		else:
			
 
				+			logging.info("No unlabeled data was provided!")
			
 
				+
			
 
				+	def _parse_unlabeled(self) -> None:
			
 
				+
			
 
				+		uuid_fnames = [(_uuid_entry(im), im["file_name"]) for im in self.files.unlabeled_train["images"]]
			
 
				+
			
 
				+		self.unlabeled = unlabeled = copy.copy(self)
			
 
				+
			
 
				+		unlabeled.uuids, unlabeled.image_names = map(np.array, zip(*uuid_fnames))
			
 
				+		unlabeled.labels = np.full(unlabeled.image_names.shape, -1, dtype=np.int32)
			
 
				+		unlabeled.train_split = np.full(unlabeled.image_names.shape, 1, dtype=bool)
			
 
				+		unlabeled.test_split = np.full(unlabeled.image_names.shape, 0, dtype=bool)
			
 
				+
			
 
				+		assert len(np.unique(unlabeled.uuids)) == len(unlabeled.uuids), \
			
 
				+			"Unlabeled UUIDs are not unique!"
			
 
				+
			
 
				+		overlap = set(self.uuids) & set(unlabeled.uuids)
			
 
				+		assert len(overlap) == 0, \
			
 
				+			f"Unlabeled and labeled UUIDs overlap: {overlap}"
			
 
				+
			
 
				+		unlabeled.uuid_to_idx = {uuid: i for i, uuid in enumerate(unlabeled.uuids)}
			
 
				+
			
 
				+
			
 
				+	def _parse_labels(self) -> None:
			
 
				+		self.labels = np.zeros(len(self.uuids), dtype=np.int32)
			
 
				+		labs = {str(annot["image_id"]): annot["category_id"]
			
 
				+			for annot in self.files.trainval["annotations"]}
			
 
				+
			
 
				+		for uuid in self.uuids:
			
 
				+			self.labels[self.uuid_to_idx[uuid]] = labs[uuid]
			
 
				+
			
 
				+
			
 
				+	def _parse_split(self) -> None:
			
 
				+		self.train_split = np.ones(len(self.uuids), dtype=bool)
			
 
				+		val_uuids = [str(im["id"]) for im in self.files.val["images"]]
			
 
				+		for v_uuid in val_uuids:
			
 
				+			self.train_split[self.uuid_to_idx[v_uuid]] = False
			
 
				+
			
 
				+		self.test_split = np.logical_not(self.train_split)
			
 
				+
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+	annot = JSONAnnotations(
			
 
				+		root_or_infofile="/home/korsch_data/datasets/inat/2020/IN_CLASS")
			
 
				+
			
 
				+	for i, uuid in enumerate(annot.uuids):
			
 
				+		print(uuid, annot[uuid])
			
 
				+
			
 
				+		if i >= 4:
			
 
				+			break
			
 
				+
			
 
				+	train, test = annot.new_train_test_datasets()
			
 
				+
			
 
				+	print(len(train), len(test))
			
 
				+
			
--- a/cvdatasets/annotation/types/nab.py
+++ b/cvdatasets/annotation/types/nab.py
@@ -2,9 +2,9 @@ import numpy as np
 
				 
			
 
				 from os.path import join
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
			
 
				-from cvdatasets.annotations.base.parts_mixin import PartsMixin
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
			
 
				+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 
			
--- a/cvdatasets/annotation/types/tigers.py
+++ b/cvdatasets/annotation/types/tigers.py
@@ -5,7 +5,7 @@ from os.path import isfile
 
				 from os.path import join
			
 
				 from sklearn.model_selection import StratifiedShuffleSplit
			
 
				 
			
 
				-from cvdatasets.annotations.base import BaseAnnotations
			
 
				+from cvdatasets.annotation.base import BaseAnnotations
			
 
				 from cvdatasets.utils import _MetaInfo
			
 
				 
			
 
				 class TIGERS_Annotations(BaseAnnotations):
			
--- a/cvdatasets/annotations/__init__.py
+++ b/cvdatasets/annotations/__init__.py
--- a/scripts/display.py
+++ b/scripts/display.py
@@ -9,7 +9,7 @@ import matplotlib.pyplot as plt
 
				 
			
 
				 from argparse import ArgumentParser
			
 
				 
			
 
				-from cvdatasets.annotations import AnnotationType
			
 
				+from cvdatasets.annotation import AnnotationType
			
 
				 from utils import parser, plot_crops
			
 
				 
			
 
				 def main(args):
			
@@ -17,7 +17,6 @@ def main(args):
 
				 		f"AnnotationType is not known: \"{args.dataset}\""
			
 
				 
			
 
				 	annotation_cls = AnnotationType[args.dataset].value
			
 
				-
			
 
				 	logging.info(f"Loading \"{args.dataset}\" annnotations from \"{args.data}\"")
			
 
				 	annot = annotation_cls(root_or_infofile=args.data, parts=args.parts, load_strict=False)
			
 
				 
			
--- a/scripts/info_files/info.yml
+++ b/scripts/info_files/info.yml
@@ -55,6 +55,11 @@ DATASETS:
 
				     annotations: "BJOERN"
			
 
				     n_classes: 1000
			
 
				 
			
 
				+  IMAGENET_TOP_INAT20: &inet_top_inat20
			
 
				+    <<: *inet
			
 
				+    annotations: "TOP_INAT20"
			
 
				+    n_classes: 44
			
 
				+
			
 
				   CUB200:         &cub200
			
 
				     folder: birds/cub200
			
 
				     annotations: "ORIGINAL"
			
@@ -230,6 +235,10 @@ PARTS:
 
				     <<: *inet
			
 
				     <<: *parts_global
			
 
				 
			
 
				+  IMAGENET_TOP_INAT20_GLOBAL:
			
 
				+    <<: *inet_top_inat20
			
 
				+    <<: *parts_global
			
 
				+
			
 
				   CUB200_2FOLD_GLOBAL:
			
 
				     <<: *cub200_2fold
			
 
				     <<: *parts_global
			
--- a/scripts/utils/parser.py
+++ b/scripts/utils/parser.py
@@ -1,7 +1,7 @@
 
				 import os
			
 
				 from cvargparse import BaseParser, Arg
			
 
				 
			
 
				-from cvdatasets.annotations import AnnotationType
			
 
				+from cvdatasets.annotation import AnnotationType
			
 
				 
			
 
				 from cvdatasets.utils import read_info_file
			
 
				 
			
--- a/tests/test_annotations.py
+++ b/tests/test_annotations.py
@@ -9,10 +9,10 @@ from os.path import *
 
				 from abc import ABC, abstractproperty
			
 
				 
			
 
				 
			
 
				-from cvdatasets import BaseAnnotations, _MetaInfo
			
 
				+from cvdatasets import FileListAnnotations
			
 
				 from cvdatasets.utils import read_info_file
			
 
				 
			
 
				-class MockAnnotation(BaseAnnotations):
			
 
				+class MockAnnotation(FileListAnnotations):
			
 
				 	name = "MOCK"
			
 
				 	index_offset = 0