Browse Source

Merge branch 'master' of triton.inf-cv.uni-jena.de:ComputerVisionJena/cvdatasets

Dimitri Korsch 4 years ago
parent
commit
c787793d3b
38 changed files with 1041 additions and 745 deletions
  1. 28 12
      cvdatasets/__init__.py
  2. 1 1
      cvdatasets/_version.py
  3. 0 0
      cvdatasets/annotation/__init__.py
  4. 196 0
      cvdatasets/annotation/base.py
  5. 112 0
      cvdatasets/annotation/files.py
  6. 12 0
      cvdatasets/annotation/mixins/__init__.py
  7. 52 0
      cvdatasets/annotation/mixins/bbox_mixin.py
  8. 42 0
      cvdatasets/annotation/mixins/features_mixin.py
  9. 49 0
      cvdatasets/annotation/mixins/multi_box_mixin.py
  10. 116 0
      cvdatasets/annotation/mixins/parts_mixin.py
  11. 3 3
      cvdatasets/annotation/old_types/birdsnap.py
  12. 3 3
      cvdatasets/annotation/old_types/cars.py
  13. 3 3
      cvdatasets/annotation/old_types/cub.py
  14. 3 3
      cvdatasets/annotation/old_types/dogs.py
  15. 3 3
      cvdatasets/annotation/old_types/flowers.py
  16. 3 3
      cvdatasets/annotation/old_types/hed.py
  17. 2 2
      cvdatasets/annotation/old_types/imagenet.py
  18. 3 4
      cvdatasets/annotation/old_types/inat.py
  19. 3 3
      cvdatasets/annotation/old_types/nab.py
  20. 1 1
      cvdatasets/annotation/old_types/tigers.py
  21. 76 0
      cvdatasets/annotation/types/__init__.py
  22. 50 0
      cvdatasets/annotation/types/file_list.py
  23. 69 0
      cvdatasets/annotation/types/folder_annotations.py
  24. 97 0
      cvdatasets/annotation/types/json_annotations.py
  25. 0 37
      cvdatasets/annotations/__init__.py
  26. 0 68
      cvdatasets/annotations/annotation_types.py
  27. 0 200
      cvdatasets/annotations/base/__init__.py
  28. 0 34
      cvdatasets/annotations/base/bbox_mixin.py
  29. 0 84
      cvdatasets/annotations/base/parts_mixin.py
  30. 15 4
      cvdatasets/dataset/__init__.py
  31. 35 5
      cvdatasets/dataset/mixins/parts.py
  32. 1 4
      scripts/config.sh
  33. 4 9
      scripts/display.py
  34. 9 4
      scripts/display.sh
  35. 45 248
      scripts/info_files/info.yml
  36. 1 1
      scripts/tests.sh
  37. 1 3
      scripts/utils/parser.py
  38. 3 3
      tests/test_annotations.py

+ 28 - 12
cvdatasets/__init__.py

@@ -1,13 +1,29 @@
-from .dataset import Dataset, ImageWrapperDataset
-
-from .annotations import BaseAnnotations
-from .annotations import CUB_Annotations
-from .annotations import NAB_Annotations
-from .annotations import CARS_Annotations
-from .annotations import INAT19_Annotations
-from .annotations import FLOWERS_Annotations
-from .annotations import HED_Annotations
-from .annotations import AnnotationType
-
-from .utils import _MetaInfo
+from cvdatasets.annotation.base import Annotations
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.files import AnnotationFiles
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.features_mixin import FeaturesMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
+from cvdatasets.annotation.types import AnnotationType
+from cvdatasets.annotation.types.file_list import FileListAnnotations
+from cvdatasets.annotation.types.folder_annotations import FolderAnnotations
+from cvdatasets.annotation.types.json_annotations import JSONAnnotations
+from cvdatasets.dataset import Dataset
+from cvdatasets.dataset import ImageWrapperDataset
+from cvdatasets.utils import _MetaInfo
 
+# Public API of the package. Every name listed here is imported above.
+__all__ = [
+	"_MetaInfo",
+	"Annotations",
+	"AnnotationFiles",
+	"AnnotationType",
+	"BaseAnnotations",
+	"BBoxMixin",
+	"Dataset",
+	"FeaturesMixin",
+	"FileListAnnotations",
+	"FolderAnnotations",
+	"ImageWrapperDataset",
+	"JSONAnnotations",
+	"PartsMixin",
+]

+ 1 - 1
cvdatasets/_version.py

@@ -1 +1 @@
-__version__ = "0.7.0"
+__version__ = "0.8.0"

+ 0 - 0
cvdatasets/annotations/impl/__init__.py → cvdatasets/annotation/__init__.py


+ 196 - 0
cvdatasets/annotation/base.py

@@ -0,0 +1,196 @@
+import abc
+import logging
+import numpy as np
+
+from collections import OrderedDict
+from collections import defaultdict
+from pathlib import Path
+from typing import Tuple
+
+from cvdatasets.annotation import mixins
+from cvdatasets.annotation.files import AnnotationFiles
+from cvdatasets.dataset import Dataset
+from cvdatasets.utils import feature_file_name
+from cvdatasets.utils import pretty_print_dict
+from cvdatasets.utils import read_info_file
+from cvdatasets.utils.decorators import only_with_info
+
+
+class BaseAnnotations(abc.ABC):
+
+	@classmethod
+	def extract_kwargs(cls, opts):
+		return dict(
+			root_or_infofile=opts.data,
+			load_strict=getattr(opts, "load_strict", False),
+			dataset_key=getattr(opts, "dataset", None)
+		)
+
+	@classmethod
+	def new(cls, opts, **_kwargs):
+		kwargs = cls.extract_kwargs(opts)
+		kwargs.update(_kwargs)
+		kwargs_str = pretty_print_dict(kwargs)
+		try:
+			annot = cls(**kwargs)
+		except Exception as e:
+			logging.error(f"Failed to create \"{cls.__name__}\" annotations " + \
+				f"with following kwargs: \"{kwargs_str}\". " + \
+				f"Error was: {e}"
+			)
+			raise
+		else:
+			logging.info(f"Loaded \"{annot.dataset_key}\" annotations " + \
+				f"with following kwargs: \"{kwargs_str}\""
+			)
+			return annot
+
+
+	def __init__(self, *, root_or_infofile, dataset_key=None, images_folder="images", load_strict=True, **kwargs):
+
+		self.dataset_key = dataset_key
+		self.images_folder = images_folder
+		self.load_strict = load_strict
+
+		root_or_infofile = Path(root_or_infofile)
+		if root_or_infofile.is_dir():
+			self.info = None
+			self.root = root_or_infofile
+
+		elif root_or_infofile.is_file():
+			self.info = read_info_file(root_or_infofile)
+			ds_info = self.dataset_info
+			self.root = self.data_root / ds_info.folder / ds_info.annotations
+
+		else:
+			msg = f"Root folder or info file does not exist: \"{root_or_infofile}\""
+			raise ValueError(msg)
+
+		assert self.root.is_dir(), \
+			f"Annotation directory does not exist: \"{self.root}\"!"
+
+		self.files = self.read_annotation_files()
+		self.parse_annotations()
+
+	@property
+	@only_with_info
+	def data_root(self):
+		return Path(self.info.BASE_DIR) / self.info.DATA_DIR
+
+	@property
+	@only_with_info
+	def dataset_key(self):
+		if self._dataset_key is not None:
+			return self._dataset_key
+
+		else:
+			return self.__class__.__name__
+
+	@dataset_key.setter
+	def dataset_key(self, value):
+		self._dataset_key = value
+
+	@property
+	@only_with_info
+	def dataset_info(self):
+		key = self.dataset_key
+
+		if key not in self.info.DATASETS:
+			raise ValueError(f"Cannot find dataset with key \"{key}\"")
+
+		return self.info.DATASETS[key]
+
+	def parse_annotations(self):
+		logging.debug("Parsing read annotations (uuids, labels and train-test splits)")
+		self._parse_uuids()
+		self._parse_labels()
+		self._parse_split()
+
+	def __getitem__(self, uuid) -> Tuple[str, int]:
+		return self.image(uuid), self.label(uuid)
+
+	def image_path(self, image) -> str:
+		return str(self.root / self.images_folder / image)
+
+	def image(self, uuid) -> str:
+		fname = self.image_names[self.uuid_to_idx[uuid]]
+		return self.image_path(fname)
+
+	def label(self, uuid) -> int:
+		return self.labels[self.uuid_to_idx[uuid]].copy()
+
+	def bounding_box(self, uuid) -> object:
+		return None
+
+	def _uuids(self, split) -> np.ndarray:
+		return self.uuids[split]
+
+	@property
+	def train_uuids(self):
+		return self._uuids(self.train_split)
+
+	@property
+	def test_uuids(self):
+		return self._uuids(self.test_split)
+
+	def new_train_test_datasets(self, dataset_cls=Dataset, **kwargs):
+		"""Create the datasets for the "train" and the "test" subset.
+
+		Arguments:
+			dataset_cls: class that is instantiated for each subset.
+			**kwargs: additional keyword arguments, forwarded unchanged to
+				new_dataset for both subsets (they were silently dropped
+				before).
+
+		Returns:
+			A generator yielding first the train and then the test dataset,
+			so it can be unpacked as ``train, test = ...``.
+		"""
+		return (
+			self.new_dataset(subset, dataset_cls, **kwargs)
+				for subset in ["train", "test"]
+		)
+
+	def new_dataset(self, subset=None, dataset_cls=Dataset, **kwargs):
+		if subset is not None:
+			uuids = getattr(self, "{}_uuids".format(subset))
+		else:
+			uuids = self.uuids
+
+		kwargs = self.check_dataset_kwargs(subset, **kwargs)
+		return dataset_cls(uuids=uuids, annotations=self, **kwargs)
+
+	def check_dataset_kwargs(self, subset, **kwargs):
+		dataset_info = self.dataset_info
+		if dataset_info is None:
+			return kwargs
+
+		logging.debug("Dataset info: {}".format(pretty_print_dict(dataset_info)))
+
+		# TODO: pass all scales
+		new_kwargs = {}
+
+		if "scales" in dataset_info and len(dataset_info.scales):
+			new_kwargs["ratio"] = dataset_info.scales[0]
+
+		if "is_uniform" in dataset_info:
+			new_kwargs["uniform_parts"] = dataset_info.is_uniform
+
+		new_kwargs.update(kwargs)
+		logging.debug("Final kwargs: {}".format(pretty_print_dict(new_kwargs)))
+		return new_kwargs
+
+	def read_annotation_files(self) -> AnnotationFiles:
+		logging.debug("Creating default AnnotationFiles object")
+		files = AnnotationFiles(root=self.root, load_strict=self.load_strict)
+		return self.load_files(files)
+
+	@abc.abstractmethod
+	def load_files(self, files_obj) -> AnnotationFiles:
+		return files_obj
+
+	@abc.abstractmethod
+	def _parse_uuids(self) -> None:
+		pass
+
+	@abc.abstractmethod
+	def _parse_labels(self) -> None:
+		pass
+
+	@abc.abstractmethod
+	def _parse_split(self) -> None:
+		pass
+
+
+class Annotations(
+	mixins.BBoxMixin,
+	mixins.MultiBoxMixin,
+	mixins.PartsMixin,
+	mixins.FeaturesMixin,
+	BaseAnnotations):
+	pass

+ 112 - 0
cvdatasets/annotation/files.py

@@ -0,0 +1,112 @@
+# import abc
+import os
+import logging
+import simplejson as json
+import warnings
+
+from pathlib import Path
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+
+class AnnotationFiles(object):
+
+	@staticmethod
+	def _parse_opts(fpath_and_opts):
+		if isinstance(fpath_and_opts, (list, tuple)):
+			fpath, *opts = fpath_and_opts
+		else:
+			fpath, opts = fpath_and_opts, []
+
+		return fpath, opts
+
+	def __init__(self, *files, root=".", load_strict=False, **named_files):
+		super(AnnotationFiles, self).__init__()
+		self.load_strict = load_strict
+		self.root = Path(root)
+		self._files = []
+
+		self.load_files(*files, **named_files)
+
+	def load_files(self, *files, **named_files):
+		for fpath in files:
+			fpath, opts = self._parse_opts(fpath)
+			self.add_file_content(fpath, *opts)
+
+		for attr, fpath in named_files.items():
+			fpath, opts = self._parse_opts(fpath)
+			self.add_file_content(fpath, *opts, attr=attr)
+
+	def _path(self, fname) -> Path:
+		return self.root / fname
+
+	def _json_reader(self, f) -> Dict[str, Any]:
+		return json.load(f)
+
+	def _line_reader(self, f) -> List[str]:
+		return [line.strip() for line in f if line.strip()]
+
+	def get_reader(self, fpath) -> Callable:
+		return {
+			".json": self._json_reader,
+			".txt": self._line_reader,
+		}.get(Path(fpath).suffix.lower())
+
+	def read_file(self, fpath):
+		"""Read a single annotation file with the reader matching its suffix.
+
+		Arguments:
+			fpath: path of the file to read (str or Path).
+
+		Returns:
+			Whatever the selected reader returns: the parsed JSON content for
+			".json" files or the list of non-empty, stripped lines for ".txt"
+			files.
+
+		Raises:
+			NotImplementedError: if no reader is registered for the suffix.
+			ValueError: if the registered reader is not callable.
+		"""
+		# normalize once, so that ".name" also works for plain string paths
+		fpath = Path(fpath)
+
+		# select the reader first: there is no need to open a file
+		# that cannot be read anyway
+		reader = self.get_reader(fpath)
+
+		if reader is None:
+			raise NotImplementedError(f"Don't know how to read \"{fpath.name}\"!")
+
+		elif not callable(reader):
+			raise ValueError(f"The reader for \"{fpath.name}\" was not callable!")
+
+		with open(fpath) as f:
+			return reader(f)
+
+	def read_directory(self, folder_path):
+		logging.info(f"Loading files from folder \"{folder_path}\" ...")
+
+		_content = [
+			Path(path) / file
+				for path, folders, files in os.walk(folder_path)
+					for file in files
+		]
+
+		logging.info(f"Found {len(_content):,d} files in \"{folder_path}\"")
+		return _content
+		# setattr(self, attr, _content)
+
+	def add_file_content(self, fpath, optional=False, *args, attr=None, **kwargs):
+		fpath = self._path(fpath)
+		attr = attr or fpath.stem.replace(".", "_")
+		content = None
+
+		if fpath.is_file():
+			content = self.read_file(fpath)
+
+		elif fpath.is_dir():
+			content = self.read_directory(fpath)
+
+		elif not optional:
+			msg = f"File \"{fpath}\" was not found!"
+			if self.load_strict:
+				raise AssertionError(msg)
+			else:
+				warnings.warn(msg)
+		else:
+			logging.debug(f"\"{fpath}\" was not found and was ignored, since it was marked as optional")
+
+		self._files.append(attr)
+		setattr(self, attr, content)
+
+if __name__ == '__main__':
+	files = AnnotationFiles(
+		"foo.txt",
+		tad="bar.txt",
+		bar=("fobar.txt", True),
+		root="/Bla",
+		# load_strict=True,
+	)
+	print(files.foo, files.tad, files.bar)

+ 12 - 0
cvdatasets/annotation/mixins/__init__.py

@@ -0,0 +1,12 @@
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.features_mixin import FeaturesMixin
+from cvdatasets.annotation.mixins.multi_box_mixin import MultiBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
+
+__all__ = [
+	"BBoxMixin",
+	"FeaturesMixin",
+	"MultiBoxMixin",
+	"PartsMixin",
+]
+

+ 52 - 0
cvdatasets/annotation/mixins/bbox_mixin.py

@@ -0,0 +1,52 @@
+import abc
+import logging
+import numpy as np
+
+from cvdatasets.annotation.files import AnnotationFiles
+
+class BBoxMixin(abc.ABC):
+
+	dtype = np.dtype([
+		("x", np.int32),
+		("y", np.int32),
+		("w", np.int32),
+		("h", np.int32),
+	])
+
+	def read_annotation_files(self) -> AnnotationFiles:
+		files = super(BBoxMixin, self).read_annotation_files()
+		logging.debug("Adding bounding box annotation files")
+		files.load_files(
+			bounding_boxes=("bounding_boxes.txt", True),
+		)
+		return files
+
+	@property
+	def has_bounding_boxes(self) -> bool:
+		return self.files.bounding_boxes is not None
+
+	def parse_annotations(self) -> None:
+		super(BBoxMixin, self).parse_annotations()
+
+		if self.has_bounding_boxes:
+			self._parse_bounding_boxes()
+
+	def _parse_bounding_boxes(self) -> None:
+		logging.debug("Parsing bounding box annotations")
+		assert self.has_bounding_boxes, \
+			"Bounding boxes were not loaded!"
+
+		uuid_to_bbox = {}
+		for content in [i.split() for i in self.files.bounding_boxes]:
+			uuid, bbox = content[0], content[1:]
+			uuid_to_bbox[uuid] = [float(i) for i in bbox]
+
+		self.bounding_boxes = np.array(
+			[tuple(uuid_to_bbox[uuid]) for uuid in self.uuids],
+			dtype=self.dtype)
+
+	def bounding_box(self, uuid) -> np.ndarray:
+		if self.has_bounding_boxes:
+			return self.bounding_boxes[self.uuid_to_idx[uuid]].copy()
+
+		return np.array((0,0, 1,1), dtype=self.dtype)

+ 42 - 0
cvdatasets/annotation/mixins/features_mixin.py

@@ -0,0 +1,42 @@
+import abc
+import logging
+from cvdatasets.utils import feature_file_name
+
+class FeaturesMixin(abc.ABC):
+	FEATURE_PHONY = dict(train=["train"], test=["test", "val"])
+
+	@classmethod
+	def extract_kwargs(cls, opts):
+		kwargs = super(FeaturesMixin, cls).extract_kwargs(opts)
+		kwargs.update(dict(
+			feature_model=getattr(opts, "feature_model", None),
+		))
+		return kwargs
+
+	def __init__(self, *args, feature_model=None, feature_folder="features", **kwargs):
+		super(FeaturesMixin, self).__init__(*args, **kwargs)
+
+		self.feature_model = feature_model
+		self.feature_folder = feature_folder
+
+	def check_dataset_kwargs(self, subset, **kwargs):
+		"""Extend the dataset kwargs by the path to a pre-computed features file.
+
+		The kwargs are first processed by the parent implementation. If both
+		a subset and a feature model are given, the feature folder is searched
+		for a features file of every phony name of the subset (see
+		FEATURE_PHONY); the first existing file is passed as "features".
+		Explicitly passed kwargs take precedence over the derived ones.
+
+		Raises:
+			ValueError: if none of the candidate feature files exists.
+		"""
+		kwargs = super(FeaturesMixin, self).check_dataset_kwargs(subset, **kwargs)
+
+		new_kwargs = {}
+		if None not in [subset, self.feature_model]:
+			tried = []
+			# "dataset_info" was referenced without being defined before,
+			# which resulted in a NameError
+			dataset_info = self.dataset_info
+			model_info = self.info.MODELS[self.feature_model]
+			for subset_phony in FeaturesMixin.FEATURE_PHONY[subset]:
+				features = feature_file_name(subset_phony, dataset_info, model_info)
+				feature_path = self.root / self.feature_folder / features
+				if feature_path.is_file(): break
+				tried.append(feature_path)
+			else:
+				# the loop was not left with "break": no candidate exists
+				raise ValueError(
+					f"Could not find any features in \"{self.root / self.feature_folder}\" for {subset} subset. Tried features: {tried}")
+
+			logging.info(f"Using features file from \"{feature_path}\"")
+			new_kwargs["features"] = feature_path
+
+		new_kwargs.update(kwargs)
+		return new_kwargs

+ 49 - 0
cvdatasets/annotation/mixins/multi_box_mixin.py

@@ -0,0 +1,49 @@
+import abc
+import logging
+import numpy as np
+
+from cvdatasets.annotation.files import AnnotationFiles
+
+
+class MultiBoxMixin(abc.ABC):
+
+	def read_annotation_files(self) -> AnnotationFiles:
+		files = super(MultiBoxMixin, self).read_annotation_files()
+
+		files.load_files(
+			multi_boxes=("multi_boxes.json", True),
+		)
+		return files
+
+	@property
+	def has_multi_boxes(self) -> bool:
+		return self.files.multi_boxes is not None
+
+	def parse_annotations(self) -> None:
+		super(MultiBoxMixin, self).parse_annotations()
+		if self.has_multi_boxes:
+			self._parse_multi_boxes()
+
+	def _parse_multi_boxes(self) -> None:
+		logging.debug("Parsing multi-box annotations")
+
+		assert self.has_multi_boxes, \
+			"Multi-boxes were not loaded!"
+
+		self.multi_boxes = {}
+
+		for uuid in self.uuids:
+			idx = self.uuid_to_idx[uuid]
+			im_name = self.image_names[idx]
+			multi_box = self.files.multi_boxes[idx]
+			assert im_name == multi_box["image"], \
+				f"{im_name} != {multi_box['image']}"
+
+			self.multi_boxes[uuid] = multi_box
+
+	def multi_box(self, uuid) -> dict:
+		"""Return the multi-box annotation for the given UUID.
+
+		If multi-box annotations were loaded, the entry stored for the UUID
+		during parsing is returned. Otherwise a default entry is created,
+		consisting of the image name and a single box.
+		"""
+		if self.has_multi_boxes:
+			return self.multi_boxes[uuid]
+
+		fname = self.image_names[self.uuid_to_idx[uuid]]
+		# The default box spans from (0, 0) to (1, 1). The previous default
+		# (x0=0, x1=0, y0=1, y1=1) had zero width and zero height.
+		# NOTE(review): assumes relative coordinates, mirroring the (0, 0, 1, 1)
+		# default of BBoxMixin.bounding_box -- confirm against the consumers.
+		return dict(image=fname, objects=[dict(x0=0, y0=0, x1=1, y1=1)])

+ 116 - 0
cvdatasets/annotation/mixins/parts_mixin.py

@@ -0,0 +1,116 @@
+import abc
+import copy
+import logging
+import numpy as np
+
+from collections import OrderedDict
+from collections import defaultdict
+from typing import Union
+
+from cvdatasets.annotation.files import AnnotationFiles
+from cvdatasets.utils.decorators import only_with_info
+
+class PartsMixin(abc.ABC):
+
+	@classmethod
+	def extract_kwargs(cls, opts):
+		kwargs = super(PartsMixin, cls).extract_kwargs(opts)
+		kwargs.update(dict(
+			parts=getattr(opts, "parts", None),
+		))
+		return kwargs
+
+
+	def __init__(self, *args, parts=None, **kwargs):
+		self.part_type = parts
+		self.part_names = OrderedDict()
+		self.part_name_list = []
+
+		super(PartsMixin, self).__init__(*args, **kwargs)
+
+	def read_annotation_files(self) -> AnnotationFiles:
+		files = super(PartsMixin, self).read_annotation_files()
+		logging.debug("Adding part annotation files")
+		files.load_files(
+			part_locs=("parts/part_locs.txt", True),
+			part_names=("parts/parts.txt", True),
+		)
+
+		return files
+
+	@property
+	@only_with_info
+	def dataset_info(self) -> dict:
+		ds_info = super(PartsMixin, self).dataset_info
+		if self.part_type is not None:
+			parts_key = f"{self.dataset_key}_{self.part_type}"
+			if parts_key in self.info.PARTS:
+				parts_info = self.info.PARTS[parts_key]
+			else:
+				parts_info = self.info.PART_TYPES[self.part_type]
+
+			ds_info = copy.deepcopy(ds_info)
+			ds_info.update(parts_info)
+
+		return ds_info
+
+	def check_dataset_kwargs(self, subset, **kwargs) -> dict:
+		if self.dataset_info is None:
+			return kwargs
+
+		new_kwargs = {}
+
+		if self.part_type is not None:
+			new_kwargs["part_rescale_size"] = self.dataset_info.rescale_size
+
+		new_kwargs.update(kwargs)
+
+		return super(PartsMixin, self).check_dataset_kwargs(subset, **new_kwargs)
+
+	@property
+	def has_parts(self) -> bool:
+		return self.files.part_locs is not None
+
+	@property
+	def has_part_names(self) -> bool:
+		return self.files.part_names is not None
+
+	def parse_annotations(self) -> None:
+		super(PartsMixin, self).parse_annotations()
+
+		if self.has_parts:
+			self._parse_parts()
+
+	def _parse_parts(self) -> None:
+		logging.debug("Parsing part annotations")
+		assert self.has_parts, \
+			"Part locations were not loaded!"
+		# this part is quite slow... TODO: some runtime improvements?
+		uuid_to_parts = defaultdict(list)
+		for content in [i.split() for i in self.files.part_locs]:
+			uuid = content[0]
+			# assert uuid in self.uuids, \
+			# 	"Could not find UUID \"\" from part annotations in image annotations!".format(uuid)
+			uuid_to_parts[uuid].append([float(c) for c in content[1:]])
+
+		uuid_to_parts = dict(uuid_to_parts)
+		self.part_locs = np.stack([
+			uuid_to_parts[uuid] for uuid in self.uuids]).astype(int)
+
+		if self.has_part_names:
+			self._parse_part_names()
+
+	def _parse_part_names(self) -> None:
+		self.part_names.clear()
+		self.part_name_list.clear()
+
+		for line in self.files.part_names:
+			part_idx, _, name = line.partition(" ")
+			self.part_names[int(part_idx)] = name
+			self.part_name_list.append(name)
+
+	def parts(self, uuid) -> Union[np.ndarray, None]:
+		if self.has_parts:
+			return self.part_locs[self.uuid_to_idx[uuid]].copy()
+
+		return None

+ 3 - 3
cvdatasets/annotations/impl/birdsnap.py → cvdatasets/annotation/old_types/birdsnap.py

@@ -2,9 +2,9 @@ import numpy as np
 
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 3 - 3
cvdatasets/annotations/impl/cars.py → cvdatasets/annotation/old_types/cars.py

@@ -2,9 +2,9 @@ import numpy as np
 
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 3 - 3
cvdatasets/annotations/impl/cub.py → cvdatasets/annotation/old_types/cub.py

@@ -2,9 +2,9 @@ import numpy as np
 
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 3 - 3
cvdatasets/annotations/impl/dogs.py → cvdatasets/annotation/old_types/dogs.py

@@ -2,9 +2,9 @@ import numpy as np
 
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 3 - 3
cvdatasets/annotations/impl/flowers.py → cvdatasets/annotation/old_types/flowers.py

@@ -2,9 +2,9 @@ import numpy as np
 
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 3 - 3
cvdatasets/annotations/impl/hed.py → cvdatasets/annotation/old_types/hed.py

@@ -3,9 +3,9 @@ import simplejson as json
 
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 2 - 2
cvdatasets/annotations/impl/imagenet.py → cvdatasets/annotation/old_types/imagenet.py

@@ -4,8 +4,8 @@ import logging
 
 from pathlib import Path
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 class INET_Annotations(PartsMixin, BaseAnnotations):

+ 3 - 4
cvdatasets/annotations/impl/inat.py → cvdatasets/annotation/old_types/inat.py

@@ -4,12 +4,11 @@ import logging
 import numpy as np
 import simplejson as json
 
-from os.path import isfile
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 3 - 3
cvdatasets/annotations/impl/nab.py → cvdatasets/annotation/old_types/nab.py

@@ -2,9 +2,9 @@ import numpy as np
 
 from os.path import join
 
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
+from cvdatasets.annotation.base import BaseAnnotations
+from cvdatasets.annotation.mixins.bbox_mixin import BBoxMixin
+from cvdatasets.annotation.mixins.parts_mixin import PartsMixin
 from cvdatasets.utils import _MetaInfo
 
 

+ 1 - 1
cvdatasets/annotations/impl/tigers.py → cvdatasets/annotation/old_types/tigers.py

@@ -5,7 +5,7 @@ from os.path import isfile
 from os.path import join
 from sklearn.model_selection import StratifiedShuffleSplit
 
-from cvdatasets.annotations.base import BaseAnnotations
+from cvdatasets.annotation.base import BaseAnnotations
 from cvdatasets.utils import _MetaInfo
 
 class TIGERS_Annotations(BaseAnnotations):

+ 76 - 0
cvdatasets/annotation/types/__init__.py

@@ -0,0 +1,76 @@
+import logging
+
+from cvdatasets.annotation.types.file_list import FileListAnnotations
+from cvdatasets.annotation.types.folder_annotations import FolderAnnotations
+from cvdatasets.annotation.types.json_annotations import JSONAnnotations
+
+from cvargparse.utils import BaseChoiceType
+from cvargparse.utils.enumerations import MetaBaseType
+from cvdatasets.utils import read_info_file
+
+class AnnotationType(BaseChoiceType):
+	FOLDER = FolderAnnotations
+	FILE_LIST = FileListAnnotations
+	JSON = JSONAnnotations
+
+	Default = FILE_LIST
+
+	@classmethod
+	def new_annotation(cls, opts, **kwargs):
+		"""Create the annotation object for the dataset selected in the options.
+
+		If "opts.dataset" is a member of this enumeration, the annotation
+		class is taken from it directly. Otherwise the info file "opts.data"
+		is read and the class is selected via the "annotation_type" entry of
+		the dataset.
+
+		Arguments:
+			opts: parsed options; "dataset" and "data" are read here, the
+				whole object is passed on to the "new" method of the
+				annotation class.
+			**kwargs: forwarded to the "new" method of the annotation class.
+
+		Raises:
+			AssertionError: if the info file does not contain the dataset.
+		"""
+		if opts.dataset in cls:
+			annot = cls[opts.dataset].value
+		else:
+			info_file = read_info_file(opts.data)
+			# the message referenced the undefined name "args" before, so a
+			# failing check raised a NameError instead of this message
+			assert opts.dataset in info_file.DATASETS, \
+				f"No information was found about the dataset \"{opts.dataset}\" in the info file \"{opts.data}\""
+			ds_info = info_file.DATASETS[opts.dataset]
+			annot = cls[ds_info.annotation_type.lower()].value
+
+		return annot.new(opts, **kwargs)
+
+	@classmethod
+	def as_choices(cls, add_phony=True):
+		choices = super(AnnotationType, cls).as_choices()
+		if not add_phony:
+			return choices
+
+		for key in cls:
+			for phony in cls.phony(key):
+				choices[phony.lower()] = choices[key.name.lower()]
+
+		return choices
+
+	@classmethod
+	def phony(cls, key):
+		""" returns for a key a list of datasets,
+			that use the same annotation class """
+
+		return {
+			cls.FOLDER : [
+				"IMAGENET", "IMAGENET_TOP_INAT20"
+			],
+
+			cls.FILE_LIST : [
+				"CUB200", "CUB200_2FOLD", "CUB200_GOOGLE", "CUB200_GOOGLE_SEM",
+				"NAB", "BIRDSNAP",
+				"CARS", "DOGS", "FLOWERS",
+				"HED", "TIGERS", "TIGERS_TEST",
+
+			],
+
+			cls.JSON : [
+				"INAT18",
+				"INAT19", "INAT19_TEST", "INAT19_MINI",
+				"INAT20", "INAT20_TEST",
+				"INAT20_IN_CLASS",
+				"INAT20_OUT_CLASS",
+				"INAT20_NOISY_IN_CLASS",
+				"INAT20_NOISY_OUT_CLASS",
+				"INAT20_U_IN_CLASS",
+				"INAT20_U_OUT_CLASS",
+			],
+
+		}.get(key, [])
+
+if __name__ == '__main__':
+	print(AnnotationType.as_choices().keys())

+ 50 - 0
cvdatasets/annotation/types/file_list.py

@@ -0,0 +1,50 @@
+import numpy as np
+
+from cvdatasets.annotation.base import Annotations
+from cvdatasets.annotation.files import AnnotationFiles
+
+class FileListAnnotations(Annotations):
+
+	def load_files(self, file_obj) -> AnnotationFiles:
+		file_obj.load_files("images.txt", "labels.txt", "tr_ID.txt")
+		return file_obj
+
+	def _parse_uuids(self) -> None:
+		assert self.files.images is not None, \
+			"Images were not loaded!"
+
+		uuid_fnames = [i.split() for i in self.files.images]
+		self.uuids, self.image_names = map(np.array, zip(*uuid_fnames))
+		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
+
+	def _parse_labels(self) -> None:
+		assert self.files.labels is not None, \
+			"Labels were not loaded!"
+
+		labs = list(map(int, self.files.labels))
+		self.labels = np.array(labs, dtype=np.int32)
+
+	def _parse_split(self) -> None:
+		assert self.files.tr_ID is not None, \
+			"Train-test split was not loaded!"
+
+		assert hasattr(self, "uuids"), \
+			"UUIDs were not parsed yet! Please call _parse_uuids before this method!"
+
+		uuid_to_split = {uuid: int(split) for uuid, split in zip(self.uuids, self.files.tr_ID)}
+		self.train_split = np.array([uuid_to_split[uuid] for uuid in self.uuids], dtype=bool)
+		self.test_split = np.logical_not(self.train_split)
+
+if __name__ == '__main__':
+	annot = FileListAnnotations(
+		root_or_infofile="/home/korsch_data/datasets/birds/cub200/ORIGINAL")
+
+	for i, uuid in enumerate(annot.uuids):
+		print(uuid, annot[uuid])
+
+		if i >= 10:
+			break
+
+	train, test = annot.new_train_test_datasets()
+
+	print(len(train), len(test))

+ 69 - 0
cvdatasets/annotation/types/folder_annotations.py

@@ -0,0 +1,69 @@
+import numpy as np
+
+from cvdatasets.annotation.base import Annotations
+from cvdatasets.annotation.files import AnnotationFiles
+
+class FolderAnnotations(Annotations):
+
+	def load_files(self, file_obj) -> AnnotationFiles:
+		file_obj.load_files(
+			train_images="ILSVRC2012_img_train",
+			val_images="ILSVRC2012_img_val",
+			test_images=("ILSVRC2012_img_test", True),
+		)
+		return file_obj
+
+	@property
+	def _has_test_set(self) -> bool:
+		return self.files.test_images is not None
+
+
+	def _parse_uuids(self) -> None:
+		"""Derive UUIDs and image names from the files found in the image folders.
+
+		The file name of each image serves as its UUID; the image name is the
+		path of the file relative to the annotation root. Training files come
+		first, followed by the validation files.
+		"""
+		# image names are already relative to the root, hence no extra sub-folder
+		self.images_folder = ""
+
+		train_uuid_fnames = [(fpath.name, str(fpath.relative_to(self.root))) for
+			fpath in self.files.train_images]
+
+		val_uuid_fnames = [(fpath.name, str(fpath.relative_to(self.root))) for
+			fpath in self.files.val_images]
+
+		# NOTE(review): the test entries are computed here, but they are not
+		# used anywhere below -- confirm whether they should be stored.
+		if self._has_test_set:
+			test_uuid_fnames = [(fpath.name, str(fpath.relative_to(self.root))) for
+				fpath in self.files.test_images]
+
+		uuid_fnames = train_uuid_fnames + val_uuid_fnames
+		self.uuids, self.image_names = map(np.array, zip(*uuid_fnames))
+		# reverse lookup: UUID -> position in the arrays above
+		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
+
+
+	def _parse_labels(self) -> None:
+		train_labs = [fpath.parent.name for fpath in self.files.train_images]
+		val_labs = [fpath.parent.name for fpath in self.files.val_images]
+		labs = train_labs + val_labs
+
+		if self._has_test_set:
+			self.test_labels = [fpath.parent.name for fpath in self.files.test_images]
+
+		self._classes, self.labels = np.unique(labs, return_inverse=True)
+
+
+	def _parse_split(self) -> None:
+		self.train_split = np.ones(len(self.uuids), dtype=bool)
+		self.train_split[len(self.files.train_images):] = False
+
+		self.test_split = np.logical_not(self.train_split)
+
+
+if __name__ == '__main__':
+	annot = FolderAnnotations(
+		root_or_infofile="/home/korsch_data/datasets/ImageNet/TOP_INAT20")
+
+	for i, uuid in enumerate(annot.uuids):
+		print(uuid, annot[uuid])
+
+		if i >= 10:
+			break
+
+	train, test = annot.new_train_test_datasets()
+
+	print(len(train), len(test))

+ 97 - 0
cvdatasets/annotation/types/json_annotations.py

@@ -0,0 +1,97 @@
+import copy
+import hashlib
+import logging
+import numpy as np
+
+from cvdatasets.annotation.base import Annotations
+from cvdatasets.annotation.files import AnnotationFiles
+
+def _uuid_check(uuids):
+	return len(np.unique(uuids)) == len(uuids)
+
+def _uuid_entry(im_info):
+	return hashlib.md5(im_info["file_name"].encode()).hexdigest()
+
+class JSONAnnotations(Annotations):
+
+	def load_files(self, file_obj) -> AnnotationFiles:
+		file_obj.load_files(
+			"trainval.json", "val.json",
+			("unlabeled_train.json", True),
+		)
+		return file_obj
+
+	@property
+	def has_unlabeled_data(self) -> bool:
+		return self.files.unlabeled_train is not None
+
+	def _parse_uuids(self) -> None:
+
+		uuid_fnames = [(str(im["id"]), im["file_name"]) for im in self.files.trainval["images"]]
+		self.uuids, self.image_names = map(np.array, zip(*uuid_fnames))
+
+		assert _uuid_check(self.uuids) , \
+			"UUIDs are not unique!"
+
+		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
+
+		if self.has_unlabeled_data:
+			logging.info("Loading unlabeled data...")
+			self._parse_unlabeled()
+		else:
+			logging.info("No unlabeled data was provided!")
+
+	def _parse_unlabeled(self) -> None:
+
+		uuid_fnames = [(_uuid_entry(im), im["file_name"]) for im in self.files.unlabeled_train["images"]]
+
+		self.unlabeled = unlabeled = copy.copy(self)
+
+		unlabeled.uuids, unlabeled.image_names = map(np.array, zip(*uuid_fnames))
+		unlabeled.labels = np.full(unlabeled.image_names.shape, -1, dtype=np.int32)
+		unlabeled.train_split = np.full(unlabeled.image_names.shape, 1, dtype=bool)
+		unlabeled.test_split = np.full(unlabeled.image_names.shape, 0, dtype=bool)
+
+		assert len(np.unique(unlabeled.uuids)) == len(unlabeled.uuids), \
+			"Unlabeled UUIDs are not unique!"
+
+		overlap = set(self.uuids) & set(unlabeled.uuids)
+		assert len(overlap) == 0, \
+			f"Unlabeled and labeled UUIDs overlap: {overlap}"
+
+		unlabeled.uuid_to_idx = {uuid: i for i, uuid in enumerate(unlabeled.uuids)}
+
+
+	def _parse_labels(self) -> None:
+		self.labels = np.zeros(len(self.uuids), dtype=np.int32)
+		labs = {str(annot["image_id"]): annot["category_id"]
+			for annot in self.files.trainval["annotations"]}
+
+		for uuid in self.uuids:
+			self.labels[self.uuid_to_idx[uuid]] = labs[uuid]
+
+
+	def _parse_split(self) -> None:
+		self.train_split = np.ones(len(self.uuids), dtype=bool)
+		val_uuids = [str(im["id"]) for im in self.files.val["images"]]
+		for v_uuid in val_uuids:
+			self.train_split[self.uuid_to_idx[v_uuid]] = False
+
+		self.test_split = np.logical_not(self.train_split)
+
+
+
+if __name__ == '__main__':
+	annot = JSONAnnotations(
+		root_or_infofile="/home/korsch_data/datasets/inat/2020/IN_CLASS")
+
+	for i, uuid in enumerate(annot.uuids):
+		print(uuid, annot[uuid])
+
+		if i >= 4:
+			break
+
+	train, test = annot.new_train_test_datasets()
+
+	print(len(train), len(test))
+

+ 0 - 37
cvdatasets/annotations/__init__.py

@@ -1,37 +0,0 @@
-from cvdatasets.annotations.annotation_types import AnnotationType
-from cvdatasets.annotations.impl.birdsnap import BSNAP_Annotations
-from cvdatasets.annotations.impl.cars import CARS_Annotations
-from cvdatasets.annotations.impl.cub import CUB_Annotations
-from cvdatasets.annotations.impl.dogs import DOGS_Annotations
-from cvdatasets.annotations.impl.flowers import FLOWERS_Annotations
-from cvdatasets.annotations.impl.hed import HED_Annotations
-from cvdatasets.annotations.impl.imagenet import INET_Annotations
-from cvdatasets.annotations.impl.inat import INAT18_Annotations
-from cvdatasets.annotations.impl.inat import INAT19_Annotations
-from cvdatasets.annotations.impl.inat import INAT20_Annotations
-from cvdatasets.annotations.impl.nab import NAB_Annotations
-from cvdatasets.annotations.impl.tigers import TIGERS_Annotations
-
-
-from cvdatasets.annotations.base import BaseAnnotations
-from cvdatasets.annotations.base.bbox_mixin import BBoxMixin
-from cvdatasets.annotations.base.parts_mixin import PartsMixin
-
-__all__ = [
-	"AnnotationType",
-	"BaseAnnotations",
-	"BBoxMixin",
-	"BSNAP_Annotations",
-	"CARS_Annotations",
-	"CUB_Annotations",
-	"DOGS_Annotations",
-	"FLOWERS_Annotations",
-	"HED_Annotations",
-	"INAT18_Annotations",
-	"INAT19_Annotations",
-	"INAT20_Annotations",
-	"INET_Annotations",
-	"NAB_Annotations",
-	"PartsMixin",
-	"TIGERS_Annotations",
-]

+ 0 - 68
cvdatasets/annotations/annotation_types.py

@@ -1,68 +0,0 @@
-from cvdatasets.annotations.impl.birdsnap import BSNAP_Annotations
-from cvdatasets.annotations.impl.cars import CARS_Annotations
-from cvdatasets.annotations.impl.cub import CUB_Annotations
-from cvdatasets.annotations.impl.dogs import DOGS_Annotations
-from cvdatasets.annotations.impl.flowers import FLOWERS_Annotations
-from cvdatasets.annotations.impl.hed import HED_Annotations
-from cvdatasets.annotations.impl.imagenet import INET_Annotations
-from cvdatasets.annotations.impl.inat import INAT18_Annotations
-from cvdatasets.annotations.impl.inat import INAT19_Annotations
-from cvdatasets.annotations.impl.inat import INAT20_Annotations
-from cvdatasets.annotations.impl.nab import NAB_Annotations
-from cvdatasets.annotations.impl.tigers import TIGERS_Annotations
-
-from cvargparse.utils import BaseChoiceType
-from functools import partial
-
-class AnnotationType(BaseChoiceType):
-	IMAGENET = INET_Annotations
-
-	CUB200 = CUB_Annotations
-	BIRDSNAP = BSNAP_Annotations
-	NAB = NAB_Annotations
-
-	CARS = CARS_Annotations
-	DOGS = DOGS_Annotations
-
-	FLOWERS = FLOWERS_Annotations
-
-	HED = HED_Annotations
-	TIGERS = TIGERS_Annotations
-
-	INAT18 = INAT18_Annotations
-	INAT19 = INAT19_Annotations
-	INAT20 = INAT20_Annotations
-
-	Default = CUB200
-
-	@classmethod
-	def phony(cls, key):
-		""" returns for a key a list of datasets,
-			that use the same annotation class """
-
-		return {
-			cls.CUB200 : [ "CUB200_2FOLD", "CUB200_GOOGLE", "CUB200_GOOGLE_SEM" ],
-			cls.TIGERS : [ "TIGERS_TEST" ],
-			cls.INAT19 : [ "INAT19_TEST", "INAT19_MINI" ],
-			cls.INAT20 : [ "INAT20_TEST",
-				"INAT20_IN_CLASS",
-				"INAT20_OUT_CLASS",
-				"INAT20_NOISY_IN_CLASS",
-				"INAT20_NOISY_OUT_CLASS",
-				"INAT20_U_IN_CLASS",
-				"INAT20_U_OUT_CLASS",
-			],
-			cls.IMAGENET : [ "IMAGENET_TOP_INAT20" ],
-		}.get(key, [])
-
-	@classmethod
-	def as_choices(cls, add_phony=True):
-		choices = super(AnnotationType, cls).as_choices()
-		if not add_phony:
-			return choices
-
-		for key in cls:
-			for phony in cls.phony(key):
-				choices[phony.lower()] = choices[key.name.lower()]
-
-		return choices

+ 0 - 200
cvdatasets/annotations/base/__init__.py

@@ -1,200 +0,0 @@
-import abc
-import logging
-import numpy as np
-
-from collections import OrderedDict
-from collections import defaultdict
-from os.path import isdir
-from os.path import isfile
-from os.path import join
-
-from cvdatasets.dataset import Dataset
-from cvdatasets.utils import feature_file_name
-from cvdatasets.utils import read_info_file
-from cvdatasets.utils import pretty_print_dict
-from cvdatasets.utils.decorators import only_with_info
-
-class BaseAnnotations(abc.ABC):
-
-	FEATURE_PHONY = dict(train=["train"], test=["test", "val"])
-
-	@classmethod
-	def new(cls, opts, **additional_kwargs):
-		kwargs = dict(
-			root_or_infofile=opts.data,
-			parts=getattr(opts, "parts", None),
-			load_strict=getattr(opts, "load_strict", False),
-			feature_model=getattr(opts, "feature_model", False),
-		)
-
-		kwargs.update(additional_kwargs)
-
-		return cls(**kwargs)
-
-
-	def __init__(self, *, root_or_infofile, feature_model=None, load_strict=True, **kwargs):
-		super(BaseAnnotations, self).__init__(**kwargs)
-		self.feature_model = feature_model
-		self.load_strict = load_strict
-
-		if isdir(root_or_infofile):
-			self.info = None
-			self.root = root_or_infofile
-
-		elif isfile(root_or_infofile):
-			self.root = self.root_from_infofile(root_or_infofile)
-
-		else:
-			raise ValueError("Root folder or info file does not exist: \"{}\"".format(
-				root_or_infofile
-			))
-
-		for struc in self.meta.structure:
-			self.read_content(*struc)
-
-		self.load()
-
-
-	@property
-	@only_with_info
-	def data_root(self):
-		return join(self.info.BASE_DIR, self.info.DATA_DIR)
-
-	@property
-	@only_with_info
-	def dataset_info(self):
-		return self.info.DATASETS[self.__class__.name]
-
-	def root_from_infofile(self, info_file):
-		self.info = read_info_file(info_file)
-
-		dataset_info = self.dataset_info
-		annot_dir = join(self.data_root, dataset_info.folder, dataset_info.annotations)
-
-		assert isdir(annot_dir), "Annotation folder does exist! \"{}\"".format(annot_dir)
-		return annot_dir
-
-	def new_dataset(self, subset=None, dataset_cls=Dataset, **kwargs):
-		if subset is not None:
-			uuids = getattr(self, "{}_uuids".format(subset))
-		else:
-			uuids = self.uuids
-
-		kwargs = self.check_dataset_kwargs(subset, **kwargs)
-		return dataset_cls(uuids=uuids, annotations=self, **kwargs)
-
-	def check_dataset_kwargs(self, subset, **kwargs):
-		dataset_info = self.dataset_info
-		if dataset_info is None:
-			return kwargs
-
-		logging.debug("Dataset info: {}".format(pretty_print_dict(dataset_info)))
-
-		# TODO: pass all scales
-		new_kwargs = {}
-
-		if "scales" in dataset_info:
-			new_kwargs["ratio"] = dataset_info.scales[0]
-
-		if "is_uniform" in dataset_info:
-			new_kwargs["uniform_parts"] = dataset_info.is_uniform
-
-		if None not in [subset, self.feature_model]:
-			tried = []
-			model_info = self.info.MODELS[self.feature_model]
-			for subset_phony in BaseAnnotations.FEATURE_PHONY[subset]:
-				features = feature_file_name(subset_phony, dataset_info, model_info)
-				feature_path = join(self.root, "features", features)
-				if isfile(feature_path): break
-				tried.append(feature_path)
-			else:
-				raise ValueError(
-					"Could not find any features in \"{}\" for {} subset. Tried features: {}".format(
-					join(self.root, "features"), subset, tried))
-
-			logging.info("Using features file from \"{}\"".format(feature_path))
-			new_kwargs["features"] = feature_path
-		new_kwargs.update(kwargs)
-
-		logging.debug("Final kwargs: {}".format(pretty_print_dict(new_kwargs)))
-		return new_kwargs
-
-	@property
-	@abc.abstractmethod
-	def meta(self):
-		pass
-
-	def _path(self, file):
-		return join(self.root, file)
-
-	def _open(self, file):
-		return open(self._path(file))
-
-	def set_content_from_file(self, file, attr, reader, optional=False):
-		content = None
-		fpath = self._path(file)
-
-		if isfile(fpath):
-			with self._open(file) as f:
-				content = reader(f)
-
-		elif not optional:
-			msg = f"File \"{fpath}\" was not found!"
-			if self.load_strict:
-				raise AssertionError(msg)
-			else:
-				logging.warning(msg)
-
-		setattr(self, attr, content)
-
-	def read_content(self, file, attr, optional=False):
-
-		def reader(f):
-			return [line.strip() for line in f if line.strip()]
-
-		self.set_content_from_file(file, attr, reader, optional)
-
-	def load(self):
-		logging.debug("Loading uuids, labels and training-test split")
-		self._load_uuids()
-		self._load_labels()
-		self._load_split()
-
-	def _load_labels(self):
-		self.labels = np.array([int(l) for l in self.labels], dtype=np.int32)
-
-	def _load_uuids(self):
-		assert self._images is not None, "Images were not loaded!"
-		uuid_fnames = [i.split() for i in self._images]
-		self.uuids, self.images = map(np.array, zip(*uuid_fnames))
-		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
-
-	def _load_split(self):
-		assert self._split is not None, "Train-test split was not loaded!"
-		uuid_to_split = {uuid: int(split) for uuid, split in zip(self.uuids, self._split)}
-		self.train_split = np.array([uuid_to_split[uuid] for uuid in self.uuids], dtype=bool)
-		self.test_split = np.logical_not(self.train_split)
-
-	def image_path(self, image):
-		return join(self.root, self.meta.images_folder, image)
-
-	def image(self, uuid):
-		fname = self.images[self.uuid_to_idx[uuid]]
-		return self.image_path(fname)
-
-	def label(self, uuid):
-		return self.labels[self.uuid_to_idx[uuid]].copy()
-
-	def _uuids(self, split):
-		return self.uuids[split]
-
-	@property
-	def train_uuids(self):
-		return self._uuids(self.train_split)
-
-	@property
-	def test_uuids(self):
-		return self._uuids(self.test_split)
-
-from .bbox_mixin import BBoxMixin
-from .parts_mixin import PartsMixin

+ 0 - 34
cvdatasets/annotations/base/bbox_mixin.py

@@ -1,34 +0,0 @@
-import abc
-import logging
-import numpy as np
-
-class BBoxMixin(abc.ABC):
-
-	@property
-	def has_bounding_boxes(self):
-		return hasattr(self, "_bounding_boxes") and self._bounding_boxes is not None
-
-	def load(self):
-		super(BBoxMixin, self).load()
-
-		if self.has_bounding_boxes:
-			self._load_bounding_boxes()
-
-	def _load_bounding_boxes(self):
-		logging.debug("Loading bounding box annotations")
-		assert self._bounding_boxes is not None, "Bouding boxes were not loaded!"
-
-		uuid_to_bbox = {}
-		for content in [i.split() for i in self._bounding_boxes]:
-			uuid, bbox = content[0], content[1:]
-			uuid_to_bbox[uuid] = [float(i) for i in bbox]
-
-		self.bounding_boxes = np.array(
-			[tuple(uuid_to_bbox[uuid]) for uuid in self.uuids],
-			dtype=self.meta.bounding_box_dtype)
-
-	def bounding_box(self, uuid):
-		if self.has_bounding_boxes:
-			return self.bounding_boxes[self.uuid_to_idx[uuid]].copy()
-
-		return np.array((0,0, 1,1), dtype=self.meta.bounding_box_dtype)

+ 0 - 84
cvdatasets/annotations/base/parts_mixin.py

@@ -1,84 +0,0 @@
-import abc
-import logging
-import numpy as np
-
-from collections import OrderedDict
-from collections import defaultdict
-from cvdatasets.utils.decorators import only_with_info
-
-class PartsMixin(abc.ABC):
-
-	def __init__(self, *, parts=None, **kwargs):
-		self.part_type = parts
-		self.part_names = OrderedDict()
-		self.part_name_list = []
-
-		super(PartsMixin, self).__init__(**kwargs)
-
-	@property
-	@only_with_info
-	def dataset_info(self):
-		if self.part_type is not None:
-			return self.info.PARTS[self.part_type]
-		else:
-			return super(PartsMixin, self).dataset_info
-
-	def check_dataset_kwargs(self, subset, **kwargs):
-		if self.dataset_info is None:
-			return kwargs
-
-		new_kwargs = {}
-
-		if self.part_type is not None:
-			new_kwargs["part_rescale_size"] = self.dataset_info.rescale_size
-
-		new_kwargs.update(kwargs)
-
-		return super(PartsMixin, self).check_dataset_kwargs(subset, **new_kwargs)
-
-	@property
-	def has_parts(self):
-		return hasattr(self, "_part_locs") and self._part_locs is not None
-
-	@property
-	def has_part_names(self):
-		return hasattr(self, "_part_names") and self._part_names is not None
-
-	def load(self):
-		super(PartsMixin, self).load()
-
-		if self.has_parts:
-			self._load_parts()
-
-	def _load_parts(self):
-		logging.debug("Loading part annotations")
-		assert self.has_parts, "Part locations were not loaded!"
-		# this part is quite slow... TODO: some runtime improvements?
-		uuid_to_parts = defaultdict(list)
-		for content in [i.split() for i in self._part_locs]:
-			uuid = content[0]
-			# assert uuid in self.uuids, \
-			# 	"Could not find UUID \"\" from part annotations in image annotations!".format(uuid)
-			uuid_to_parts[uuid].append([float(c) for c in content[1:]])
-
-		uuid_to_parts = dict(uuid_to_parts)
-		self.part_locs = np.stack([
-			uuid_to_parts[uuid] for uuid in self.uuids]).astype(int)
-
-		if self.has_part_names:
-			self._load_part_names()
-
-	def _load_part_names(self):
-		self.part_names.clear()
-		self.part_name_list.clear()
-
-		for line in self._part_names:
-			part_idx, _, name = line.partition(" ")
-			self.part_names[int(part_idx)] = name
-			self.part_name_list.append(name)
-
-	def parts(self, uuid):
-		if self.has_parts:
-			return self.part_locs[self.uuid_to_idx[uuid]].copy()
-
-		return None

+ 15 - 4
cvdatasets/dataset/__init__.py

@@ -1,7 +1,18 @@
-from .mixins.reading import AnnotationsReadMixin, ImageListReadingMixin
-from .mixins.parts import PartMixin, RevealedPartMixin, CroppedPartMixin
-from .mixins.features import PreExtractedFeaturesMixin
-from .mixins.chainer_mixins import IteratorMixin
+from cvdatasets.dataset.mixins.chainer_mixins import IteratorMixin
+from cvdatasets.dataset.mixins.features import PreExtractedFeaturesMixin
+from cvdatasets.dataset.mixins.parts import BBCropMixin
+from cvdatasets.dataset.mixins.parts import BBoxMixin
+from cvdatasets.dataset.mixins.parts import CroppedPartMixin
+from cvdatasets.dataset.mixins.parts import MultiBoxMixin
+from cvdatasets.dataset.mixins.parts import PartCropMixin
+from cvdatasets.dataset.mixins.parts import PartMixin
+from cvdatasets.dataset.mixins.parts import PartRevealMixin
+from cvdatasets.dataset.mixins.parts import PartsInBBMixin
+from cvdatasets.dataset.mixins.parts import RandomBlackOutMixin
+from cvdatasets.dataset.mixins.parts import RevealedPartMixin
+from cvdatasets.dataset.mixins.parts import UniformPartMixin
+from cvdatasets.dataset.mixins.reading import AnnotationsReadMixin
+from cvdatasets.dataset.mixins.reading import ImageListReadingMixin
 
 
 class ImageWrapperDataset(PartMixin, PreExtractedFeaturesMixin, AnnotationsReadMixin, IteratorMixin):

+ 35 - 5
cvdatasets/dataset/mixins/parts.py

@@ -1,8 +1,39 @@
 import numpy as np
 
-from . import BaseMixin
+from cvdatasets.dataset.mixins import BaseMixin
 
-class BBCropMixin(BaseMixin):
+class BBoxMixin(BaseMixin):
+
+	def bounding_box(self, i):
+		bbox = self._get("bounding_box", i)
+		return [bbox[attr] for attr in "xywh"]
+
+class MultiBoxMixin(BaseMixin):
+	_all_keys=[
+		"x", "x0", "x1",
+		"y", "y0", "y1",
+		"w", "h",
+	]
+
+	def multi_box(self, i, keys=["x0","x1","y0","y1"]):
+		assert all([key in self._all_keys for key in keys]), \
+			f"unknown keys found: {keys}. Possible are: {self._all_keys}"
+
+		boxes = [
+			dict(
+				x=box["x0"], x0=box["x0"], x1=box["x1"],
+
+				y=box["y0"], y0=box["y0"], y1=box["y1"],
+
+				w=box["x1"] - box["x0"],
+				h=box["y1"] - box["y0"],
+			)
+			for box in self._get("multi_box", i)["objects"]
+		]
+
+		return [[box[key] for key in keys] for box in boxes]
+
+class BBCropMixin(BBoxMixin):
 
 	def __init__(self, *, crop_to_bb=False, crop_uniform=False, **kwargs):
 		super(BBCropMixin, self).__init__(**kwargs)
@@ -10,8 +41,7 @@ class BBCropMixin(BaseMixin):
 		self.crop_uniform = crop_uniform
 
 	def bounding_box(self, i):
-		bbox = self._get("bounding_box", i)
-		x,y,w,h = [bbox[attr] for attr in "xywh"]
+		x,y,w,h = super(BBCropMixin, self).bounding_box(i)
 		if self.crop_uniform:
 			x0 = x + w//2
 			y0 = y + h//2
@@ -29,7 +59,7 @@ class BBCropMixin(BaseMixin):
 			return im_obj.crop(*bb)
 		return im_obj
 
-class PartsInBBMixin(BaseMixin):
+class PartsInBBMixin(BBoxMixin):
 	def __init__(self, parts_in_bb=False, *args, **kwargs):
 		super(PartsInBBMixin, self).__init__(*args, **kwargs)
 		self.parts_in_bb = parts_in_bb

+ 1 - 4
scripts/config.sh

@@ -1,4 +1 @@
-source ${HOME}/.miniconda3/etc/profile.d/conda.sh
-conda activate ${ENV:-chainer6}
-
-PYTHON="python" #-m cProfile -o profile"
+ #-m cProfile -o profile"

+ 4 - 9
scripts/display.py

@@ -9,24 +9,19 @@ import matplotlib.pyplot as plt
 
 from argparse import ArgumentParser
 
-from cvdatasets.annotations import AnnotationType
+from cvdatasets import AnnotationType
 from utils import parser, plot_crops
 
 def main(args):
-	assert args.dataset in AnnotationType, \
-		f"AnnotationType is not known: \"{args.dataset}\""
+	# assert args.dataset in AnnotationType, \
+	# 	f"AnnotationType is not known: \"{args.dataset}\""
 
-	annotation_cls = AnnotationType[args.dataset].value
-
-	logging.info(f"Loading \"{args.dataset}\" annnotations from \"{args.data}\"")
-	annot = annotation_cls(root_or_infofile=args.data, parts=args.parts, load_strict=False)
+	annot = AnnotationType.new_annotation(args)
 
 	kwargs = {}
 	if annot.info is None:
-		# features = args.features[0 if args.subset == "train" else 1]
 		kwargs = dict(
 			part_rescale_size=args.rescale_size,
-			# features=features,
 			uniform_parts=args.uniform_parts,
 			ratio=args.ratio,
 		)

+ 9 - 4
scripts/display.sh

@@ -1,11 +1,14 @@
 #!/usr/bin/env bash
-source config.sh
+source ${HOME}/.miniconda3/etc/profile.d/conda.sh
+conda activate ${ENV:-chainer6}
+
+PYTHON="python"
 
 ############## Possible calls ##############
 
 ##### displays GT parts of CUB200
 # ./display.sh /home/korsch1/korsch/datasets/birds/cub200_11 \
-# 	--dataset cub \
+# 	CUB200 \
 # 	-s600 -n5 \
 # 	--features /home/korsch1/korsch/datasets/birds/features/{train,val}_16parts_gt.npz \
 # 	--ratio 0.31
@@ -13,7 +16,7 @@ source config.sh
 
 ##### displays NAC parts of CUB200
 # ./display.sh /home/korsch1/korsch/datasets/birds/NAC/2017-bilinear/ \
-# 	--dataset cub \
+# 	CUB200 \
 # 	-s600 -n5 \
 # 	--features /home/korsch1/korsch/datasets/birds/features/{train,val}_20parts.npz \
 # 	--ratio 0.31 \
@@ -21,11 +24,13 @@ source config.sh
 
 ##### load from info file, displays Uniform parts of CUB200 that are in the GT bounding box
 # ./display.sh ~/DATA/info.yml \
-# 	-p UNI \
+# 	CUB200 \
+# 	CUB200_UNI \
 # 	-s600 -n3 \
 # 	--parts_in_bb
 
 ############################################
 
+export DATA=$1
 $PYTHON display.py $@
 

+ 45 - 248
scripts/info_files/info.yml

@@ -53,58 +53,67 @@ DATASETS:
   IMAGENET:         &inet
     folder: ImageNet
     annotations: "BJOERN"
+    annotation_type: FOLDER
     n_classes: 1000
 
+  IMAGENET_TOP_INAT20: &inet_top_inat20
+    <<: *inet
+    annotations: "TOP_INAT20"
+    n_classes: 44
+
   CUB200:         &cub200
     folder: birds/cub200
     annotations: "ORIGINAL"
+    annotation_type: FILE_LIST
     n_classes: 200
 
   CUB200_2FOLD:   &cub200_2fold
+    <<: *cub200
     folder: birds/cub200_2fold
-    annotations: "ORIGINAL"
-    n_classes: 200
 
   CUB200_GOOGLE: &cub200_google
+    <<: *cub200
     folder: birds/cub200_google_images
-    annotations: "ORIGINAL"
-    n_classes: 200
 
   CUB200_GOOGLE_SEM: &cub200_google_sem
+    <<: *cub200
     folder: birds/cub200_google_images
     annotations: "sem_selected"
-    n_classes: 200
-
 
   NAB:         &nabirds
     folder: birds/nabirds
     annotations: "ORIGINAL"
+    annotation_type: FILE_LIST
     n_classes: 555
 
   BIRDSNAP:         &birdsnap
     folder: birds/birdsnap
     annotations: "RESIZED"
+    annotation_type: FILE_LIST
     n_classes: 500
 
-
   CARS:         &cars
     folder: cars
     annotations: "ORIGINAL"
+    annotation_type: FILE_LIST
     n_classes: 196
 
   DOGS:         &dogs
     folder: dogs
     annotations: "ORIGINAL"
+    annotation_type: FILE_LIST
     n_classes: 120
 
   FLOWERS:         &flowers
     folder: flowers
     annotations: "flowers102"
+    annotation_type: FILE_LIST
     n_classes: 102
 
   INAT20:         &inat20
     folder: inat
     annotations: "2020/PLAIN"
+    annotation_type: JSON
     n_classes: 200
 
   INAT20_IN_CLASS:         &inat20_in_class
@@ -134,36 +143,40 @@ DATASETS:
   INAT19:         &inat19
     folder: inat
     annotations: "2019"
+    annotation_type: JSON
     n_classes: 1010
 
-  INAT18:         &inat18
-    folder: inat
-    annotations: "2018"
-    n_classes: 8142
-
   INAT19_MINI:    &inat19_mini
     <<: *inat19
     annotations: "2019_small"
+    feature_suffix: .mini
 
   INAT19_TEST:    &inat19_test
     <<: *inat19
     annotations: "2019_test"
+    feature_suffix: .test
 
+  INAT18:         &inat18
+    folder: inat
+    annotations: "2018"
+    annotation_type: JSON
+    n_classes: 8142
 
   HED:         &hed
     folder: medical/HED
     annotations: "patches224x224"
+    annotation_type: FILE_LIST
     n_classes: 2
 
   TIGERS:         &tigers
     folder: tigers
     annotations: "reid/train"
+    annotation_type: FILE_LIST
     n_classes: 107
 
   TIGERS_TEST:         &tigers_test
-    folder: tigers
+    <<: *tigers
     annotations: "reid/test"
-    n_classes: 107
 
 ############ Existing Part Annotations and Part Features
 ### feature file name composition:
@@ -187,12 +200,15 @@ PART_TYPES:
      - 0.31
 
   GT2:            &parts_gt2
+    # TODO: should be changed, since GT2 parts can also be present for other datasets
+    annotations: cub200_11_regrouped
     feature_suffix: _5parts_gt
     rescale_size: !!int -1
     scales:
       - 0.31
 
   NAC:            &parts_nac
+    annotations: NAC/2017-bilinear
     feature_suffix: _20parts
     rescale_size: !!int 224
     scales:
@@ -200,293 +216,74 @@ PART_TYPES:
       - 0.45
 
   L1_pred:        &parts_l1p
+    annotations: L1_pred
     feature_suffix: _5parts_L1_pred
-    rescale_size: !!int 299
-    scales:
-      - 0.31
+    rescale_size: !!int 427
+    scales: []
 
   L1_full:        &parts_l1f
+    annotations: L1_full
     feature_suffix: _5parts_L1_full
-    rescale_size: !!int 299
-    scales:
-      - 0.31
+    rescale_size: !!int 427
+    scales: []
 
   NTS:        &parts_nts
+    annotations: NTS
     feature_suffix: _7parts_nts
     rescale_size: !!int 448
-    scales:
-      - 0.31
+    scales: []
 
   NTS2:        &parts_nts2
+    annotations: NTS2
     feature_suffix: _5parts_nts
     rescale_size: !!int 448
-    scales:
-      - 0.31
+    scales: []
 
 PARTS:
-  #### No Parts Annotations
-
-  IMAGENET_GLOBAL:
-    <<: *inet
-    <<: *parts_global
-
-  CUB200_2FOLD_GLOBAL:
-    <<: *cub200_2fold
-    <<: *parts_global
-
-  CUB200_GLOBAL:
-    <<: *cub200
-    <<: *parts_global
-
-  CUB200_GOOGLE_GLOBAL:
-    <<: *cub200_google
-    <<: *parts_global
-
-  CUB200_GOOGLE_SEM_GLOBAL:
-    <<: *cub200_google_sem
-    <<: *parts_global
-
-
-  CARS_GLOBAL:
-    <<: *cars
-    <<: *parts_global
-
-  DOGS_GLOBAL:
-    <<: *dogs
-    <<: *parts_global
-
-  NAB_GLOBAL:
-    <<: *nabirds
-    <<: *parts_global
-
-  BIRDSNAP_GLOBAL:
-    <<: *birdsnap
-    <<: *parts_global
-
-  FLOWERS_GLOBAL:
-    <<: *flowers
-    <<: *parts_global
-
-  INAT18_GLOBAL:
-    <<: *inat18
-    <<: *parts_global
-
-  INAT19_GLOBAL:
-    <<: *inat19
-    <<: *parts_global
-
-  INAT19_MINI_GLOBAL:
-    <<: *inat19_mini
-    <<: *parts_global
-    feature_suffix: .mini
-
-  INAT19_TEST_GLOBAL:
-    <<: *inat19_test
-    <<: *parts_global
-    feature_suffix: .test
-
-  INAT20_GLOBAL:
-    <<: *inat20
-    <<: *parts_global
-
-  INAT20_TEST_GLOBAL:
-    <<: *inat20_test
-    <<: *parts_global
-
-  INAT20_IN_CLASS_GLOBAL:
-    <<: *inat20_in_class
-    <<: *parts_global
-
-  INAT20_OUT_CLASS_GLOBAL:
-    <<: *inat20_out_class
-    <<: *parts_global
-
-  INAT20_U_OUT_CLASS_GLOBAL:
-    <<: *inat20_u_out_class
-    <<: *parts_global
-
-  INAT20_NOISY_OUT_CLASS_GLOBAL:
-    <<: *inat20_noisy_out_class
-    <<: *parts_global
-
-  INAT20_NOISY_IN_CLASS_GLOBAL:
-    <<: *inat20_noisy_in_class
-    <<: *parts_global
-
-  HED_GLOBAL:
-    <<: *hed
-    <<: *parts_global
-
-  TIGERS_GLOBAL:
-    <<: *tigers
-    <<: *parts_global
-    rescale_size: !!int -1
-    scales:
-      - .31
-
-  TIGERS_TEST_GLOBAL:
-    <<: *tigers_test
-    <<: *parts_global
-
-  #### With Parts Annotations
-
-
-  ####################################
-  # CUB200-2011 2-Fold training set
-  ####################################
-
-  CUB200_2FOLD_L1_pred:
-    <<: *cub200_2fold
-    <<: *parts_l1p
-    annotations: L1_pred
-    rescale_size: !!int 427
-
-  CUB200_2FOLD_L1_full:
-    <<: *cub200_2fold
-    <<: *parts_l1f
-    annotations: L1_full
-    rescale_size: !!int 427
+  # All <DATASET>_<PART_TYPES> combinations are created implicitly.
+  # If you want to change a config, create an entry here
+  # and update the config values.
 
   ####################################
   # CUB200-2011
   ####################################
 
-  CUB200_UNI:
-    <<: *cub200
-    <<: *parts_uni
-
-  CUB200_GT:
-    <<: *cub200
-    <<: *parts_gt
-
-  CUB200_GT2:
-    <<: *cub200
-    <<: *parts_gt2
-    annotations: cub200_11_regrouped
-
-  CUB200_NAC:
-    <<: *cub200
-    <<: *parts_nac
-    annotations: NAC/2017-bilinear
-
-  CUB200_NTS:
-    <<: *cub200
-    <<: *parts_nts
-    annotations: NTS
-
-  CUB200_NTS2:
-    <<: *cub200
-    <<: *parts_nts2
-    annotations: NTS2
-
   CUB200_L1_pred:
     <<: *cub200
     <<: *parts_l1p
     annotations: cub200_11_L1_pred_old
-    # rescale_size: !!int 427
+    rescale_size: !!int 299
 
   CUB200_L1_full:
     <<: *cub200
     <<: *parts_l1f
     annotations: cub200_11_L1_full_old
-    # rescale_size: !!int 427
+    rescale_size: !!int 299
 
   CUB200_L1_pred_15:
     <<: *cub200
     <<: *parts_l1p
     annotations: L1_pred_15
-    rescale_size: !!int 427
     feature_suffix: _15parts_L1_pred
 
   CUB200_L1_full_15:
     <<: *cub200
     <<: *parts_l1f
     annotations: L1_full_15
-    rescale_size: !!int 427
     feature_suffix: _15parts_L1_full
 
   CUB200_L1_pred_2:
     <<: *cub200
     <<: *parts_l1p
     annotations: L1_pred_2
-    rescale_size: !!int 427
     feature_suffix: _2parts_L1_pred
 
   CUB200_L1_full_2:
     <<: *cub200
     <<: *parts_l1f
     annotations: L1_full_2
-    rescale_size: !!int 427
     feature_suffix: _2parts_L1_full
 
-
-  ####################################
-  # NA Birds
-  ####################################
-
-  NAB_GT:
-    <<: *nabirds
-    <<: *parts_gt
-
-  NAB_L1_pred:
-    <<: *nabirds
-    <<: *parts_l1p
-    annotations: L1_pred
-    rescale_size: !!int 427
-
-  NAB_L1_full:
-    <<: *nabirds
-    <<: *parts_l1f
-    annotations: L1_full
-    rescale_size: !!int 427
-
-
-  ####################################
-  # BIRDSNAP
-  ####################################
-
-  BIRDSNAP_L1_pred:
-    <<: *birdsnap
-    <<: *parts_l1p
-    annotations: L1_pred
-    rescale_size: !!int 427
-
-  BIRDSNAP_L1_full:
-    <<: *birdsnap
-    <<: *parts_l1f
-    annotations: L1_full
-    rescale_size: !!int 427
-
-  ####################################
-  # Stanford Cars
-  ####################################
-
-  CARS_L1_pred:
-    <<: *cars
-    <<: *parts_l1p
-    annotations: L1_pred
-
-  CARS_L1_full:
-    <<: *cars
-    <<: *parts_l1f
-    annotations: L1_full
-
-
-  ####################################
-  # Flowers 102
-  ####################################
-
-  FLOWERS_L1_pred:
-    <<: *flowers
-    <<: *parts_l1p
-    annotations: L1_pred
-
-  FLOWERS_L1_full:
-    <<: *flowers
-    <<: *parts_l1f
-    annotations: L1_full
-
-
   ####################################
   # iNaturalist 2019
   ####################################

+ 1 - 1
scripts/tests.sh

@@ -4,4 +4,4 @@ source ${_root}/scripts/config.sh
 
 export BASE_DIR="${_root}/tests"
 
-$PYTHON ${BASE_DIR}/main.py
+$PYTHON ${BASE_DIR}/main.py $@

+ 1 - 3
scripts/utils/parser.py

@@ -1,8 +1,6 @@
 import os
 from cvargparse import BaseParser, Arg
 
-from cvdatasets.annotations import AnnotationType
-
 from cvdatasets.utils import read_info_file
 
 DEFAULT_INFO_FILE=os.environ.get("DATA")
@@ -29,7 +27,7 @@ def parse_args():
 				help=data_help),
 			Arg("dataset", choices=info_file.DATASETS.keys(),
 				help=dataset_help),
-			Arg("parts", default="CUB200_GLOBAL", choices=info_file.PARTS.keys(),
+			Arg("parts", choices=info_file.PART_TYPES.keys(),
 				help=parts_help),
 		], group_name="Dataset arguments")
 

+ 3 - 3
tests/test_annotations.py

@@ -9,10 +9,10 @@ from os.path import *
 from abc import ABC, abstractproperty
 
 
-from cvdatasets import BaseAnnotations, _MetaInfo
+from cvdatasets import FileListAnnotations
 from cvdatasets.utils import read_info_file
 
-class MockAnnotation(BaseAnnotations):
+class MockAnnotation(FileListAnnotations):
 	name = "MOCK"
 	index_offset = 0
 
@@ -75,7 +75,7 @@ class BaseAnnotationTest(unittest.TestCase, ABC):
 		if bboxes:
 			with open(fname("bounding_boxes.txt"), "w") as bbox_f:
 				for i in range(index_offset, index_offset + len(images)):
-					print(i, 0, 0, 100, 100, file=bbox_f)
+					print(images[i][0], 0, 0, 100, 100, file=bbox_f)
 
 		if n_parts is not None:
 			parts_dir = join(annot_dir, "parts")