浏览代码

Checking for unique UUIDs in FolderAnnotations. Moved the UUID check to utils.

Dimitri Korsch 4 年之前
父节点
当前提交
a8d4a3b4da

+ 11 - 3
cvdatasets/annotation/types/folder_annotations.py

@@ -1,5 +1,8 @@
 import numpy as np
 
+from pathlib import Path
+
+from cvdatasets import utils
 from cvdatasets.annotation.base import Annotations
 from cvdatasets.annotation.files import AnnotationFiles
 
@@ -42,22 +45,27 @@ class FolderAnnotations(Annotations):
 	def _has_test_set(self) -> bool:
 		return self.files.test_images is not None
 
+	def _uuid_from_path(self, fpath):
+		return "_".join(Path(fpath).relative_to(self.root).parts)
+		# return "_".join(Path(fpath).parts[-3:])
 
 	def _parse_uuids(self) -> None:
 		self.images_folder = ""
 
-		train_uuid_fnames = [(fpath.name, str(fpath.relative_to(self.root))) for
+		train_uuid_fnames = [(self._uuid_from_path(fpath), str(fpath.relative_to(self.root))) for
 			fpath in self.files.train_images]
 
-		val_uuid_fnames = [(fpath.name, str(fpath.relative_to(self.root))) for
+		val_uuid_fnames = [(self._uuid_from_path(fpath), str(fpath.relative_to(self.root))) for
 			fpath in self.files.val_images]
 
 		if self._has_test_set:
-			test_uuid_fnames = [(fpath.name, str(fpath.relative_to(self.root))) for
+			test_uuid_fnames = [(self._uuid_from_path(fpath), str(fpath.relative_to(self.root))) for
 				fpath in self.files.test_images]
 
 		uuid_fnames = train_uuid_fnames + val_uuid_fnames
 		self.uuids, self.image_names = map(np.array, zip(*uuid_fnames))
+
+		utils.dataset._uuid_check(self.uuids)
 		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
 
 

+ 2 - 5
cvdatasets/annotation/types/json_annotations.py

@@ -3,12 +3,10 @@ import hashlib
 import logging
 import numpy as np
 
+from cvdatasets import utils
 from cvdatasets.annotation.base import Annotations
 from cvdatasets.annotation.files import AnnotationFiles
 
-def _uuid_check(uuids):
-	return len(np.unique(uuids)) == len(uuids)
-
 def _uuid_entry(im_info):
 	return hashlib.md5(im_info["file_name"].encode()).hexdigest()
 
@@ -30,8 +28,7 @@ class JSONAnnotations(Annotations):
 		uuid_fnames = [(str(im["id"]), im["file_name"]) for im in self.files.trainval["images"]]
 		self.uuids, self.image_names = map(np.array, zip(*uuid_fnames))
 
-		assert _uuid_check(self.uuids) , \
-			"UUIDs are not unique!"
+		utils.dataset._uuid_check(self.uuids)
 
 		self.uuid_to_idx = {uuid: i for i, uuid in enumerate(self.uuids)}
 

+ 3 - 1
cvdatasets/utils/__init__.py

@@ -63,5 +63,7 @@ class _MetaInfo(object):
 		self.structure = []
 
 
-from .image import asarray, dimensions, rescale
 from .dataset import new_iterator
+from .image import asarray
+from .image import dimensions
+from .image import rescale

+ 6 - 0
cvdatasets/utils/dataset.py

@@ -5,6 +5,12 @@ import warnings
 def _format_kwargs(kwargs):
 	return " ".join([f"{key}={value}" for key, value in kwargs.items()])
 
+def _uuid_check(uuids):
+	""" Checks whether the ids are unique """
+
+	assert len(np.unique(uuids)) == len(uuids), \
+		"UUIDs are not unique!"
+
 def new_iterator(data, n_jobs, batch_size, repeat=True, shuffle=True, n_prefetch=2):
 	from chainer.iterators import SerialIterator, MultiprocessIterator