Project.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. import os
  2. import shutil
  3. import typing as T
  4. from datetime import datetime
  5. from eventlet import tpool
  6. from sqlalchemy.sql import case
  7. from pycs import app
  8. from pycs import db
  9. from pycs.database.base import NamedBaseModel
  10. from pycs.database.Collection import Collection
  11. from pycs.database.File import File
  12. from pycs.database.Label import Label
  13. from pycs.database.util import commit_on_return
  14. from pycs.util.FileOperations import resize_file
  15. class Project(NamedBaseModel):
  16. """ DB Model for projects """
  17. description = db.Column(db.String)
  18. created = db.Column(db.DateTime, default=datetime.utcnow,
  19. index=True, nullable=False)
  20. model_id = db.Column(
  21. db.Integer,
  22. db.ForeignKey("model.id", ondelete="SET NULL"))
  23. label_provider_id = db.Column(
  24. db.Integer,
  25. db.ForeignKey("label_provider.id", ondelete="SET NULL"))
  26. root_folder = db.Column(db.String, nullable=False, unique=True)
  27. external_data = db.Column(db.Boolean, nullable=False)
  28. data_folder = db.Column(db.String, nullable=False)
  29. # contraints
  30. __table_args__ = ()
  31. # relationships to other models
  32. files = db.relationship(
  33. "File",
  34. backref="project",
  35. lazy="dynamic",
  36. passive_deletes=True,
  37. )
  38. labels = db.relationship(
  39. "Label",
  40. backref="project",
  41. lazy="dynamic",
  42. passive_deletes=True,
  43. )
  44. collections = db.relationship(
  45. "Collection",
  46. backref="project",
  47. lazy="dynamic",
  48. passive_deletes=True,
  49. )
  50. serialize_only = NamedBaseModel.serialize_only + (
  51. "created",
  52. "description",
  53. "model_id",
  54. "label_provider_id",
  55. "root_folder",
  56. "external_data",
  57. "data_folder",
  58. )
  59. @commit_on_return
  60. def delete(self) -> T.Tuple[dict, dict]:
  61. # pylint: disable=unexpected-keyword-arg
  62. dump = super().delete(commit=False)
  63. model_dump = {}
  64. if self.model_id is not None:
  65. # pylint: disable=unexpected-keyword-arg
  66. model_dump = self.model.delete(commit=False)
  67. if os.path.exists(self.root_folder):
  68. # remove from file system
  69. shutil.rmtree(self.root_folder)
  70. return dump, model_dump
  71. def label(self, identifier: int) -> T.Optional[Label]:
  72. """
  73. get a label using its unique identifier
  74. :param identifier: unique identifier
  75. :return: label
  76. """
  77. return self.labels.filter(Label.id == identifier).one_or_none()
  78. def label_by_reference(self, reference: str) -> T.Optional[Label]:
  79. """
  80. get a label using its reference string
  81. :param reference: reference string
  82. :return: label
  83. """
  84. return self.labels.filter(Label.reference == reference).one_or_none()
  85. def file(self, identifier: int) -> T.Optional[File]:
  86. """
  87. get a file using its unique identifier
  88. :param identifier: unique identifier
  89. :return: file
  90. """
  91. return self.files.filter(File.id == identifier).one_or_none()
  92. def collection(self, identifier: int) -> T.Optional[Collection]:
  93. """
  94. get a collection using its unique identifier
  95. :param identifier: unique identifier
  96. :return: collection
  97. """
  98. return self.collections.filter(Collection.id == identifier).one_or_none()
  99. def collection_by_reference(self, reference: str) -> T.Optional[Collection]:
  100. """
  101. get a collection using its unique identifier
  102. :param identifier: unique identifier
  103. :return: collection
  104. """
  105. return self.collections.filter(Collection.reference == reference).one_or_none()
  106. @commit_on_return
  107. def create_label(self, name: str,
  108. reference: str = None,
  109. parent: T.Optional[T.Union[int, str, Label]] = None,
  110. hierarchy_level: str = None) -> T.Tuple[T.Optional[Label], bool]:
  111. """
  112. create a label for this project. If there is already a label with the same reference
  113. in the database its name is updated.
  114. :param name: label name
  115. :param reference: label reference
  116. :param parent: parent label. Either a reference string, a Label id or a Label instance
  117. :param hierarchy_level: hierarchy level name
  118. :return: created or edited label, insert
  119. """
  120. label = None
  121. is_new = False
  122. if reference is not None:
  123. label = Label.query.filter_by(project_id=self.id, reference=reference).one_or_none()
  124. if label is None:
  125. label = Label.new(commit=False, project_id=self.id, reference=reference)
  126. is_new = True
  127. label.set_name(name, commit=False)
  128. label.set_parent(parent, commit=False)
  129. label.hierarchy_level = hierarchy_level
  130. return label, is_new
  131. @commit_on_return
  132. def bulk_create_labels(self, labels: T.List[T.Dict]):
  133. """
  134. Inserts a all labels at once.
  135. :raises:
  136. - AssertionError if project_id and reference are not unique
  137. - ValueError if a cycle in the hierarchy is found
  138. """
  139. if len(labels) == 0:
  140. return labels
  141. for label in labels:
  142. label["project_id"] = self.id
  143. self.__check_labels(labels)
  144. # first update existing labels
  145. fields_to_update = (
  146. ("name", Label.name),
  147. ("hierarchy_level", Label.hierarchy_level),
  148. )
  149. updates = {
  150. field: case(
  151. {lab["reference"]: lab[key] for lab in labels},
  152. value=Label.reference)
  153. for key, field in fields_to_update
  154. }
  155. existing_labs = self.labels.filter(
  156. Label.reference.in_([lab["reference"] for lab in labels])
  157. )
  158. app.logger.info(f"Updating {existing_labs.count():,d} labels")
  159. existing_labs.update(updates, synchronize_session=False)
  160. # then add new labels
  161. references = {lab.reference for lab in self.labels.all()}
  162. new_labels = [lab for lab in labels
  163. if lab["reference"] not in references]
  164. if len(new_labels) > 0:
  165. app.logger.info(f"Inserting {len(new_labels):,d} new labels")
  166. db.engine.execute(Label.__table__.insert(), new_labels)
  167. # finally set parents correctly
  168. self.__set_parents(labels)
  169. return labels
  170. def __set_parents(self, labels):
  171. """ after the bul insert, we need to set correct parent_ids """
  172. app.logger.info("Setting parents of the labels")
  173. self.flush()
  174. for label in labels:
  175. if label["parent"] is None:
  176. continue
  177. label_obj = self.label_by_reference(label["reference"])
  178. parent_label_obj = self.label_by_reference(label["parent"])
  179. label_obj.parent_id = parent_label_obj.id
  180. # pylint: disable=no-self-use
  181. def __check_labels(self, labels):
  182. """ check labels for unique keys and cycles """
  183. unique_keys = {}
  184. for label in labels:
  185. key = (label["project_id"], label["reference"])
  186. assert key not in unique_keys, \
  187. f"{key} was not unique: ({label=} vs {unique_keys[key]=})!"
  188. unique_keys[key] = label
  189. # pylint: disable=too-many-arguments
  190. @commit_on_return
  191. def create_collection(self,
  192. reference: str,
  193. name: str,
  194. description: str,
  195. position: int,
  196. autoselect: bool) -> T.Tuple[Collection, bool]:
  197. """
  198. create a new collection associated with this project
  199. :param reference: collection reference string
  200. :param name: collection name
  201. :param description: collection description
  202. :param position: position in menus
  203. :param autoselect: automatically select this collection on session load
  204. :return: collection object, insert
  205. """
  206. collection, is_new = Collection.get_or_create(
  207. project_id=self.id, reference=reference)
  208. collection.name = name
  209. collection.description = description
  210. collection.position = position
  211. collection.autoselect = autoselect
  212. return collection, is_new
  213. # pylint: disable=too-many-arguments
  214. @commit_on_return
  215. def add_file(self,
  216. uuid: str,
  217. file_type: str,
  218. name: str,
  219. extension: str,
  220. size: int,
  221. filename: str,
  222. frames: int = None,
  223. fps: float = None) -> T.Tuple[File, bool]:
  224. """
  225. add a file to this project
  226. :param uuid: unique identifier which is used for temporary files
  227. :param file_type: file type (either image or video)
  228. :param name: file name
  229. :param extension: file extension
  230. :param size: file size
  231. :param filename: actual name in filesystem
  232. :param frames: frame count
  233. :param fps: frames per second
  234. :return: file
  235. """
  236. path = os.path.join(self.data_folder, f"{filename}{extension}")
  237. file, is_new = File.get_or_create(
  238. project_id=self.id, path=path)
  239. file.uuid = uuid
  240. file.type = file_type
  241. file.name = name
  242. file.extension = extension
  243. file.size = size
  244. file.frames = frames
  245. file.fps = fps
  246. # Pre-load common thumbnail sizes if the given file is an image.
  247. # if file.type == 'image' and os.path.isfile(path):
  248. # for max_width, max_height in [(200, 200), (2000, 800)]:
  249. # tpool.execute(resize_file, file, self.root_folder, max_width, max_height)
  250. return file, is_new
  251. def get_files(self, *filters, offset: int = 0, limit: int = -1,
  252. with_annotations: T.Optional[bool] = None) -> T.List[File]:
  253. """
  254. get an iterator of files associated with this project
  255. :param offset: file offset
  256. :param limit: file limit
  257. :return: iterator of files
  258. """
  259. if with_annotations is not None:
  260. annot_query = File.results.any()
  261. if with_annotations is False:
  262. annot_query = ~annot_query
  263. filters = filters + (annot_query,)
  264. return self.files.filter(*filters).order_by(File.path).offset(offset).limit(limit)
  265. def _files_without_results(self):
  266. """
  267. get files without any results
  268. :return: a query object
  269. """
  270. # pylint: disable=no-member
  271. return self.files.filter(~File.results.any())
  272. def count_files_without_results(self) -> int:
  273. """
  274. count files without associated results
  275. :return: count
  276. """
  277. return self._files_without_results().count()
  278. def files_without_results(self) -> T.List[File]:
  279. """
  280. get a list of files without associated results
  281. :return: list of files
  282. """
  283. return self._files_without_results().all()
  284. def _files_without_collection(self, offset: int = 0, limit: int = -1):
  285. """
  286. get files without a collection
  287. :return: a query object
  288. """
  289. # pylint: disable=no-member
  290. return self.get_files(File.collection_id.is_(None), offset=offset, limit=limit)
  291. def files_without_collection(self, offset: int = 0, limit: int = -1) -> T.List[File]:
  292. """
  293. get a list of files without a collection
  294. :return: list of files
  295. """
  296. return self._files_without_collection(offset=offset, limit=limit).all()
  297. def count_files_without_collection(self) -> int:
  298. """
  299. count files associated with this project but without a collection
  300. :return: count
  301. """
  302. return self._files_without_collection().count()