Provider.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. import csv
  2. import typing
  3. from os import path
  4. from pycs.interfaces.LabelProvider import LabelProvider
  5. from .RowWrapper import RowWrapper
  6. class Provider(LabelProvider):
  7. def __init__(self, root_folder, configuration):
  8. self.csv_path = path.join(root_folder, configuration['filename'])
  9. self.csv_minimum_rarity = configuration['minimumRarity']
  10. self.csv_all_hierarchy_levels = configuration['includeAllHierarchyLevels']
  11. def close(self):
  12. pass
  13. def get_labels(self) -> typing.List[dict]:
  14. result = []
  15. with open(self.csv_path, mode='r', newline='', encoding='utf8') as csv_file:
  16. # skip first line which contains column names
  17. csv_file.readline()
  18. # read csv line by line
  19. reader = csv.reader(csv_file, delimiter='\t')
  20. entries = list(map(RowWrapper, reader))
  21. # filter
  22. if self.csv_minimum_rarity is not None:
  23. entries = filter(lambda row: row.rarity_is_larger_than(self.csv_minimum_rarity),
  24. entries)
  25. # create result set
  26. if self.csv_all_hierarchy_levels:
  27. hierarchy_levels = (('superfamily', 'Überfamilie'),
  28. ('family', 'Familie'),
  29. ('subfamily', 'Unterfamilie'),
  30. ('tribe', 'Tribus'),
  31. ('genus', 'Gattung'))
  32. else:
  33. hierarchy_levels = (('family', 'Familie'),
  34. ('genus', 'Gattung'))
  35. parents = set()
  36. for entry in entries:
  37. entry = entry.__dict__
  38. parent_reference = None
  39. # add hierarchy
  40. for level, level_name in hierarchy_levels:
  41. if entry[level] is not None:
  42. reference, name = f'{level}_{entry[level].lower()}', entry[level]
  43. # parents should be added once
  44. if reference not in parents:
  45. result.append(self.create_label(reference, name, parent_reference, level_name))
  46. parents.add(reference)
  47. parent_reference = reference
  48. # add element
  49. if entry['kr_number'].isalnum():
  50. name = f'{entry["genus"]} {entry["species"]} ({entry["kr_number"]})'
  51. reference = entry['kr_number']
  52. else:
  53. name = f'{entry["genus"]} {entry["species"]}'
  54. reference = f'_{name.lower()}'
  55. result.append(self.create_label(reference, name, parent_reference))
  56. return result