Provider.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. import csv
  2. import typing
  3. from os import path
  4. from pycs.interfaces.LabelProvider import LabelProvider
  class Provider(LabelProvider):
      """Label provider that builds labels from a tab-separated taxonomy file.

      The file is located at ``<root_folder>/<configuration['filename']>``. Its
      first line is treated as a header and skipped. Every following row yields
      up to four hierarchy labels (superfamily, family, subfamily, tribe) and
      one label for the genus/species element itself.
      """

      # Entry key -> index of the column it is read from.
      # Columns 6-8 of the file are not used.
      __COLUMNS = (
          ('localOccurence', 0),
          ('rarity', 1),
          ('superfamily', 2),
          ('family', 3),
          ('subfamily', 4),
          ('tribe', 5),
          ('krNumber', 9),
          ('genus', 10),
          ('species', 11),
      )

      # Taxonomic levels emitted as (nested) parent labels, outermost first.
      __HIERARCHY = ('superfamily', 'family', 'subfamily', 'tribe')

      def __init__(self, root_folder, configuration):
          """Store the file location and the rarity threshold.

          :param root_folder: folder the configured file name is joined onto
          :param configuration: mapping providing ``'filename'`` and
              ``'minimumRarity'`` (the latter may be ``None`` to disable
              filtering)
          :raises KeyError: if one of the two keys is missing
          """
          self.csv_path = path.join(root_folder, configuration['filename'])
          self.csv_minimum_rarity = configuration['minimumRarity']

      def close(self):
          """Do nothing; the file is only held open while ``get_labels`` runs."""
          pass

      def get_labels(self) -> typing.List[dict]:
          """Read the file and return one label per hierarchy level and element.

          Labels are created through ``self.create_label(reference, name,
          parent_reference)``, which is not defined in this class (presumably
          inherited from ``LabelProvider`` -- verify there). The same hierarchy
          label is emitted again for every row that mentions it.

          :return: list of the values returned by ``create_label``
          :raises IndexError: if a non-blank row has fewer than 12 columns
          """
          entries = self.__read_entries()

          # filter
          if self.csv_minimum_rarity is not None:
              entries = [e for e in entries if self.__exceeds_minimum_rarity(e)]

          # create result set
          result = []
          for entry in entries:
              parent_reference = None

              # add hierarchy: each present level becomes the parent of the next
              for tax in self.__HIERARCHY:
                  name = entry[tax]
                  if name is None:
                      continue
                  reference = name.lower()
                  result.append(self.create_label(reference, name, parent_reference))
                  parent_reference = reference

              # add element
              # An empty krNumber cell was turned into None while reading, so it
              # must be checked before calling a string method on it.
              kr_number = entry['krNumber']
              # NOTE(review): a missing genus or species is rendered as the
              # literal text "None" in the name; kept as-is because the name
              # also serves as reference when there is no numeric krNumber.
              if kr_number is not None and kr_number.isnumeric():
                  name = f'{entry["genus"]} {entry["species"]} ({entry["krNumber"]})'
                  reference = kr_number
              else:
                  name = f'{entry["genus"]} {entry["species"]}'
                  reference = name.lower()

              result.append(self.create_label(reference, name, parent_reference))

          return result

      def __read_entries(self) -> typing.List[dict]:
          """Parse the file into dicts keyed as listed in ``__COLUMNS``."""
          with open(self.csv_path, mode='r', newline='', encoding='utf8') as csv_file:
              # skip first line which contains column names
              csv_file.readline()

              # read csv line by line
              reader = csv.reader(csv_file, delimiter='\t')
              return [
                  {
                      key: self.__replace_empty_with_none(row[index])
                      for key, index in self.__COLUMNS
                  }
                  for row in reader
                  # blank lines yield rows without any content; skip them
                  # instead of failing on the column lookup
                  if any(cell.strip() for cell in row)
              ]

      def __exceeds_minimum_rarity(self, entry: dict) -> bool:
          """Return True if the entry's rarity is strictly above the minimum.

          Entries without a rarity, or with a rarity that is not a plain
          decimal number, are rejected.
          """
          rarity = entry['rarity']
          # isdecimal() (unlike isnumeric()) only accepts what int() can parse
          return (
              rarity is not None
              and rarity.isdecimal()
              and self.csv_minimum_rarity < int(rarity)
          )

      @staticmethod
      def __replace_empty_with_none(val: str) -> typing.Optional[str]:
          """Return ``val`` unchanged, or ``None`` if it is empty or only whitespace."""
          return val if val.strip() else None