123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
- #!/usr/bin/env python
# This file is a command-line script only: refuse to continue (by raising)
# whenever it is imported as a module instead of being executed directly.
if __name__ != '__main__': raise Exception("Do not import me!")
- import csv
- import json
- from argparse import ArgumentParser
- from collections import namedtuple
# Header names of the columns read from the tab-separated input table.
# The order matters: values are fed positionally into ``Row`` below, so the
# i-th header here corresponds to the i-th field of ``Row``.
columns = [
    "KR-Nr.",
    "Überfamilie",
    "Familie",
    "Unterfamilie",
    "Tribus",
    "Gattung",
    "Art",
    "Seltenheit",
]

# One record of the input table, with English field names.
Row = namedtuple(
    "Row",
    "kr superfamily family subfamily tribus genus species rarity",
)
class ClassName(namedtuple("ClassName", "genus species")):
    """A class label split into its ``genus`` and ``species`` parts."""

    __slots__ = ()

    # Labels that are replaced by another label before they are split.
    # Keys are looked up verbatim (after stripping whitespace) in ``new``.
    alt = {
        "mesapamea_spec": "mesapamea_secalis",
        "amphipyra_pyramidea_-_berberea": "amphipyra_pyramidea",
        "aplocera_plagiata_-_efformata": "aplocera_plagiata",
        "chlroclystis_v-ata": "chloroclystis_v-ata",
        "epirrita_autumnata_-_dilutata_-_christyi": "epirrita_autumnata",
        "noctua_janthina_-_janthe": "noctua_janthina",
        "oligia_latruncula_-_strigilis_-_versicolor": "oligia_latruncula",
        "thera_variata_-_britannica": "thera_variata",
    }

    @classmethod
    def new(cls, name_string: str):
        """Build a ``ClassName`` from a raw ``genus_species`` label.

        Surrounding whitespace (including a trailing newline) is removed,
        the label is swapped for its entry in ``alt`` when one exists, and
        the result is split at the first underscore. Everything after that
        first underscore becomes ``species``; a label without an underscore
        yields an empty ``species``.
        """
        label = name_string.strip()
        canonical = cls.alt.get(label, label)
        genus_part, _sep, species_part = canonical.partition("_")
        return cls(genus_part, species_part)
def main(args):
    """Map class names onto rows of the input table and write the result as JSON.

    Args:
        args: Parsed command-line arguments providing
            ``class_names`` (path to a text file with one class label per
            line), ``csv_file`` (path to a tab-separated table whose header
            contains every entry of ``columns``) and ``output`` (path of the
            JSON file to write).

    The output maps the zero-based line index of each matched class label to
    a dict holding ``class_name`` plus all ``Row`` fields. Labels are matched
    against the lower-cased genus and species columns. Labels without a
    matching row are left out of the output and listed on stderr.

    NOTE: all files are opened with the platform's default text encoding,
    exactly as before.
    """
    import sys  # local import: only needed for the unmapped-label report

    with open(args.class_names) as f:
        class_names = [ClassName.new(line) for line in f]

    with open(args.csv_file, newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        rows = [Row(*[record[col] for col in columns]) for record in reader]

    # Index the table once instead of scanning every row for every label.
    # Later rows overwrite earlier ones with the same (genus, species), which
    # keeps the previous behaviour where the last matching row won.
    rows_by_name = {
        (row.genus.lower(), row.species.lower()): row
        for row in rows
    }

    mapping = {}
    not_mapped = []
    for i, name in enumerate(class_names):
        row = rows_by_name.get((name.genus, name.species))
        if row is None:
            not_mapped.append(name)
            continue
        mapping[i] = dict(class_name="_".join(name), **row._asdict())

    # Previously these labels were collected but silently discarded.
    if not_mapped:
        print(
            "No table entry found for {} class name(s): {}".format(
                len(not_mapped),
                ", ".join("_".join(name) for name in not_mapped),
            ),
            file=sys.stderr,
        )

    with open(args.output, "w") as f:
        json.dump(mapping, f, indent=2)
# Command-line interface: two required positional paths and an optional
# output path, then run the script with the parsed arguments.
parser = ArgumentParser()
parser.add_argument("csv_file")      # tab-separated input table
parser.add_argument("class_names")   # text file with one class label per line
parser.add_argument("--output", default="mapping.json")  # JSON file to write

cli_args = parser.parse_args()
main(cli_args)
|