#!/usr/bin/env python
"""Map class names onto rows of a tab-separated species table.

Reads a text file with one class name per line (``genus_species``) and a
tab-separated table containing the columns listed in ``columns``.  For every
class name whose genus and species match a table row (case-insensitively on
the table side), the row is stored under the zero-based line index of the
class name.  The resulting mapping is written as JSON.

Usage: script.py CSV_FILE CLASS_NAMES [--output mapping.json]
"""
# The script is meant to be executed only; importing it is rejected on purpose.
if __name__ != '__main__':
    raise Exception("Do not import me!")

import csv
import json
import sys
from argparse import ArgumentParser
from collections import namedtuple

# Header names expected in the table, in the order of the ``Row`` fields.
columns = ["KR-Nr.", "Überfamilie", "Familie", "Unterfamilie", "Tribus", "Gattung", "Art", "Seltenheit"]

# One table record; field order corresponds to ``columns``.
Row = namedtuple("Row", "kr superfamily family subfamily tribus genus species rarity")


class ClassName(namedtuple("ClassName", "genus species")):
    """A class name split into its genus and species parts."""

    __slots__ = ()

    # Replacements applied to a stripped class name before it is split.
    # Keys are looked up verbatim, so they must stay exactly as they appear
    # in the class-name file (including any misspellings there).
    alt = {
        "mesapamea_spec": "mesapamea_secalis",
        "amphipyra_pyramidea_-_berberea": "amphipyra_pyramidea",
        "aplocera_plagiata_-_efformata": "aplocera_plagiata",
        "chlroclystis_v-ata": "chloroclystis_v-ata",
        "epirrita_autumnata_-_dilutata_-_christyi": "epirrita_autumnata",
        "noctua_janthina_-_janthe": "noctua_janthina",
        "oligia_latruncula_-_strigilis_-_versicolor": "oligia_latruncula",
        "thera_variata_-_britannica": "thera_variata"
    }

    @classmethod
    def new(cls, name_string: str) -> "ClassName":
        """Build a ``ClassName`` from a raw line such as ``"genus_species\\n"``.

        Surrounding whitespace is stripped, the name is replaced by its
        entry in ``alt`` if there is one, and the result is split at the
        first underscore.  Without an underscore the species is ``""``.
        """
        name_string = name_string.strip()
        name_string = cls.alt.get(name_string, name_string)
        genus, _, spec = name_string.partition("_")
        return cls(genus, spec)


def main(args):
    """Create the index -> row mapping and write it to ``args.output``.

    Args:
        args: parsed command-line namespace providing ``class_names``
            (path of the class-name list), ``csv_file`` (path of the
            tab-separated table) and ``output`` (path of the JSON file).

    Raises:
        KeyError: if a table row lacks one of the headers in ``columns``.
    """
    # NOTE(review): both input files are opened with the platform default
    # encoding although the expected header contains "Überfamilie" --
    # confirm the table's encoding matches the environment.
    with open(args.class_names) as f:
        class_names = [ClassName.new(line) for line in f]

    with open(args.csv_file, newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        rows = [Row(*[row[col] for col in columns]) for row in reader]

    # Index the table once instead of scanning all rows for each class name.
    # Filling it in file order lets a later duplicate replace an earlier one,
    # which is the same row the previous full scan ended up keeping.
    rows_by_name = {}
    for row in rows:
        if row.genus is None or row.species is None:
            # Short line: DictReader fills missing trailing cells with None,
            # and such a row cannot match any class name.
            continue
        rows_by_name[(row.genus.lower(), row.species.lower())] = row

    mapping = {}
    not_mapped = []
    for i, name in enumerate(class_names):
        row = rows_by_name.get((name.genus, name.species))
        if row is None:
            not_mapped.append(name)
            continue
        mapping[i] = dict(class_name="_".join(name), **row._asdict())

    # Report names without a table row so they do not vanish silently.
    if not_mapped:
        print("Warning: no table row found for {} class name(s):".format(len(not_mapped)),
              file=sys.stderr)
        for name in not_mapped:
            print("  " + "_".join(name), file=sys.stderr)

    with open(args.output, "w") as f:
        json.dump(mapping, f, indent=2)


parser = ArgumentParser()
parser.add_argument("csv_file")
parser.add_argument("class_names")
parser.add_argument("--output", default="mapping.json")

main(parser.parse_args())