6
0
فهرست منبع

Added another label_provider implementation using pandas DataFrames. Added config for species from German-speaking countries only

Dimitri Korsch 3 سال پیش
والد
کامیت
1324058814

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 964 - 0
labels/LepiForum_PandasVersion/LepiForum_Species_edited_by_GBrehm.csv


+ 133 - 0
labels/LepiForum_PandasVersion/Provider.py

@@ -0,0 +1,133 @@
+import re
+import numpy as np
+import pandas as pd
+import typing as T
+
+from pathlib import Path
+from munch import munchify
+
+from pycs import app
+from pycs.interfaces.LabelProvider import LabelProvider
+
+class Provider(LabelProvider):
+
+    names = [
+        'is_local',
+        'rarity',
+        'super_family',
+        'family',
+        'sub_family',
+        'tribe',
+        'german',
+        'swiss',
+        'austrian',
+        'kr_nr',
+        'genus',
+        'species',
+        'authors',
+        'comment',
+        'remove_me',
+        'changed',
+        'version1_comment',
+        'misc', # 'D-CH-A / non-KR / Kaukasus',
+        'german_name',
+    ]
+
+    dtype = {
+        'is_local': pd.CategoricalDtype(['nur lokal', 'tagaktiv']),
+        'rarity': np.float32,
+        'super_family': "category",
+        'family': "category",
+        'sub_family': "category",
+        'tribe': "category",
+        'german': pd.CategoricalDtype(['D', 'e', '?']),
+        'swiss': pd.CategoricalDtype(['C', 'e', '?']),
+        'austrian': pd.CategoricalDtype(['A', 'e', '?']),
+        'kr_nr': "object",
+        'genus': "category",
+        'species': "category",
+        'authors': "object",
+        'comment': "object",
+        'remove_me': "category",
+        'changed': "object",
+        'version1_comment': "object",
+        'misc': "object",
+        'german_name': str,
+    }
+
+    KR_REGEX = re.compile(r"^[\d\-a-zA-Z]+")
+
+
+    def __init__(self, root_folder: str, configuration: T.Dict):
+        config = munchify(configuration)
+        self.root = Path(root_folder)
+
+        self.label_file = self.root / config.filename
+        self.min_rarity = config.minimumRarity
+        self.hierarchy_levels = config.hierarchyLevels
+        self.only_german = config.onlyGerman
+
+    def close(self):
+        pass
+
+    def get_labels(self) -> T.List[dict]:
+        result = []
+
+        lepi_list = pd.read_csv(self.label_file,
+                        names=self.names,
+                        dtype=self.dtype,
+                        sep="\t", header=0
+                       )
+        app.logger.info(f"Found {len(lepi_list)} labels in {self.label_file}")
+
+        if self.min_rarity is not None:
+            mask = lepi_list.rarity >= self.min_rarity
+            lepi_list = lepi_list[mask]
+            app.logger.info(f"Labels {len(lepi_list):,d} with {self.min_rarity=}")
+
+        if self.only_german:
+            mask = (
+                lepi_list.german.eq("D") |
+                lepi_list.austrian.eq("A") |
+                lepi_list.swiss.eq("C")
+                ) & \
+                lepi_list["remove_me"].isin([np.nan])
+
+            lepi_list = lepi_list[mask]
+            app.logger.info(f"Labels {len(lepi_list):,d} for german-speaking countries")
+
+
+        parents = set()
+        for i, entry in lepi_list.iterrows():
+            parent_reference = None
+
+            for level, level_name in self.hierarchy_levels:
+                level_entry = entry[level]
+                if level_entry is None:
+                    continue
+
+                reference, name = f'{level}_{level_entry.lower()}', level_entry
+
+                # parents should be added once
+                if reference not in parents:
+                    result.append(self.create_label(reference, name, parent_reference, level_name))
+                    parents.add(reference)
+
+                parent_reference = reference
+
+
+            # add label itself
+            if self.KR_REGEX.match(entry.kr_nr):
+                name = f'{entry.genus} {entry.species} ({entry.kr_nr})'
+                reference = entry.kr_nr
+
+            else:
+                name = f'{entry.genus} {entry.species}'
+                reference = f'_{name.lower()}'
+            result.append(self.create_label(reference, name, parent_reference))
+
+
+        app.logger.info(f"Finally, provided {len(result):,d} labels")
+        return result
+
+

+ 16 - 0
labels/LepiForum_PandasVersion/configuration1.json

@@ -0,0 +1,16 @@
+{
+  "name": "LepiForum (Alle Spezies)",
+  "description": "Stand: 01.12.2021, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+
+  "filename": "LepiForum_Species_edited_by_GBrehm.csv",
+  "minimumRarity": null,
+  "onlyGerman": false,
+  "hierarchyLevels": [
+    ["family", "Familie"],
+    ["genus", "Gattung"]
+  ]
+}

+ 16 - 0
labels/LepiForum_PandasVersion/configuration2.json

@@ -0,0 +1,16 @@
+{
+  "name": "LepiForum (Alle Spezies aus D/A/CH)",
+  "description": "Stand: 01.12.2021, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+
+  "filename": "LepiForum_Species_edited_by_GBrehm.csv",
+  "minimumRarity": null,
+  "onlyGerman": true,
+  "hierarchyLevels": [
+    ["family", "Familie"],
+    ["genus", "Gattung"]
+  ]
+}

+ 16 - 0
labels/LepiForum_PandasVersion/configuration3.json

@@ -0,0 +1,16 @@
+{
+  "name": "LepiForum (Nur häufige Spezies aus D/A/CH)",
+  "description": "Stand: 01.12.2021, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+
+  "filename": "LepiForum_Species_edited_by_GBrehm.csv",
+  "minimumRarity": 0,
+  "onlyGerman": true,
+  "hierarchyLevels": [
+    ["family", "Familie"],
+    ["genus", "Gattung"]
+  ]
+}

+ 14 - 0
labels/LepiForum_PandasVersion/configuration4.json

@@ -0,0 +1,14 @@
+{
+  "name": "LepiForum (Alle Spezies aus D/A/CH, ohne Hierarchie)",
+  "description": "Stand: 01.12.2021, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+
+  "filename": "LepiForum_Species_edited_by_GBrehm.csv",
+  "minimumRarity": null,
+  "onlyGerman": true,
+  "hierarchyLevels": [
+  ]
+}

+ 3 - 0
pycs/database/Project.py

@@ -172,6 +172,9 @@ class Project(NamedBaseModel):
                 - AssertionError if project_id and reference are not unique
                 - AssertionError if project_id and reference are not unique
                 - ValueError if a cycle in the hierarchy is found
                 - ValueError if a cycle in the hierarchy is found
         """
         """
+        if len(labels) == 0:
+            return labels
+
         if clean_old_labels:
         if clean_old_labels:
             self.labels.delete()
             self.labels.delete()
 
 

+ 1 - 0
requirements.txt

@@ -11,6 +11,7 @@ flask-migrate
 python-socketio
 python-socketio
 munch
 munch
 scikit-image
 scikit-image
+pandas
 
 
 chainer~=7.8
 chainer~=7.8
 chainer-addons~=0.10
 chainer-addons~=0.10

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است