Browse Source

Merge branch '138-create-an-hierarchic-label-provider-from-lepiforum-data' into 'master'

Resolve "create an hierarchic label provider from lepiforum data"

Closes #138

See merge request troebs/pycs!125
Eric Tröbs 3 years ago
parent
commit
ebb062f8d9

File diff suppressed because it is too large
+ 964 - 0
labels/lepiforum_version_7/Lepiforums-Europaliste_Schmetterlinge_Version_7_Stand_2020_01_01_bearbeitet_GBrehm.csv


+ 71 - 0
labels/lepiforum_version_7/Provider.py

@@ -0,0 +1,71 @@
+import csv
+import typing
+from os import path
+
+from pycs.interfaces.LabelProvider import LabelProvider
+
+
+class Provider(LabelProvider):
+    def __init__(self, root_folder, configuration):
+        self.csv_path = path.join(root_folder, configuration['filename'])
+        self.csv_minimum_rarity = configuration['minimumRarity']
+
+    def close(self):
+        pass
+
+    def get_labels(self) -> typing.List[dict]:
+        result = []
+
+        with open(self.csv_path, mode='r', newline='', encoding='utf8') as csv_file:
+            # skip first line which contains column names
+            csv_file.readline()
+
+            # read csv line by line
+            reader = csv.reader(csv_file, delimiter='\t')
+            entries = list(map(lambda row: {
+                'localOccurence': self.__replace_empty_with_none(row[0]),
+                'rarity': self.__replace_empty_with_none(row[1]),
+                'superfamily': self.__replace_empty_with_none(row[2]),
+                'family': self.__replace_empty_with_none(row[3]),
+                'subfamily': self.__replace_empty_with_none(row[4]),
+                'tribe': self.__replace_empty_with_none(row[5]),
+                'krNumber': self.__replace_empty_with_none(row[9]),
+                'genus': self.__replace_empty_with_none(row[10]),
+                'species': self.__replace_empty_with_none(row[11])
+            }, reader))
+
+        # filter
+        if self.csv_minimum_rarity is not None:
+            def flt_fn(e):
+                r = e['rarity']
+                return r is not None and r.isnumeric() and self.csv_minimum_rarity < int(r)
+
+            entries = list(filter(flt_fn, entries))
+
+        # create result set
+        for entry in entries:
+            parent_reference = None
+
+            # add hierarchy
+            for tax in ('superfamily', 'family', 'subfamily', 'tribe'):
+                if entry[tax] is not None:
+                    reference, name = entry[tax].lower(), entry[tax]
+                    result.append(self.create_label(reference, name, parent_reference))
+
+                    parent_reference = reference
+
+            # add element
+            if entry['krNumber'].isnumeric():
+                name = f'{entry["genus"]} {entry["species"]} ({entry["krNumber"]})'
+                reference = entry['krNumber']
+            else:
+                name = f'{entry["genus"]} {entry["species"]}'
+                reference = name.lower()
+
+            result.append(self.create_label(reference, name, parent_reference))
+
+        return result
+
+    @staticmethod
+    def __replace_empty_with_none(val: str):
+        return val if val.strip() else None

+ 10 - 0
labels/lepiforum_version_7/configuration.json

@@ -0,0 +1,10 @@
+{
+  "name": "Lepiforum Europaliste Schmetterlinge Version 7 Hierarchic",
+  "description": "Stand: 01.01.2020, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+  "filename": "Lepiforums-Europaliste_Schmetterlinge_Version_7_Stand_2020_01_01_bearbeitet_GBrehm.csv",
+  "minimumRarity": 1
+}

+ 26 - 4
pycs/database/Project.py

@@ -1,7 +1,7 @@
 from contextlib import closing
 from os.path import join
 from time import time
-from typing import List, Optional, Tuple, Iterator
+from typing import List, Optional, Tuple, Iterator, Union
 
 from pycs.database.Collection import Collection
 from pycs.database.File import File
@@ -77,26 +77,48 @@ class Project:
 
             return None
 
+    def label_by_reference(self, reference: str) -> Optional[Label]:
+        """
+        get a label using its reference string
+
+        :param reference: reference string
+        :return: label
+        """
+        with closing(self.database.con.cursor()) as cursor:
+            cursor.execute('SELECT * FROM labels WHERE reference = ? AND project = ?',
+                           (reference, self.identifier))
+            row = cursor.fetchone()
+
+            if row is not None:
+                return Label(self.database, row)
+
+            return None
+
     def create_label(self, name: str, reference: str = None,
-                     parent_id: int = None) -> Tuple[Optional[Label], bool]:
+                     parent: Union[Label, int, str] = None) -> Tuple[Optional[Label], bool]:
         """
         create a label for this project. If there is already a label with the same reference
         in the database its name is updated.
 
         :param name: label name
         :param reference: label reference
-        :param parent_id: parent's identifier
+        :param parent: either parent identifier, parent reference string or `Label` object
         :return: created or edited label, insert
         """
         created = int(time())
 
+        if isinstance(parent, str):
+            parent = self.label_by_reference(parent)
+        if isinstance(parent, Label):
+            parent = parent.identifier
+
         with closing(self.database.con.cursor()) as cursor:
             cursor.execute('''
                 INSERT INTO labels (project, parent, created, reference, name)
                 VALUES (?, ?, ?, ?, ?)
                 ON CONFLICT (project, reference) DO
                 UPDATE SET parent = ?, name = ?
-            ''', (self.identifier, parent_id, created, reference, name, parent_id, name))
+            ''', (self.identifier, parent, created, reference, name, parent, name))
 
             # lastrowid is 0 if on conflict clause applies.
             # If this is the case we do an extra query to receive the row id.

+ 1 - 1
pycs/frontend/endpoints/projects/ExecuteLabelProvider.py

@@ -76,7 +76,7 @@ class ExecuteLabelProvider(View):
         def result(provided_labels):
             with db:
                 for label in provided_labels:
-                    created_label, insert = project.create_label(label['name'], label['id'],
+                    created_label, insert = project.create_label(label['name'], label['reference'],
                                                                  label['parent'])
 
                     if insert:

+ 5 - 5
pycs/interfaces/LabelProvider.py

@@ -32,17 +32,17 @@ class LabelProvider:
         raise NotImplementedError
 
     @staticmethod
-    def create_label(identifier, name, parent_identifier=None):
+    def create_label(reference: str, name: str, parent_reference=None):
         """
         create a label result
 
-        :param identifier: label identifier
+        :param reference: label reference string
         :param name: label name
-        :param parent_identifier: parent's identifier
+        :param parent_reference: parent's reference string
         :return:
         """
         return {
-            'id': identifier,
+            'reference': reference,
             'name': name,
-            'parent': parent_identifier
+            'parent': parent_reference
         }

Some files were not shown because too many files changed in this diff