1
1
فهرست منبع

Merge branch '143-improve-lepiforum-label-provider' into 'master'

Resolve "improve lepiforum label provider"

Closes #143

See merge request troebs/pycs!130
Eric Tröbs 3 سال پیش
والد
کامیت
0c9110c064

+ 15 - 24
labels/lepiforum_version_7/Provider.py

@@ -3,12 +3,14 @@ import typing
 from os import path
 
 from pycs.interfaces.LabelProvider import LabelProvider
+from .RowWrapper import RowWrapper
 
 
 class Provider(LabelProvider):
     def __init__(self, root_folder, configuration):
         self.csv_path = path.join(root_folder, configuration['filename'])
         self.csv_minimum_rarity = configuration['minimumRarity']
+        self.csv_all_hierarchy_levels = configuration['includeAllHierarchyLevels']
 
     def close(self):
         pass
@@ -22,32 +24,25 @@ class Provider(LabelProvider):
 
             # read csv line by line
             reader = csv.reader(csv_file, delimiter='\t')
-            entries = list(map(lambda row: {
-                'localOccurence': self.__replace_empty_with_none(row[0]),
-                'rarity': self.__replace_empty_with_none(row[1]),
-                'superfamily': self.__replace_empty_with_none(row[2]),
-                'family': self.__replace_empty_with_none(row[3]),
-                'subfamily': self.__replace_empty_with_none(row[4]),
-                'tribe': self.__replace_empty_with_none(row[5]),
-                'krNumber': self.__replace_empty_with_none(row[9]),
-                'genus': self.__replace_empty_with_none(row[10]),
-                'species': self.__replace_empty_with_none(row[11])
-            }, reader))
+            entries = list(map(RowWrapper, reader))
 
         # filter
         if self.csv_minimum_rarity is not None:
-            def flt_fn(e):
-                r = e['rarity']
-                return r is not None and r.isnumeric() and self.csv_minimum_rarity < int(r)
-
-            entries = list(filter(flt_fn, entries))
+            entries = filter(lambda row: row.rarity_is_larger_than(self.csv_minimum_rarity),
+                             entries)
 
         # create result set
         for entry in entries:
+            entry = entry.__dict__
             parent_reference = None
 
             # add hierarchy
-            for tax in ('superfamily', 'family', 'subfamily', 'tribe'):
+            if self.csv_all_hierarchy_levels:
+                hierarchy_levels = ('superfamily', 'family', 'subfamily', 'tribe', 'genus')
+            else:
+                hierarchy_levels = ('family', 'genus')
+
+            for tax in hierarchy_levels:
                 if entry[tax] is not None:
                     reference, name = entry[tax].lower(), entry[tax]
                     result.append(self.create_label(reference, name, parent_reference))
@@ -55,9 +50,9 @@ class Provider(LabelProvider):
                     parent_reference = reference
 
             # add element
-            if entry['krNumber'].isnumeric():
-                name = f'{entry["genus"]} {entry["species"]} ({entry["krNumber"]})'
-                reference = entry['krNumber']
+            if entry['kr_number'].isnumeric():
+                name = f'{entry["genus"]} {entry["species"]} ({entry["kr_number"]})'
+                reference = entry['kr_number']
             else:
                 name = f'{entry["genus"]} {entry["species"]}'
                 reference = name.lower()
@@ -65,7 +60,3 @@ class Provider(LabelProvider):
             result.append(self.create_label(reference, name, parent_reference))
 
         return result
-
-    @staticmethod
-    def __replace_empty_with_none(val: str):
-        return val if val.strip() else None

+ 26 - 0
labels/lepiforum_version_7/RowWrapper.py

@@ -0,0 +1,26 @@
+class RowWrapper:
+    """
+    attribute-style view of one row of the tab-separated Lepiforum csv file
+
+    Cells which are empty or contain only whitespace are stored as `None`.
+    `family`, `subfamily`, `tribe` and `genus` fall back to the value of the
+    next higher rank if their own cell is empty.
+    """
+
+    def __init__(self, row: list):
+        """
+        :param row: list of cell strings, the columns 0-5 and 9-11 are read
+        """
+        self.local_occurrence = self.__empty_to_none(row[0])
+        self.rarity = self.__empty_to_none(row[1])
+        self.superfamily = self.__empty_to_none(row[2])
+        self.family = self.__empty_to_default(row[3], self.superfamily)
+        self.subfamily = self.__empty_to_default(row[4], self.family)
+        self.tribe = self.__empty_to_default(row[5], self.subfamily)
+        self.kr_number = self.__empty_to_none(row[9])
+        self.genus = self.__empty_to_default(row[10], self.tribe)
+        self.species = self.__empty_to_none(row[11])
+
+    def rarity_is_larger_than(self, limit: int):
+        """
+        check if the rarity cell contains an integer which is larger than `limit`
+
+        :param limit: exclusive lower bound
+        :return: `False` if the rarity is empty, not a decimal number or not
+                 larger than `limit`, `True` otherwise
+        """
+        # `isdecimal` instead of `isnumeric`: the latter is also true for
+        # characters like '½' or '²' which make `int` raise a ValueError
+        return self.rarity is not None and self.rarity.isdecimal() and limit < int(self.rarity)
+
+    @staticmethod
+    def __empty_to_none(val: str):
+        """
+        :param val: cell content
+        :return: `val` unchanged, or `None` if it only consists of whitespace
+        """
+        return val if val.strip() else None
+
+    @staticmethod
+    def __empty_to_default(val: str, default: str):
+        """
+        :param val: cell content
+        :param default: replacement for an empty cell (may be `None`)
+        :return: `val` unchanged, or `default` if it only consists of whitespace
+        """
+        val = RowWrapper.__empty_to_none(val)
+        if val is not None:
+            return val
+
+        return default

+ 11 - 0
labels/lepiforum_version_7/configuration1.json

@@ -0,0 +1,11 @@
+{
+  "name": "Lepiforum Version 7 Hierarchic / most common, reduced hierarchy depth",
+  "description": "Stand: 01.01.2020, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+  "filename": "Lepiforums-Europaliste_Schmetterlinge_Version_7_Stand_2020_01_01_bearbeitet_GBrehm.csv",
+  "minimumRarity": 0,
+  "includeAllHierarchyLevels": false
+}

+ 11 - 0
labels/lepiforum_version_7/configuration2.json

@@ -0,0 +1,11 @@
+{
+  "name": "Lepiforum Version 7 Hierarchic / reduced hierarchy depth",
+  "description": "Stand: 01.01.2020, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+  "filename": "Lepiforums-Europaliste_Schmetterlinge_Version_7_Stand_2020_01_01_bearbeitet_GBrehm.csv",
+  "minimumRarity": null,
+  "includeAllHierarchyLevels": false
+}

+ 11 - 0
labels/lepiforum_version_7/configuration3.json

@@ -0,0 +1,11 @@
+{
+  "name": "Lepiforum Version 7 Hierarchic / most common",
+  "description": "Stand: 01.01.2020, bearbeitet GBrehm",
+  "code": {
+    "module": "Provider",
+    "class": "Provider"
+  },
+  "filename": "Lepiforums-Europaliste_Schmetterlinge_Version_7_Stand_2020_01_01_bearbeitet_GBrehm.csv",
+  "minimumRarity": 0,
+  "includeAllHierarchyLevels": true
+}

+ 3 - 2
labels/lepiforum_version_7/configuration.json → labels/lepiforum_version_7/configuration4.json

@@ -1,10 +1,11 @@
 {
-  "name": "Lepiforum Europaliste Schmetterlinge Version 7 Hierarchic",
+  "name": "Lepiforum Version 7 Hierarchic",
   "description": "Stand: 01.01.2020, bearbeitet GBrehm",
   "code": {
     "module": "Provider",
     "class": "Provider"
   },
   "filename": "Lepiforums-Europaliste_Schmetterlinge_Version_7_Stand_2020_01_01_bearbeitet_GBrehm.csv",
-  "minimumRarity": 1
+  "minimumRarity": null,
+  "includeAllHierarchyLevels": true
 }

+ 6 - 4
pycs/database/Database.py

@@ -46,10 +46,12 @@ class Database:
                     ''')
                     cursor.execute('''
                         CREATE TABLE IF NOT EXISTS label_providers (
-                            id          INTEGER PRIMARY KEY,
-                            name        TEXT                NOT NULL,
-                            description TEXT,
-                            root_folder TEXT                NOT NULL UNIQUE
+                            id                 INTEGER PRIMARY KEY,
+                            name               TEXT                NOT NULL,
+                            description        TEXT,
+                            root_folder        TEXT                NOT NULL,
+                            configuration_file TEXT                NOT NULL,
+                            UNIQUE(root_folder, configuration_file)
                         )
                     ''')
 

+ 32 - 0
pycs/database/LabelProvider.py

@@ -1,3 +1,9 @@
+import json
+from os import path
+
+from pycs.interfaces.LabelProvider import LabelProvider as LabelProviderInterface
+
+
 class LabelProvider:
     """
     database class for label providers
@@ -10,3 +16,29 @@ class LabelProvider:
         self.name = row[1]
         self.description = row[2]
         self.root_folder = row[3]
+        self.configuration_file = row[4]
+
+    @property
+    def configuration_path(self):
+        """
+        path of the configuration file of this label provider
+
+        :return: `root_folder` joined with `configuration_file`
+        """
+        folder, filename = self.root_folder, self.configuration_file
+        return path.join(folder, filename)
+
+    def load(self) -> LabelProviderInterface:
+        """
+        load the configuration file of this label provider and create an
+        instance from the included code object
+
+        :return: LabelProvider instance
+        """
+        # load configuration file
+        # (JSON is UTF-8 encoded, so do not rely on the locale's default encoding)
+        with open(self.configuration_path, 'r', encoding='utf-8') as configuration_file:
+            configuration = json.load(configuration_file)
+
+        # load code
+        # the dotted module name is built from the root folder and the
+        # configured module by replacing the path separators
+        code_path = path.join(self.root_folder, configuration['code']['module'])
+        module_name = code_path.replace('/', '.').replace('\\', '.')
+        class_name = configuration['code']['class']
+
+        # a non-empty fromlist makes __import__ return the submodule itself
+        imported_module = __import__(module_name, fromlist=[class_name])
+        class_attr = getattr(imported_module, class_name)
+
+        # return instance
+        return class_attr(self.root_folder, configuration)

+ 27 - 9
pycs/database/discovery/LabelProviderDiscovery.py

@@ -1,7 +1,26 @@
+import re
 from contextlib import closing
 from glob import glob
 from json import load
-from os import path
+from os import path, listdir
+
+
+def __find_files():
+    """
+    find the label provider configuration files below `labels/`
+
+    An element is yielded for every regular file named `configuration.json`
+    or `configuration<number>.json` which is located directly inside a
+    subfolder of `labels/`.
+
+    :return: generator of (folder, filename, file_path) tuples
+    """
+    # list folders in labels/ (sorted to get a reproducible discovery order)
+    for folder in sorted(glob('labels/*')):
+        # glob also returns plain files, listdir would raise an error for them
+        if not path.isdir(folder):
+            continue
+
+        # list files
+        for filename in sorted(listdir(folder)):
+            file_path = path.join(folder, filename)
+
+            # filter configuration files
+            if not path.isfile(file_path):
+                continue
+
+            # fullmatch, because `$` would also accept a trailing newline
+            if not re.fullmatch(r'configuration(\d+)?\.json', filename):
+                continue
+
+            # yield element
+            yield folder, filename, file_path
 
 
 def discover(database):
@@ -12,10 +31,9 @@ def discover(database):
     :return:
     """
     with closing(database.cursor()) as cursor:
-        # list folders in labels/
-        for folder in glob('labels/*'):
-            # load distribution.json
-            with open(path.join(folder, 'configuration.json'), 'r') as file:
+        for folder, configuration_file, configuration_path in __find_files():
+            # load configuration file
+            with open(configuration_path, 'r') as file:
                 label = load(file)
 
             # extract data
@@ -24,8 +42,8 @@ def discover(database):
 
             # save to database
             cursor.execute('''
-                INSERT INTO label_providers (name, description, root_folder)
-                VALUES (?, ?, ?)
-                ON CONFLICT (root_folder)
+                INSERT INTO label_providers (name, description, root_folder, configuration_file)
+                VALUES (?, ?, ?, ?)
+                ON CONFLICT (root_folder, configuration_file)
                 DO UPDATE SET name = ?, description = ?
-            ''', (name, description, folder, name, description))
+            ''', (name, description, folder, configuration_file, name, description))

+ 1 - 2
pycs/frontend/endpoints/projects/ExecuteLabelProvider.py

@@ -9,7 +9,6 @@ from pycs.database.Project import Project
 from pycs.frontend.notifications.NotificationManager import NotificationManager
 from pycs.jobs.JobGroupBusyException import JobGroupBusyException
 from pycs.jobs.JobRunner import JobRunner
-from pycs.util.LabelProviderUtil import load_from_root_folder as load_label_provider
 
 
 class ExecuteLabelProvider(View):
@@ -68,7 +67,7 @@ class ExecuteLabelProvider(View):
         # pylint: disable=invalid-name
         # receive loads and executes the given label provider
         def receive():
-            with closing(load_label_provider(label_provider.root_folder)) as label_provider_impl:
+            with closing(label_provider.load()) as label_provider_impl:
                 provided_labels = label_provider_impl.get_labels()
                 return provided_labels
 

+ 0 - 28
pycs/util/LabelProviderUtil.py

@@ -1,28 +0,0 @@
-from json import load
-from os import path
-
-from pycs.interfaces.LabelProvider import LabelProvider
-
-
-def load_from_root_folder(root_folder: str) -> LabelProvider:
-    """
-    load configuration.json and create an instance from the included code object
-
-    :param root_folder: path to label provider root folder
-    :return: LabelProvider instance
-    """
-    # load configuration.json
-    configuration_path = path.join(root_folder, 'configuration.json')
-    with open(configuration_path, 'r') as configuration_file:
-        configuration = load(configuration_file)
-
-    # load code
-    code_path = path.join(root_folder, configuration['code']['module'])
-    module_name = code_path.replace('/', '.').replace('\\', '.')
-    class_name = configuration['code']['class']
-
-    imported_module = __import__(module_name, fromlist=[class_name])
-    class_attr = getattr(imported_module, class_name)
-
-    # return instance
-    return class_attr(root_folder, configuration)

+ 20 - 4
webui/src/components/projects/project-creation-window.vue

@@ -191,11 +191,9 @@ export default {
       return false;
     },
     availableLabels: function () {
-      let result = [{
-        name: 'None',
-        value: null
-      }];
+      let result = [];
 
+      // add label providers
       for (let label of this.labels) {
         result.push({
           name: label.name,
@@ -203,6 +201,24 @@ export default {
         });
       }
 
+      // sort
+      result.sort((a, b) => {
+        if (a.name.includes(b.name))
+          return +1;
+        if (b.name.includes(a.name))
+          return -1;
+        if (a.name > b.name)
+          return +1;
+        else
+          return -1;
+      });
+
+      // add `None` option
+      result.unshift({
+        name: 'None',
+        value: null
+      });
+
       return result;
     }
   },