
updated classifier code

Dimitri Korsch · 3 years ago
commit 5b8c768f5d

+ 5 - 2
models/moth_scanner/configuration.json

@@ -1,7 +1,9 @@
 {
   "name": "Moth detector and classifier",
   "description": "Moth scanner (detection and classification) of moths developed in the context of the AMMOD project.",
-  "supports": [],
+  "supports": [
+    "labeled-bounding-boxes"
+  ],
   "code": {
     "module": "scanner",
     "class": "Scanner"
@@ -22,7 +24,8 @@
   },
   "classifier": {
     "model_type": "cvmodelz.InceptionV3",
+    "input_size": 299,
     "weights": "classifier.npz",
-    "n_classes": 200,
+    "n_classes": 200
   }
 }
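
A minimal sketch (not part of the commit) of how this configuration could be loaded and handed to the Scanner defined in the next file; the model root path is an assumption based on the file layout:

    import json
    from scanner import Scanner  # models/moth_scanner/scanner/__init__.py

    root = "models/moth_scanner"  # assumed model root folder
    with open(f"{root}/configuration.json") as f:
        config = json.load(f)

    # Scanner picks the "detector" and "classifier" sections itself
    scanner = Scanner(root_folder=root, configuration=config)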

+ 8 - 7
models/moth_scanner/scanner/__init__.py

@@ -8,11 +8,13 @@ from pycs.interfaces.MediaStorage import MediaStorage
 from pycs.interfaces.Pipeline import Pipeline as Interface
 
 from .detector import Detector
+from .classifier import Classifier
 
 class Scanner(Interface):
     def __init__(self, root_folder: str, configuration: dict):
         super().__init__(root_folder, configuration)
         self.detector = Detector(configuration["detector"])
+        self.classifier = Classifier(configuration["classifier"], root=root_folder)
 
     def close(self):
         pass
@@ -20,17 +22,16 @@ class Scanner(Interface):
     def execute(self, storage: MediaStorage, file: MediaFile):
 
         im = self.read_image(file.path)
+        # cv2.imread returns BGR; convert once so that the classifier
+        # receives the RGB crops its assert demands
+        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+        bw_im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
 
-        detections = self.detector(im)
+        detections = self.detector(bw_im)
 
         for bbox, info in detections:
             if not info.selected:
                 continue
             x0, y0, x1, y1 = bbox
-            w, h = x1-x0, y1-y0
-            file.add_bounding_box(x0, y0, w, h)
+            label = self.classifier(bbox.crop(im, enlarge=True))
+            file.add_bounding_box(x0, y0, bbox.w, bbox.h, label=label)
 
-    def read_image(self, path: str) -> np.ndarray:
-        im = cv2.imread(path, cv2.IMREAD_COLOR)
-        im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
-        return im
+    def read_image(self, path: str, mode: int = cv2.IMREAD_COLOR) -> np.ndarray:
+        return cv2.imread(path, mode)
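
For reference, the same detect-then-classify flow as execute(), sketched outside of the pycs MediaFile wrapper and reusing the scanner instance from the configuration sketch above (the image path is a placeholder):

    import cv2

    im = scanner.read_image("example.jpg")  # BGR image, HxWxC
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    bw_im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)

    for bbox, info in scanner.detector(bw_im):
        if info.selected:
            print(bbox, scanner.classifier(bbox.crop(im, enlarge=True)))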

+ 26 - 6
models/moth_scanner/scanner/classifier.py

@@ -2,23 +2,23 @@ import numpy as np
 import typing as T
+import chainer
 
 from munch import munchify
+from pathlib import Path
 
 from cvmodelz.models import ModelFactory
-
-
-def
+from chainercv import transforms as tr
 
 class Classifier(object):
 
-    def __init__(self, configuration: T.Dict):
+    def __init__(self, configuration: T.Dict, root: str):
         super().__init__()
 
         config = munchify(configuration)
 
         model_type = config.model_type
         n_classes = config.n_classes
-        weights = config.weights
+        weights = Path(root, config.weights).resolve()
 
+        self.input_size = config.input_size
         self.backbone = ModelFactory.new(model_type)
         self.backbone.load_for_inference(weights,
                                          n_classes=n_classes,
@@ -26,11 +26,31 @@ class Classifier(object):
                                          strict=True,
                                         )
 
-    def __call__(self, im: np.ndarray):
+    def _transform(self, im: np.ndarray):
+        _prepare = self.backbone.meta.prepare_func
+        size = (self.input_size, self.input_size)
+
+        # resize (keeping the aspect ratio) and convert HxWxC -> CxHxW
+        im = _prepare(im, size=size, keep_ratio=True, swap_channels=False)
+        # cut out the central input_size x input_size region
+        im = tr.center_crop(im, size)
+        return im
+
+    def __call__(self, im: np.ndarray) -> int:
         assert im.ndim in (3, 4), \
             "Classifier accepts only RGB images (3D input) or a batch of images (4D input)!"
 
         if im.ndim == 3:
             # expand first axis
+            # HxWxC -> 1xHxWxC
             im = im[None]
 
+        im = [self._transform(_im) for _im in im]
+        x = self.backbone.xp.array(im)
+
+        # run in test mode without building a computation graph
+        with chainer.using_config("train", False), chainer.no_backprop_mode():
+            pred = self.backbone(x)
+        pred.to_cpu()
+
+        # argmax over the class axis; int() assumes a batch of size one
+        return int(np.argmax(pred.array, axis=1))
+
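
A short usage sketch for the updated Classifier (paths and the dummy crop are placeholders); per the assert above, it expects RGB input in HxWxC order:

    import json
    import numpy as np
    from scanner.classifier import Classifier

    root = "models/moth_scanner"  # assumed model root folder
    with open(f"{root}/configuration.json") as f:
        config = json.load(f)

    clf = Classifier(config["classifier"], root=root)
    crop = np.zeros((320, 320, 3), dtype=np.uint8)  # stand-in for a detector crop
    label = clf(crop)  # -> class id in [0, n_classes)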

+ 47 - 13
models/moth_scanner/scanner/detector.py

@@ -7,11 +7,56 @@ from collections import namedtuple
 from skimage import filters
 
 # the coordinates are relative!
-BBox = namedtuple("BBox", "x0 y0 x1 y1")
 BBoxInfo = namedtuple("BBoxInfo", "area ratio mean std selected", defaults=[-1, -1, -1, -1, False])
 Detection = namedtuple("Detection", "bbox info")
 
+class BBox(namedtuple("BBox", "x0 y0 x1 y1")):
+    __slots__ = ()
 
+    @property
+    def w(self):
+        return abs(self.x1 - self.x0)
+
+    @property
+    def h(self):
+        return abs(self.y1 - self.y0)
+
+    @property
+    def area(self):
+        return self.h * self.w
+
+    @property
+    def ratio(self):
+        return min(self.h, self.w) / max(self.h, self.w)
+
+    def crop(self, im: np.ndarray, enlarge: bool = True):
+
+        x0, y0, x1, y1 = self
+        H, W, *_ = im.shape
+
+        # translate from relative coordinates to pixel
+        # coordinates for the given image
+
+        x0, x1 = int(x0 * W), int(x1 * W)
+        y0, y1 = int(y0 * H), int(y1 * H)
+
+        # enlarge to a square extent
+        if enlarge:
+            h, w = int(self.h * H), int(self.w * W)
+            size = max(h, w)
+            dw, dh = (size - w) / 2, (size - h) / 2
+            x0, x1 = max(int(x0 - dw), 0), int(x0 - dw + size)
+            y0, y1 = max(int(y0 - dh), 0), int(y0 - dh + size)
+
+        if im.ndim == 2:
+            return im[y0:y1, x0:x1]
+
+        elif im.ndim == 3:
+            return im[y0:y1, x0:x1, :]
+
+        else:
+            raise ValueError(f"Unsupported ndim: {im.ndim=}")
 
 
 class Detector(object):
@@ -116,7 +161,7 @@ class Detector(object):
         for i in inds.squeeze():
             bbox, _ = detections[i]
             mean, std, n = _im_mean_std(integral, integral_sq, bbox)
-            area, ratio = _area(bbox), _ratio(bbox)
+            area, ratio = bbox.area, bbox.ratio
             selected = self.is_selected(mean, std, ratio, area)
             info = BBoxInfo(area, ratio, mean, std, selected)
             detections[i] = Detection(bbox, info)
@@ -139,17 +184,6 @@ def _contour2bbox(contour: np.ndarray, shape: T.Tuple[int, int]) -> BBox:
     return BBox(x0/w, y0/h, x1/w, y1/h)
 
 
-def _ratio(bbox: BBox) -> float:
-    x0, y0, x1, y1 = bbox
-    h, w = y1-y0, x1-x0
-    return min(h, w) / max(h, w)
-
-
-def _area(bbox: BBox) -> float:
-    x0, y0, x1, y1 = bbox
-    h, w = y1-y0, x1-x0
-    return h * w
-
 def _im_mean_std(integral: np.ndarray,
                  integral_sq: np.ndarray,
                  bbox: T.Optional[BBox] = None
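
The new BBox helpers, exercised on a synthetic image (values chosen for illustration; coordinates are relative, as noted above, and the corrected crop() is assumed):

    import numpy as np
    from scanner.detector import BBox

    im = np.zeros((400, 600, 3), dtype=np.uint8)  # HxWxC test image
    bbox = BBox(x0=0.25, y0=0.25, x1=0.75, y1=0.50)

    print(bbox.w, bbox.h)   # 0.5 0.25 (relative units)
    print(bbox.ratio)       # 0.5 = min(h, w) / max(h, w)
    crop = bbox.crop(im, enlarge=True)
    print(crop.shape)       # (300, 300, 3): enlarged to a square extent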

+ 1 - 0
requirements.txt

@@ -9,4 +9,5 @@ munch
 scikit-image
 
 chainer~=7.8
+chainer-addons~=0.10
 cvmodelz~=0.1