4 years ago · a4b2e71d63
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,29 @@
 
				+*.out
			
 
				+*.pyc
			
 
				+*.dot
			
 
				+*.pdf
			
 
				+*.npz
			
 
				+
			
 
				+.results
			
 
				+
			
 
				+log
			
 
				+*.png
			
 
				+
			
 
				+*.log
			
 
				+*.pid
			
 
				+testing
			
 
				+*.avi
			
 
				+*.mp4
			
 
				+*.zip
			
 
				+*.osp
			
 
				+assets
			
 
				+README.html
			
 
				+
			
 
				+.coverage
			
 
				+htmlcov
			
 
				+mock_data
			
 
				+profile
			
 
				+*.csv
			
 
				+
			
 
				+.ipynb_checkpoints/*
			
 
				+data/*
			
--- a/INSTALL.md
+++ b/INSTALL.md
--- a/README.md
+++ b/README.md
--- a/data/.gitkeep
+++ b/data/.gitkeep
--- a/moth_classifier.ipynb
+++ b/moth_classifier.ipynb
@@ -0,0 +1,392 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "680ea9da",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Installation\n",
			
 
				+    "\n",
			
 
				+    "1. Install miniconda according to the [instructions](https://docs.conda.io/en/latest/miniconda.html) for your OS\n",
			
 
				+    "\n",
			
 
				+    "2. Create an environment:\n",
			
 
				+    "```bash\n",
			
 
				+    "conda create -n myenv python~=3.8.0 matplotlib jupyter opencv\n",
			
 
				+    "conda activate myenv\n",
			
 
				+    "```\n",
			
 
				+    "\n",
			
 
				+    "3. Install CUDA / cuDNN and required libraries:\n",
			
 
				+    "```bash\n",
			
 
				+    "conda install -c conda-forge cudatoolkit~=11.0.0 cudnn~=8.0.0\n",
			
 
				+    "pip install chainer~=7.0 cupy_cuda110~=7.0 chainercv~=0.13.0\n",
			
 
				+    "pip install cvmodelz pyaml tabulate tqdm imageio\n",
			
 
				+    "\n",
			
 
				+    "```\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "id": "fb6020a1",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "=== Chainer info ===\n",
			
 
				+      "_will_recompute                False\n",
			
 
				+      "autotune                       False\n",
			
 
				+      "compute_mode                   None\n",
			
 
				+      "cudnn_deterministic            False\n",
			
 
				+      "cudnn_fast_batch_normalization False\n",
			
 
				+      "cv_read_image_backend          None\n",
			
 
				+      "cv_resize_backend              cv2\n",
			
 
				+      "cv_rotate_backend              None\n",
			
 
				+      "debug                          False\n",
			
 
				+      "dtype                          float32\n",
			
 
				+      "enable_backprop                True\n",
			
 
				+      "in_recomputing                 False\n",
			
 
				+      "keep_graph_on_report           False\n",
			
 
				+      "lazy_grad_sum                  False\n",
			
 
				+      "schedule_func                  None\n",
			
 
				+      "train                          True\n",
			
 
				+      "type_check                     True\n",
			
 
				+      "use_cudnn                      auto\n",
			
 
				+      "use_cudnn_tensor_core          auto\n",
			
 
				+      "use_ideep                      never\n",
			
 
				+      "use_static_graph               True\n",
			
 
				+      "warn_nondeterministic          False\n",
			
 
				+      "=== CuPy info ===\n",
			
 
				+      "CuPy Version          : 7.8.0\n",
			
 
				+      "CUDA Root             : None\n",
			
 
				+      "CUDA Build Version    : 11000\n",
			
 
				+      "CUDA Driver Version   : 11020\n",
			
 
				+      "CUDA Runtime Version  : 11000\n",
			
 
				+      "cuBLAS Version        : 11200\n",
			
 
				+      "cuFFT Version         : 10201\n",
			
 
				+      "cuRAND Version        : 10201\n",
			
 
				+      "cuSOLVER Version      : (10, 6, 0)\n",
			
 
				+      "cuSPARSE Version      : 11101\n",
			
 
				+      "NVRTC Version         : (11, 0)\n",
			
 
				+      "cuDNN Build Version   : 8002\n",
			
 
				+      "cuDNN Version         : 8005\n",
			
 
				+      "NCCL Build Version    : 2708\n",
			
 
				+      "NCCL Runtime Version  : 2708\n",
			
 
				+      "CUB Version           : Enabled\n",
			
 
				+      "cuTENSOR Version      : None\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import chainer\n",
			
 
				+    "import cupy\n",
			
 
				+    "import numpy as np\n",
			
 
				+    "import pyaml\n",
			
 
				+    "import cv2\n",
			
 
				+    "cv2.setNumThreads(0)\n",
			
 
				+    "\n",
			
 
				+    "from dataclasses import dataclass\n",
			
 
				+    "from pathlib import Path\n",
			
 
				+    "from typing import Callable\n",
			
 
				+    "from tqdm.auto import tqdm\n",
			
 
				+    "from imageio import imread\n",
			
 
				+    "\n",
			
 
				+    "from chainer import datasets\n",
			
 
				+    "from chainer import iterators\n",
			
 
				+    "from chainer.dataset import concat_examples\n",
			
 
				+    "from chainercv import transforms as tr\n",
			
 
				+    "from cvmodelz.models import ModelFactory\n",
			
 
				+    "from cvmodelz.models import BaseModel\n",
			
 
				+    "\n",
			
 
				+    "\n",
			
 
				+    "chainer.config.cv_resize_backend = \"cv2\"\n",
			
 
				+    "print(\"=== Chainer info ===\")\n",
			
 
				+    "chainer.config.show()\n",
			
 
				+    "print(\"=== CuPy info ===\")\n",
			
 
				+    "cupy.show_config()"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 2,
			
 
				+   "id": "97b3ca53",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "@dataclass\n",
			
 
				+    "class Arguments(object):\n",
			
 
				+    "    \n",
			
 
				+    "    def __str__(self):\n",
			
 
				+    "        return pyaml.dump(dict(Arguments=self.__dict__), sort_dicts=False)\n",
			
 
				+    "    \n",
			
 
				+    "    dataset:      str = \"eu_moths\"\n",
			
 
				+    "    weights:      str = \"clf_final.npz\"\n",
			
 
				+    "        \n",
			
 
				+    "    split_id:     int  = 0\n",
			
 
				+    "        \n",
			
 
				+    "    device_id:    int  = 0\n",
			
 
				+    "    batch_size:   int  = 32\n",
			
 
				+    "    n_jobs:       int  = 4\n",
			
 
				+    "        "
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 3,
			
 
				+   "id": "287b1fa1",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "class Dataset(chainer.dataset.DatasetMixin):\n",
			
 
				+    "    \n",
			
 
				+    "    def __init__(self, root: str, split_id: int, is_train: bool = True):\n",
			
 
				+    "        super().__init__()\n",
			
 
				+    "        \n",
			
 
				+    "        root = Path(root)\n",
			
 
				+    "        self._root = root\n",
			
 
				+    "        self.class_names = np.loadtxt(root / \"class_names.txt\", dtype=\"U255\")\n",
			
 
				+    "        \n",
			
 
				+    "        _images = np.loadtxt(root / \"images.txt\", dtype=[(\"id\", np.int32), (\"fname\", \"U255\")])\n",
			
 
				+    "        _labels = np.loadtxt(root / \"labels.txt\", dtype=np.int32)\n",
			
 
				+    "        _split_ids = np.loadtxt(root / \"tr_ID.txt\", dtype=np.int32)\n",
			
 
				+    "        \n",
			
 
				+    "        if is_train:\n",
			
 
				+    "            # select all other splits\n",
			
 
				+    "            split_mask = _split_ids != split_id\n",
			
 
				+    "            \n",
			
 
				+    "        else:\n",
			
 
				+    "            # select only images for a given split ID\n",
			
 
				+    "            split_mask = _split_ids == split_id            \n",
			
 
				+    "    \n",
			
 
				+    "        self.images = _images[\"fname\"][split_mask]\n",
			
 
				+    "        self.labels = _labels[split_mask]\n",
			
 
				+    "        \n",
			
 
				+    "        \n",
			
 
				+    "    def __len__(self):\n",
			
 
				+    "        return len(self.images)\n",
			
 
				+    "    \n",
			
 
				+    "    def get_example(self, i):\n",
			
 
				+    "        im_path = self._root / \"images\" / self.images[i]\n",
			
 
				+    "        label = self.labels[i]\n",
			
 
				+    "        return imread(im_path, pilmode=\"RGB\"), label\n",
			
 
				+    "\n",
			
 
				+    "class DataTransformer(object):\n",
			
 
				+    "    \n",
			
 
				+    "    def __init__(self, prepare: Callable, size: int):\n",
			
 
				+    "        super().__init__()\n",
			
 
				+    "        self.prepare = prepare\n",
			
 
				+    "        self.size = size\n",
			
 
				+    "    \n",
			
 
				+    "    def __call__(self, data):\n",
			
 
				+    "        image, label = data\n",
			
 
				+    "        new_image = self.prepare(image, self.size)\n",
			
 
				+    "        \n",
			
 
				+    "        new_image = tr.center_crop(new_image, size=(self.size, self.size))\n",
			
 
				+    "\n",
			
 
				+    "        # transform the pixel range from 0..1 to -1..1\n",
			
 
				+    "        new_image = new_image * 2 - 1\n",
			
 
				+    "        return new_image, label"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 4,
			
 
				+   "id": "6f988e87",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def load_model(weights: str):\n",
			
 
				+    "    \n",
			
 
				+    "    model = ModelFactory.new(\"cvmodelz.InceptionV3\", n_classes=200)\n",
			
 
				+    "    model.load(weights, path=\"model/\")\n",
			
 
				+    "    return model\n",
			
 
				+    "\n",
			
 
				+    "def load_datasets(root: Path, model_input_size: int, prepare: Callable, split_id: int):\n",
			
 
				+    "    \n",
			
 
				+    "    train_ds = Dataset(root, split_id=split_id, is_train=True)\n",
			
 
				+    "    val_ds = Dataset(root, split_id=split_id, is_train=False)\n",
			
 
				+    "    \n",
			
 
				+    "    transformer = DataTransformer(prepare, model_input_size)\n",
			
 
				+    "    train_ds = datasets.TransformDataset(train_ds, transformer)\n",
			
 
				+    "    val_ds = datasets.TransformDataset(val_ds, transformer)\n",
			
 
				+    "    \n",
			
 
				+    "    return train_ds, val_ds\n",
			
 
				+    "\n",
			
 
				+    "def new_iterator(dataset, n_jobs: int = -1,  **kwargs):\n",
			
 
				+    "    it_cls = iterators.SerialIterator\n",
			
 
				+    "    \n",
			
 
				+    "    if n_jobs >= 1:\n",
			
 
				+    "        kwargs[\"n_processes\"] = n_jobs\n",
			
 
				+    "        it_cls = iterators.MultiprocessIterator\n",
			
 
				+    "    \n",
			
 
				+    "    return it_cls(dataset, **kwargs)\n",
			
 
				+    "    "
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 5,
			
 
				+   "id": "6cffafe9",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def evaluate(model: chainer.Chain, iterator, device_id: int = -1) -> float:\n",
			
 
				+    "    if device_id >= 0:\n",
			
 
				+    "        device = chainer.backends.cuda.get_device_from_id(device_id)\n",
			
 
				+    "        device.use()\n",
			
 
				+    "    else:\n",
			
 
				+    "        # for CPU mode\n",
			
 
				+    "        device = device_id\n",
			
 
				+    "    \n",
			
 
				+    "    model.to_device(device)\n",
			
 
				+    "    \n",
			
 
				+    "    n_batches = int(np.ceil(len(iterator.dataset) / iterator.batch_size))\n",
			
 
				+    "    \n",
			
 
				+    "    preds = []\n",
			
 
				+    "    labels = []\n",
			
 
				+    "    \n",
			
 
				+    "    iterator.reset()\n",
			
 
				+    "    for batch in tqdm(iterator, total=n_batches):\n",
			
 
				+    "        X, y = concat_examples(batch, device=device)\n",
			
 
				+    "        \n",
			
 
				+    "        # shape is (batch_size, #classes)\n",
			
 
				+    "        logits = model(X)\n",
			
 
				+    "        \n",
			
 
				+    "        logits.to_cpu()\n",
			
 
				+    "\n",
			
 
				+    "        # get the class ID with the highest score\n",
			
 
				+    "        preds.extend(logits.array.argmax(axis=-1))\n",
			
 
				+    "        labels.extend(chainer.cuda.to_cpu(y))\n",
			
 
				+    "    \n",
			
 
				+    "    return np.mean(np.array(preds) == np.array(labels))\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 6,
			
 
				+   "id": "191b8ce2",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def main(args: Arguments):\n",
			
 
				+    "    model = load_model(args.weights)\n",
			
 
				+    "\n",
			
 
				+    "    print(f\"Created {model.meta.name} model with weights from \\\"{args.weights}\\\"\")\n",
			
 
				+    "\n",
			
 
				+    "    train_ds, val_ds = load_datasets(args.dataset, model.meta.input_size, model.prepare, split_id=args.split_id)\n",
			
 
				+    "    print(f\"Found {len(train_ds)} training and {len(val_ds)} validation images\")\n",
			
 
				+    "\n",
			
 
				+    "    val_it = new_iterator(val_ds, \n",
			
 
				+    "                          n_jobs=args.n_jobs, \n",
			
 
				+    "                          batch_size=args.batch_size,\n",
			
 
				+    "                          repeat=False,\n",
			
 
				+    "                          shuffle=False\n",
			
 
				+    "                         )\n",
			
 
				+    "\n",
			
 
				+    "    with chainer.using_config(\"train\", False), chainer.no_backprop_mode():\n",
			
 
				+    "        accu = evaluate(model, val_it, device_id=args.device_id)\n",
			
 
				+    "        print(f\"Accuracy: {accu:.2%}\")"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 7,
			
 
				+   "id": "08b15e42",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Arguments:\n",
			
 
				+      "  dataset: data/datasets/eu_moths\n",
			
 
				+      "  weights: data/models/clf_final_2021-03-16-15.12.12.019294678.npz\n",
			
 
				+      "  split_id: 0\n",
			
 
				+      "  device_id: 0\n",
			
 
				+      "  batch_size: 32\n",
			
 
				+      "  n_jobs: 4\n",
			
 
				+      "\n",
			
 
				+      "Created InceptionV3 model with weights from \"data/models/clf_final_2021-03-16-15.12.12.019294678.npz\"\n",
			
 
				+      "Found 1653 training and 552 validation images\n"
			
 
				+     ]
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "application/vnd.jupyter.widget-view+json": {
			
 
				+       "model_id": "d6db493c995e40679bdc1341c8bb1291",
			
 
				+       "version_major": 2,
			
 
				+       "version_minor": 0
			
 
				+      },
			
 
				+      "text/plain": [
			
 
				+       "  0%|          | 0/18 [00:00<?, ?it/s]"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    },
			
 
				+    {
			
 
				+     "name": "stderr",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "/home/korsch/.miniconda3/envs/ba_king/lib/python3.8/site-packages/chainer/functions/connection/convolution_2d.py:295: PerformanceWarning: The best algo of conv fwd might not be selected due to lack of workspace size (8388608)\n",
			
 
				+      "  cuda.cudnn.convolution_forward(\n"
			
 
				+     ]
			
 
				+    },
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Accuracy: 92.21%\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "args = Arguments(\n",
			
 
				+    "    dataset=\"data/eu_moths\",\n",
			
 
				+    "    weights=\"data/clf_final_2021-03-16-15.12.12.019294678.npz\",\n",
			
 
				+    ")\n",
			
 
				+    "print(args)\n",
			
 
				+    "\n",
			
 
				+    "main(args)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "08e8f17b",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "8bad02a4",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.8.10"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 5
			
 
				+}