Browse Source

initial commit

Dimitri Korsch 4 years ago
commit
a4b2e71d63
5 changed files with 421 additions and 0 deletions
  1. 29 0
      .gitignore
  2. 0 0
      INSTALL.md
  3. 0 0
      README.md
  4. 0 0
      data/.gitkeep
  5. 392 0
      moth_classifier.ipynb

+ 29 - 0
.gitignore

@@ -0,0 +1,29 @@
+*.out
+*.pyc
+*.dot
+*.pdf
+*.npz
+
+.results
+
+log
+*.png
+
+*.log
+*.pid
+testing
+*.avi
+*.mp4
+*.zip
+*.osp
+assets
+README.html
+
+.coverage
+htmlcov
+mock_data
+profile
+*.csv
+
+.ipynb_checkpoints/*
+data/*

+ 0 - 0
INSTALL.md


+ 0 - 0
README.md


+ 0 - 0
data/.gitkeep


+ 392 - 0
moth_classifier.ipynb

@@ -0,0 +1,392 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "680ea9da",
+   "metadata": {},
+   "source": [
+    "# Installation\n",
+    "\n",
+    "1. Install miniconda according to the [instructions](https://docs.conda.io/en/latest/miniconda.html) for your OS\n",
+    "\n",
+    "2. Create an environment\n",
+    "```bash\n",
+    "conda create -n myenv python~=3.8.0 matplotlib jupyter opencv\n",
+    "conda activate myenv\n",
+    "```\n",
+    "\n",
+    "3. Install CUDA / cuDNN and required libraries:\n",
+    "```bash\n",
+    "conda install -c conda-forge cudatoolkit~=11.0.0 cudnn~=8.0.0\n",
+    "pip install chainer~=7.0 cupy_cuda110~=7.0 chainercv~=0.13.0\n",
+    "pip install cvmodelz pyaml tabulate tqdm imageio\n",
+    "\n",
+    "```\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "fb6020a1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Chainer info ===\n",
+      "_will_recompute                False\n",
+      "autotune                       False\n",
+      "compute_mode                   None\n",
+      "cudnn_deterministic            False\n",
+      "cudnn_fast_batch_normalization False\n",
+      "cv_read_image_backend          None\n",
+      "cv_resize_backend              cv2\n",
+      "cv_rotate_backend              None\n",
+      "debug                          False\n",
+      "dtype                          float32\n",
+      "enable_backprop                True\n",
+      "in_recomputing                 False\n",
+      "keep_graph_on_report           False\n",
+      "lazy_grad_sum                  False\n",
+      "schedule_func                  None\n",
+      "train                          True\n",
+      "type_check                     True\n",
+      "use_cudnn                      auto\n",
+      "use_cudnn_tensor_core          auto\n",
+      "use_ideep                      never\n",
+      "use_static_graph               True\n",
+      "warn_nondeterministic          False\n",
+      "=== CuPy info ===\n",
+      "CuPy Version          : 7.8.0\n",
+      "CUDA Root             : None\n",
+      "CUDA Build Version    : 11000\n",
+      "CUDA Driver Version   : 11020\n",
+      "CUDA Runtime Version  : 11000\n",
+      "cuBLAS Version        : 11200\n",
+      "cuFFT Version         : 10201\n",
+      "cuRAND Version        : 10201\n",
+      "cuSOLVER Version      : (10, 6, 0)\n",
+      "cuSPARSE Version      : 11101\n",
+      "NVRTC Version         : (11, 0)\n",
+      "cuDNN Build Version   : 8002\n",
+      "cuDNN Version         : 8005\n",
+      "NCCL Build Version    : 2708\n",
+      "NCCL Runtime Version  : 2708\n",
+      "CUB Version           : Enabled\n",
+      "cuTENSOR Version      : None\n"
+     ]
+    }
+   ],
+   "source": [
+    "import chainer\n",
+    "import cupy\n",
+    "import numpy as np\n",
+    "import pyaml\n",
+    "import cv2\n",
+    "cv2.setNumThreads(0)\n",
+    "\n",
+    "from dataclasses import dataclass\n",
+    "from pathlib import Path\n",
+    "from typing import Callable\n",
+    "from tqdm.auto import tqdm\n",
+    "from imageio import imread\n",
+    "\n",
+    "from chainer import datasets\n",
+    "from chainer import iterators\n",
+    "from chainer.dataset import concat_examples\n",
+    "from chainercv import transforms as tr\n",
+    "from cvmodelz.models import ModelFactory\n",
+    "from cvmodelz.models import BaseModel\n",
+    "\n",
+    "\n",
+    "# use OpenCV (cv2) as the backend for image resizing\n",
+    "chainer.config.cv_resize_backend = \"cv2\"\n",
+    "\n",
+    "# print the active configuration of both libraries\n",
+    "for _title, _show in ((\"Chainer\", chainer.config.show), (\"CuPy\", cupy.show_config)):\n",
+    "    print(f\"=== {_title} info ===\")\n",
+    "    _show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "97b3ca53",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@dataclass\n",
+    "class Arguments:\n",
+    "    \"\"\"Settings of one evaluation run; str() of an instance is a YAML dump of all fields.\"\"\"\n",
+    "\n",
+    "    # dataset folder (main passes it to load_datasets as the root)\n",
+    "    dataset: str = \"eu_moths\"\n",
+    "    # weights file (main passes it to load_model)\n",
+    "    weights: str = \"clf_final.npz\"\n",
+    "\n",
+    "    # samples with this split ID form the validation set\n",
+    "    split_id: int = 0\n",
+    "\n",
+    "    # device for the evaluation; negative values take the CPU branch in evaluate\n",
+    "    device_id: int = 0\n",
+    "    batch_size: int = 32\n",
+    "    # worker processes of the iterator; values below 1 select the serial iterator\n",
+    "    n_jobs: int = 4\n",
+    "\n",
+    "    def __str__(self):\n",
+    "        fields = dict(Arguments=self.__dict__)\n",
+    "        return pyaml.dump(fields, sort_dicts=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "287b1fa1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Dataset(chainer.dataset.DatasetMixin):\n",
+    "    \"\"\"Images and labels of one split of an annotated image folder.\n",
+    "\n",
+    "    The folder ``root`` has to contain ``class_names.txt``, ``images.txt``\n",
+    "    (an ID and a file name per row), ``labels.txt``, ``tr_ID.txt`` and an\n",
+    "    ``images`` sub-folder with the image files.\n",
+    "\n",
+    "    With ``is_train=True`` all samples whose split ID differs from\n",
+    "    ``split_id`` are selected, otherwise only the samples of that split.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, root: str, split_id: int, is_train: bool = True):\n",
+    "        super().__init__()\n",
+    "        self._root = Path(root)\n",
+    "\n",
+    "        def read(fname, dtype):\n",
+    "            # every annotation file is a plain text table inside the root folder\n",
+    "            return np.loadtxt(self._root / fname, dtype=dtype)\n",
+    "\n",
+    "        self.class_names = read(\"class_names.txt\", \"U255\")\n",
+    "        image_table = read(\"images.txt\", [(\"id\", np.int32), (\"fname\", \"U255\")])\n",
+    "        all_labels = read(\"labels.txt\", np.int32)\n",
+    "        split_of_sample = read(\"tr_ID.txt\", np.int32)\n",
+    "\n",
+    "        in_split = split_of_sample == split_id\n",
+    "        # training uses every sample outside of the given split\n",
+    "        selected = ~in_split if is_train else in_split\n",
+    "\n",
+    "        self.images = image_table[\"fname\"][selected]\n",
+    "        self.labels = all_labels[selected]\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return len(self.images)\n",
+    "\n",
+    "    def get_example(self, i):\n",
+    "        \"\"\"Read the i-th image in RGB mode and return it together with its label.\"\"\"\n",
+    "        fname, label = self.images[i], self.labels[i]\n",
+    "        return imread(self._root / \"images\" / fname, pilmode=\"RGB\"), label\n",
+    "\n",
+    "class DataTransformer:\n",
+    "    \"\"\"Callable that turns a raw (image, label) sample into a model input.\n",
+    "\n",
+    "    ``prepare`` is called as ``prepare(image, size)``; its result is center\n",
+    "    cropped to ``size x size`` and rescaled with ``x * 2 - 1``.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, prepare: Callable, size: int):\n",
+    "        super().__init__()\n",
+    "        self.prepare, self.size = prepare, size\n",
+    "\n",
+    "    def __call__(self, data):\n",
+    "        raw_image, label = data\n",
+    "        side = self.size\n",
+    "\n",
+    "        prepared = self.prepare(raw_image, side)\n",
+    "        cropped = tr.center_crop(prepared, size=(side, side))\n",
+    "\n",
+    "        # maps the pixel range from 0..1 to -1..1\n",
+    "        # (assumes prepare returns values in 0..1 - TODO confirm)\n",
+    "        return cropped * 2 - 1, label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "6f988e87",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_model(weights: str, n_classes: int = 200, model_type: str = \"cvmodelz.InceptionV3\"):\n",
+    "    \"\"\"Create a classifier and initialize it with trained weights.\n",
+    "\n",
+    "    Args:\n",
+    "        weights: path of the weights file that is passed to ``model.load``.\n",
+    "        n_classes: number of classes of the created model (default: 200).\n",
+    "            Presumably it has to match the weights file - verify when\n",
+    "            loading weights of another dataset.\n",
+    "        model_type: model key passed to ``ModelFactory.new``\n",
+    "            (default: \"cvmodelz.InceptionV3\").\n",
+    "\n",
+    "    Returns:\n",
+    "        The created model after ``load`` was called on it.\n",
+    "    \"\"\"\n",
+    "    model = ModelFactory.new(model_type, n_classes=n_classes)\n",
+    "    # NOTE(review): \"model/\" is presumably the key prefix of the parameters\n",
+    "    # inside the weights file - confirm against the cvmodelz documentation\n",
+    "    model.load(weights, path=\"model/\")\n",
+    "    return model\n",
+    "\n",
+    "def load_datasets(root: Path, model_input_size: int, prepare: Callable, split_id: int):\n",
+    "    \"\"\"Return the transformed (training, validation) datasets of the given split.\"\"\"\n",
+    "    # a single transformer instance is shared by both subsets\n",
+    "    transformer = DataTransformer(prepare, model_input_size)\n",
+    "\n",
+    "    train_ds, val_ds = (\n",
+    "        datasets.TransformDataset(Dataset(root, split_id=split_id, is_train=flag), transformer)\n",
+    "        for flag in (True, False)\n",
+    "    )\n",
+    "    return train_ds, val_ds\n",
+    "\n",
+    "def new_iterator(dataset, n_jobs: int = -1, **kwargs):\n",
+    "    \"\"\"Create a batch iterator over ``dataset``.\n",
+    "\n",
+    "    ``n_jobs >= 1`` yields a ``MultiprocessIterator`` with ``n_processes``\n",
+    "    set to ``n_jobs``, anything else a ``SerialIterator``. All remaining\n",
+    "    keyword arguments are forwarded to the iterator class.\n",
+    "    \"\"\"\n",
+    "    if n_jobs < 1:\n",
+    "        return iterators.SerialIterator(dataset, **kwargs)\n",
+    "\n",
+    "    kwargs.update(n_processes=n_jobs)\n",
+    "    return iterators.MultiprocessIterator(dataset, **kwargs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "6cffafe9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate(model: chainer.Chain, iterator, device_id: int = -1) -> float:\n",
+    "    \"\"\"Compute the accuracy of ``model`` on all batches of ``iterator``.\n",
+    "\n",
+    "    Args:\n",
+    "        model: classifier; called with a batch of inputs it returns the logits.\n",
+    "        iterator: batch iterator; it is reset before the evaluation starts.\n",
+    "        device_id: ID of the GPU to use; a negative value means CPU mode.\n",
+    "\n",
+    "    Returns:\n",
+    "        Fraction of samples whose highest-scoring class equals the label.\n",
+    "    \"\"\"\n",
+    "    # for CPU mode the (negative) ID itself serves as the device\n",
+    "    device = device_id\n",
+    "    if device_id >= 0:\n",
+    "        device = chainer.backends.cuda.get_device_from_id(device_id)\n",
+    "        device.use()\n",
+    "\n",
+    "    model.to_device(device)\n",
+    "\n",
+    "    n_samples = len(iterator.dataset)\n",
+    "    n_batches = int(np.ceil(n_samples / iterator.batch_size))\n",
+    "\n",
+    "    predictions, targets = [], []\n",
+    "\n",
+    "    iterator.reset()\n",
+    "    for batch in tqdm(iterator, total=n_batches):\n",
+    "        X, y = concat_examples(batch, device=device)\n",
+    "\n",
+    "        # shape is (batch_size, #classes)\n",
+    "        logits = model(X)\n",
+    "        logits.to_cpu()\n",
+    "\n",
+    "        # keep the class ID with the highest score for every sample\n",
+    "        predictions += list(logits.array.argmax(axis=-1))\n",
+    "        targets += list(chainer.cuda.to_cpu(y))\n",
+    "\n",
+    "    return np.mean(np.array(predictions) == np.array(targets))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "191b8ce2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def main(args: Arguments):\n",
+    "    \"\"\"Load model and data as given by ``args`` and print the validation accuracy.\"\"\"\n",
+    "    model = load_model(args.weights)\n",
+    "    print(f\"Created {model.meta.name} model with weights from \\\"{args.weights}\\\"\")\n",
+    "\n",
+    "    train_ds, val_ds = load_datasets(\n",
+    "        args.dataset,\n",
+    "        model.meta.input_size,\n",
+    "        model.prepare,\n",
+    "        split_id=args.split_id,\n",
+    "    )\n",
+    "    print(f\"Found {len(train_ds)} training and {len(val_ds)} validation images\")\n",
+    "\n",
+    "    # a single, unshuffled pass over the validation images\n",
+    "    iterator_options = dict(batch_size=args.batch_size, repeat=False, shuffle=False)\n",
+    "    val_it = new_iterator(val_ds, n_jobs=args.n_jobs, **iterator_options)\n",
+    "\n",
+    "    # inference only: train mode off and no backprop graph\n",
+    "    with chainer.using_config(\"train\", False), chainer.no_backprop_mode():\n",
+    "        accu = evaluate(model, val_it, device_id=args.device_id)\n",
+    "\n",
+    "    print(f\"Accuracy: {accu:.2%}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "08b15e42",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Arguments:\n",
+      "  dataset: data/datasets/eu_moths\n",
+      "  weights: data/models/clf_final_2021-03-16-15.12.12.019294678.npz\n",
+      "  split_id: 0\n",
+      "  device_id: 0\n",
+      "  batch_size: 32\n",
+      "  n_jobs: 4\n",
+      "\n",
+      "Created InceptionV3 model with weights from \"data/models/clf_final_2021-03-16-15.12.12.019294678.npz\"\n",
+      "Found 1653 training and 552 validation images\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d6db493c995e40679bdc1341c8bb1291",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/18 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/korsch/.miniconda3/envs/ba_king/lib/python3.8/site-packages/chainer/functions/connection/convolution_2d.py:295: PerformanceWarning: The best algo of conv fwd might not be selected due to lack of workspace size (8388608)\n",
+      "  cuda.cudnn.convolution_forward(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 92.21%\n"
+     ]
+    }
+   ],
+   "source": [
+    "# both paths are relative to the current working directory\n",
+    "run_config = dict(\n",
+    "    dataset=\"data/eu_moths\",\n",
+    "    weights=\"data/clf_final_2021-03-16-15.12.12.019294678.npz\",\n",
+    ")\n",
+    "args = Arguments(**run_config)\n",
+    "print(args)\n",
+    "\n",
+    "main(args)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "08e8f17b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8bad02a4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}