{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Installation\n",
    "\n",
    "1. Install miniconda according to the [instructions](https://docs.conda.io/en/latest/miniconda.html) for your OS\n",
    "\n",
    "2. Create an environment\n",
    "```bash\n",
    "conda create -n myenv python~=3.8.0 matplotlib jupyter opencv\n",
    "conda activate myenv\n",
    "```\n",
    "\n",
    "3. Install CUDA / cuDNN and required libraries:\n",
    "```bash\n",
    "conda install -c conda-forge cudatoolkit~=11.0.0 cudnn~=8.0.0\n",
    "pip install chainer~=7.0 cupy_cuda110~=7.0 chainercv~=0.13.0\n",
    "pip install cvmodelz pyaml tabulate tqdm\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== Chainer info ===\n",
      "_will_recompute                False\n",
      "autotune                       False\n",
      "compute_mode                   None\n",
      "cudnn_deterministic            False\n",
      "cudnn_fast_batch_normalization False\n",
      "cv_read_image_backend          None\n",
      "cv_resize_backend              cv2\n",
      "cv_rotate_backend              None\n",
      "debug                          False\n",
      "dtype                          float32\n",
      "enable_backprop                True\n",
      "in_recomputing                 False\n",
      "keep_graph_on_report           False\n",
      "lazy_grad_sum                  False\n",
      "schedule_func                  None\n",
      "train                          True\n",
      "type_check                     True\n",
      "use_cudnn                      auto\n",
      "use_cudnn_tensor_core          auto\n",
      "use_ideep                      never\n",
      "use_static_graph               True\n",
      "warn_nondeterministic          False\n",
      "=== CuPy info ===\n",
      "CuPy Version          : 7.8.0\n",
      "CUDA Root             : None\n",
      "CUDA Build Version    : 11000\n",
      "CUDA Driver Version   : 11020\n",
      "CUDA Runtime Version  : 11000\n",
      "cuBLAS Version        : 11200\n",
      "cuFFT Version         : 10201\n",
      "cuRAND Version        : 10201\n",
      "cuSOLVER Version      : (10, 6, 0)\n",
      "cuSPARSE Version      : 11101\n",
      "NVRTC Version         : (11, 0)\n",
      "cuDNN Build Version   : 8002\n",
      "cuDNN Version         : 8005\n",
      "NCCL Build Version    : 2708\n",
      "NCCL Runtime Version  : 2708\n",
      "CUB Version           : Enabled\n",
      "cuTENSOR Version      : None\n"
     ]
    }
   ],
   "source": [
    "import chainer\n",
    "import cupy\n",
    "import numpy as np\n",
    "import pyaml\n",
    "import cv2\n",
    "cv2.setNumThreads(0)\n",
    "\n",
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "from typing import Callable\n",
    "from tqdm.auto import tqdm\n",
    "from imageio import imread\n",
    "\n",
    "from chainer import datasets\n",
    "from chainer import iterators\n",
    "from chainer.dataset import concat_examples\n",
    "from chainercv import transforms as tr\n",
    "from cvmodelz.models import ModelFactory\n",
    "from cvmodelz.models import BaseModel\n",
    "\n",
    "\n",
    "chainer.config.cv_resize_backend = \"cv2\"\n",
    "print(\"=== Chainer info ===\")\n",
    "chainer.config.show()\n",
    "print(\"=== CuPy info ===\")\n",
    "cupy.show_config()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass\n",
    "class Arguments(object):\n",
    "    \n",
    "    def __str__(self):\n",
    "        return pyaml.dump(dict(Arguments=self.__dict__), sort_dicts=False)\n",
    "    \n",
    "    dataset:      str = \"eu_moths\"\n",
    "    weights:      str = \"clf_final.npz\"\n",
    "        \n",
    "    split_id:     int  = 0\n",
    "        \n",
    "    device_id:    int  = 0\n",
    "    batch_size:   int  = 32\n",
    "    n_jobs:       int  = 4\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Dataset(chainer.dataset.DatasetMixin):\n",
    "    \n",
    "    def __init__(self, root: str, split_id: int, is_train: bool = True):\n",
    "        super().__init__()\n",
    "        \n",
    "        root = Path(root)\n",
    "        self._root = root\n",
    "        self.class_names = np.loadtxt(root / \"class_names.txt\", dtype=\"U255\")\n",
    "        \n",
    "        _images = np.loadtxt(root / \"images.txt\", dtype=[(\"id\", np.int32), (\"fname\", \"U255\")])\n",
    "        _labels = np.loadtxt(root / \"labels.txt\", dtype=np.int32)\n",
    "        _split_ids = np.loadtxt(root / \"tr_ID.txt\", dtype=np.int32)\n",
    "        \n",
    "        if is_train:\n",
    "            # select all other splits\n",
    "            split_mask = _split_ids != split_id\n",
    "            \n",
    "        else:\n",
    "            # select only images for a given split ID\n",
    "            split_mask = _split_ids == split_id            \n",
    "    \n",
    "        self.images = _images[\"fname\"][split_mask]\n",
    "        self.labels = _labels[split_mask]\n",
    "        \n",
    "        \n",
    "    def __len__(self):\n",
    "        return len(self.images)\n",
    "    \n",
    "    def get_example(self, i):\n",
    "        im_path = self._root / \"images\" / self.images[i]\n",
    "        label = self.labels[i]\n",
    "        return imread(im_path, pilmode=\"RGB\"), label\n",
    "\n",
    "class DataTransformer(object):\n",
    "    \n",
    "    def __init__(self, prepare: Callable, size: int):\n",
    "        super().__init__()\n",
    "        self.prepare = prepare\n",
    "        self.size = size\n",
    "    \n",
    "    def __call__(self, data):\n",
    "        image, label = data\n",
    "        new_image = self.prepare(image, self.size)\n",
    "        \n",
    "        new_image = tr.center_crop(new_image, size=(self.size, self.size))\n",
    "\n",
    "        # transform the pixel range from 0..1 to -1..1\n",
    "        new_image = new_image * 2 - 1\n",
    "        return new_image, label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_model(weights: str):\n",
    "    \n",
    "    model = ModelFactory.new(\"cvmodelz.InceptionV3\", n_classes=200)\n",
    "    model.load(weights, path=\"model/\")\n",
    "    return model\n",
    "\n",
    "def load_datasets(root: Path, model_input_size: int, prepare: Callable, split_id: int):\n",
    "    \n",
    "    train_ds = Dataset(root, split_id=split_id, is_train=True)\n",
    "    val_ds = Dataset(root, split_id=split_id, is_train=False)\n",
    "    \n",
    "    transformer = DataTransformer(prepare, model_input_size)\n",
    "    train_ds = datasets.TransformDataset(train_ds, transformer)\n",
    "    val_ds = datasets.TransformDataset(val_ds, transformer)\n",
    "    \n",
    "    return train_ds, val_ds\n",
    "\n",
    "def new_iterator(dataset, n_jobs: int = -1,  **kwargs):\n",
    "    it_cls = iterators.SerialIterator\n",
    "    \n",
    "    if n_jobs >= 1:\n",
    "        kwargs[\"n_processes\"] = n_jobs\n",
    "        it_cls = iterators.MultiprocessIterator\n",
    "    \n",
    "    return it_cls(dataset, **kwargs)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate(model: chainer.Chain, iterator, device_id: int = -1) -> float:\n",
    "    if device_id >= 0:\n",
    "        device = chainer.backends.cuda.get_device_from_id(device_id)\n",
    "        device.use()\n",
    "    else:\n",
    "        # for CPU mode\n",
    "        device = device_id\n",
    "    \n",
    "    model.to_device(device)\n",
    "    \n",
    "    n_batches = int(np.ceil(len(iterator.dataset) / iterator.batch_size))\n",
    "    \n",
    "    preds = []\n",
    "    labels = []\n",
    "    \n",
    "    iterator.reset()\n",
    "    for batch in tqdm(iterator, total=n_batches):\n",
    "        X, y = concat_examples(batch, device=device)\n",
    "        \n",
    "        # shape is (batch_size, #classes)\n",
    "        logits = model(X)\n",
    "        \n",
    "        logits.to_cpu()\n",
    "\n",
    "        # get the class ID with the highest score\n",
    "        preds.extend(logits.array.argmax(axis=-1))\n",
    "        labels.extend(chainer.cuda.to_cpu(y))\n",
    "    \n",
    "    return np.mean(np.array(preds) == np.array(labels))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(args: Arguments):\n",
    "    model = load_model(args.weights)\n",
    "\n",
    "    print(f\"Created {model.meta.name} model with weights from \\\"{args.weights}\\\"\")\n",
    "\n",
    "    train_ds, val_ds = load_datasets(args.dataset, model.meta.input_size, model.prepare, split_id=args.split_id)\n",
    "    print(f\"Found {len(train_ds)} training and {len(val_ds)} validation images\")\n",
    "\n",
    "    val_it = new_iterator(val_ds, \n",
    "                          n_jobs=args.n_jobs, \n",
    "                          batch_size=args.batch_size,\n",
    "                          repeat=False,\n",
    "                          shuffle=False\n",
    "                         )\n",
    "\n",
    "    with chainer.using_config(\"train\", False), chainer.no_backprop_mode():\n",
    "        accu = evaluate(model, val_it, device_id=args.device_id)\n",
    "        print(f\"Accuracy: {accu:.2%}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Arguments:\n",
      "  dataset: data/eu_moths\n",
      "  weights: data/2021-03-16-15.12.12.019294678_clf_final.npz\n",
      "  split_id: 0\n",
      "  device_id: 0\n",
      "  batch_size: 32\n",
      "  n_jobs: 4\n",
      "\n",
      "Created InceptionV3 model with weights from \"data/2021-03-16-15.12.12.019294678_clf_final.npz\"\n",
      "Found 1653 training and 552 validation images\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f3c8e8a2dfb74b3d984b6b1853174f58",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/18 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/korsch/.miniconda3/envs/ba_king/lib/python3.8/site-packages/chainer/functions/connection/convolution_2d.py:295: PerformanceWarning: The best algo of conv fwd might not be selected due to lack of workspace size (8388608)\n",
      "  cuda.cudnn.convolution_forward(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 92.21%\n"
     ]
    }
   ],
   "source": [
    "args = Arguments(\n",
    "    dataset=\"data/eu_moths\",\n",
    "    weights=\"data/clf_final_2021-03-16_0003.npz\",\n",
    ")\n",
    "print(args)\n",
    "\n",
    "main(args)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}