{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Installation\n", "\n", "1. Install miniconda according to the [instructions](https://docs.conda.io/en/latest/miniconda.html) for your OS\n", "\n", "2. Create an environment\n", "```bash\n", "conda create -n myenv python~=3.8.0 matplotlib jupyter opencv\n", "conda activate myenv\n", "```\n", "\n", "3. Install CUDA / cuDNN and required libraries:\n", "```bash\n", "conda install -c conda-forge cudatoolkit~=11.0.0 cudnn~=8.0.0\n", "pip install chainer~=7.0 cupy_cuda110~=7.0 chainercv~=0.13.0\n", "pip install cvmodelz pyaml tabulate tqdm\n", "```\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "=== Chainer info ===\n", "_will_recompute False\n", "autotune False\n", "compute_mode None\n", "cudnn_deterministic False\n", "cudnn_fast_batch_normalization False\n", "cv_read_image_backend None\n", "cv_resize_backend cv2\n", "cv_rotate_backend None\n", "debug False\n", "dtype float32\n", "enable_backprop True\n", "in_recomputing False\n", "keep_graph_on_report False\n", "lazy_grad_sum False\n", "schedule_func None\n", "train True\n", "type_check True\n", "use_cudnn auto\n", "use_cudnn_tensor_core auto\n", "use_ideep never\n", "use_static_graph True\n", "warn_nondeterministic False\n", "=== CuPy info ===\n", "CuPy Version : 7.8.0\n", "CUDA Root : None\n", "CUDA Build Version : 11000\n", "CUDA Driver Version : 11020\n", "CUDA Runtime Version : 11000\n", "cuBLAS Version : 11200\n", "cuFFT Version : 10201\n", "cuRAND Version : 10201\n", "cuSOLVER Version : (10, 6, 0)\n", "cuSPARSE Version : 11101\n", "NVRTC Version : (11, 0)\n", "cuDNN Build Version : 8002\n", "cuDNN Version : 8005\n", "NCCL Build Version : 2708\n", "NCCL Runtime Version : 2708\n", "CUB Version : Enabled\n", "cuTENSOR Version : None\n" ] } ], "source": [ "import chainer\n", "import cupy\n", "import numpy as np\n", "import pyaml\n", "import cv2\n", 
"cv2.setNumThreads(0)\n",
    "\n",
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "from typing import Callable\n",
    "from tqdm.auto import tqdm\n",
    "from imageio import imread\n",
    "\n",
    "from chainer import datasets\n",
    "from chainer import iterators\n",
    "from chainer.dataset import concat_examples\n",
    "from chainercv import transforms as tr\n",
    "from cvmodelz.models import ModelFactory\n",
    "from cvmodelz.models import BaseModel\n",
    "\n",
    "\n",
    "chainer.config.cv_resize_backend = \"cv2\"\n",
    "print(\"=== Chainer info ===\")\n",
    "chainer.config.show()\n",
    "print(\"=== CuPy info ===\")\n",
    "cupy.show_config()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass\n",
    "class Arguments(object):\n",
    "    \"\"\"Settings of the evaluation run.\n",
    "\n",
    "    Attributes:\n",
    "        dataset: dataset root folder; handed to ``load_datasets`` in ``main``.\n",
    "        weights: file with the trained model weights; handed to ``load_model``.\n",
    "        split_id: ID of the split that is used as validation set.\n",
    "        device_id: GPU ID; a negative value selects the CPU.\n",
    "        batch_size: number of examples per batch.\n",
    "        n_jobs: number of worker processes of the iterator; values below 1\n",
    "            select the single-process ``SerialIterator``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __str__(self):\n",
    "        # YAML dump of all fields; sort_dicts=False keeps the field order\n",
    "        return pyaml.dump(dict(Arguments=self.__dict__), sort_dicts=False)\n",
    "\n",
    "    dataset: str = \"eu_moths\"\n",
    "    weights: str = \"clf_final.npz\"\n",
    "\n",
    "    split_id: int = 0\n",
    "\n",
    "    device_id: int = 0\n",
    "    batch_size: int = 32\n",
    "    n_jobs: int = 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Dataset(chainer.dataset.DatasetMixin):\n",
    "    \"\"\"Image dataset that reads its annotations from text files in ``root``.\n",
    "\n",
    "    The following files are read from the root folder: ``class_names.txt``,\n",
    "    ``images.txt`` (pairs of image ID and file name), ``labels.txt`` and\n",
    "    ``tr_ID.txt`` (split ID of every image). The images themselves are\n",
    "    loaded from ``<root>/images``.\n",
    "\n",
    "    Args:\n",
    "        root: folder containing the annotation files and the images.\n",
    "        split_id: ID of the split used for validation.\n",
    "        is_train: if True, all images whose split ID differs from\n",
    "            ``split_id`` are selected; otherwise only the images of\n",
    "            the split ``split_id``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, root: str, split_id: int, is_train: bool = True):\n",
    "        super().__init__()\n",
    "\n",
    "        root = Path(root)\n",
    "        self._root = root\n",
    "        self.class_names = np.loadtxt(root / \"class_names.txt\", dtype=\"U255\")\n",
    "\n",
    "        _images = np.loadtxt(root / \"images.txt\", dtype=[(\"id\", np.int32), (\"fname\", \"U255\")])\n",
    "        _labels = np.loadtxt(root / \"labels.txt\", dtype=np.int32)\n",
    "        _split_ids = np.loadtxt(root / \"tr_ID.txt\", dtype=np.int32)\n",
    "\n",
    "        if is_train:\n",
    "            # select all other splits\n",
    "            split_mask = _split_ids != split_id\n",
    "\n",
    "        else:\n",
    "            # select only images for a given split ID\n",
    "            split_mask = _split_ids == split_id\n",
    "\n",
    "        # the same mask is applied to file names and labels, so both stay aligned\n",
    "        self.images = _images[\"fname\"][split_mask]\n",
    "        self.labels = _labels[split_mask]\n",
    "\n",
    "    def __len__(self):\n",
    "        \"\"\"Return the number of images selected for this split.\"\"\"\n",
    "        return len(self.images)\n",
    "\n",
    "    def get_example(self, i):\n",
    "        \"\"\"Return the i-th image, read as RGB, together with its label.\"\"\"\n",
    "        im_path = self._root / \"images\" / self.images[i]\n",
    "        label = self.labels[i]\n",
    "        return imread(im_path, pilmode=\"RGB\"), label\n",
    "\n",
    "class DataTransformer(object):\n",
    "    \"\"\"Callable that preprocesses an (image, label) pair for the model.\n",
    "\n",
    "    Args:\n",
    "        prepare: function called as ``prepare(image, size)``; in ``main``\n",
    "            this is the ``prepare`` method of the model.\n",
    "        size: edge length of the square center crop that is returned.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, prepare: Callable, size: int):\n",
    "        super().__init__()\n",
    "        self.prepare = prepare\n",
    "        self.size = size\n",
    "\n",
    "    def __call__(self, data):\n",
    "        \"\"\"Return the prepared, center-cropped and rescaled image with its unchanged label.\"\"\"\n",
    "        image, label = data\n",
    "        new_image = self.prepare(image, self.size)\n",
    "\n",
    "        new_image = tr.center_crop(new_image, size=(self.size, self.size))\n",
    "\n",
    "        # transform the pixel range from 0..1 to -1..1\n",
    "        # NOTE(review): assumes that prepare returns values in 0..1 -- confirm\n",
    "        new_image = new_image * 2 - 1\n",
    "        return new_image, label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_model(weights: str, model_type: str = \"cvmodelz.InceptionV3\", n_classes: int = 200):\n",
    "    \"\"\"Create a model and load trained weights into it.\n",
    "\n",
    "    Args:\n",
    "        weights: path to the weights file.\n",
    "        model_type: model identifier passed to ``ModelFactory.new``.\n",
    "        n_classes: number of classes the model is created with. The default\n",
    "            of 200 is the value that was hard-coded here before.\n",
    "\n",
    "    Returns:\n",
    "        The model with the loaded weights.\n",
    "    \"\"\"\n",
    "    model = ModelFactory.new(model_type, n_classes=n_classes)\n",
    "    # NOTE(review): \"model/\" is presumably the prefix of the parameters inside the weights file -- confirm\n",
    "    model.load(weights, path=\"model/\")\n",
    "    return model\n",
    "\n",
    "def load_datasets(root: Path, model_input_size: int, prepare: Callable, split_id: int):\n",
    "    \"\"\"Create the training and the validation dataset including the preprocessing.\n",
    "\n",
    "    Args:\n",
    "        root: dataset root folder. ``Dataset`` wraps it in ``Path``, so the\n",
    "            plain string that ``main`` passes works as well.\n",
    "        model_input_size: edge length of the images the model expects.\n",
    "        prepare: preprocessing function, see ``DataTransformer``.\n",
    "        split_id: ID of the split used for validation; all other splits\n",
    "            form the training set.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(train_ds, val_ds)`` of transformed datasets.\n",
    "    \"\"\"\n",
    "    train_ds = Dataset(root, split_id=split_id, is_train=True)\n",
    "    val_ds = Dataset(root, split_id=split_id, is_train=False)\n",
    "\n",
    "    # both subsets share the same deterministic preprocessing\n",
    "    transformer = DataTransformer(prepare, model_input_size)\n",
    "    train_ds = datasets.TransformDataset(train_ds, transformer)\n",
    "    val_ds = datasets.TransformDataset(val_ds, transformer)\n",
    "\n",
    "    return train_ds, val_ds\n",
    "\n",
    "def new_iterator(dataset, n_jobs: int = -1, **kwargs):\n",
    "    \"\"\"Create an iterator for the dataset.\n",
    "\n",
    "    Args:\n",
    "        dataset: dataset to iterate over.\n",
    "        n_jobs: number of worker processes. With ``n_jobs >= 1`` a\n",
    "            ``MultiprocessIterator`` is created, otherwise a ``SerialIterator``.\n",
    "        **kwargs: forwarded to the iterator class\n",
    "            (e.g. ``batch_size``, ``repeat``, ``shuffle``).\n",
    "\n",
    "    Returns:\n",
    "        The created iterator.\n",
    "    \"\"\"\n",
    "    it_cls = iterators.SerialIterator\n",
    "\n",
    "    if n_jobs >= 1:\n",
    "        kwargs[\"n_processes\"] = n_jobs\n",
    "        it_cls = iterators.MultiprocessIterator\n",
    "\n",
    "    return it_cls(dataset, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate(model: chainer.Chain, iterator, device_id: int = -1) -> float:\n",
    "    \"\"\"Compute the accuracy of the model on all batches of the iterator.\n",
    "\n",
    "    The model is moved to the selected device as a side effect. The chainer\n",
    "    configuration (e.g. ``train=False``) is not touched here; ``main`` sets\n",
    "    it around the call.\n",
    "\n",
    "    Args:\n",
    "        model: classifier that is called with a batch of images and returns\n",
    "            the class scores.\n",
    "        iterator: iterator over (image, label) pairs. It is reset before use;\n",
    "            presumably it has to be created with ``repeat=False`` (as in\n",
    "            ``main``) for the loop to end.\n",
    "        device_id: GPU ID; a negative value selects the CPU.\n",
    "\n",
    "    Returns:\n",
    "        Fraction of examples whose predicted class equals the label.\n",
    "    \"\"\"\n",
    "    if device_id >= 0:\n",
    "        device = chainer.backends.cuda.get_device_from_id(device_id)\n",
    "        device.use()\n",
    "    else:\n",
    "        # for CPU mode\n",
    "        device = device_id\n",
    "\n",
    "    model.to_device(device)\n",
    "\n",
    "    # only used as total for the progress bar\n",
    "    n_batches = int(np.ceil(len(iterator.dataset) / iterator.batch_size))\n",
    "\n",
    "    preds = []\n",
    "    labels = []\n",
    "\n",
    "    iterator.reset()\n",
    "    for batch in tqdm(iterator, total=n_batches):\n",
    "        X, y = concat_examples(batch, device=device)\n",
    "\n",
    "        # shape is (batch_size, #classes)\n",
    "        logits = model(X)\n",
    "\n",
    "        logits.to_cpu()\n",
    "\n",
    "        # get the class ID with the highest score\n",
    "        preds.extend(logits.array.argmax(axis=-1))\n",
    "        labels.extend(chainer.cuda.to_cpu(y))\n",
    "\n",
    "    return np.mean(np.array(preds) == np.array(labels))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(args: Arguments):\n",
    "    \"\"\"Load the model and the data and print the validation accuracy.\n",
    "\n",
    "    Args:\n",
    "        args: settings of the run (weights, dataset, split, device,\n",
    "            batch size and number of workers).\n",
    "    \"\"\"\n",
    "    model = load_model(args.weights)\n",
    "\n",
    "    print(f\"Created {model.meta.name} model with weights from \\\"{args.weights}\\\"\")\n",
    "\n",
    "    train_ds, val_ds = load_datasets(args.dataset, model.meta.input_size, model.prepare, split_id=args.split_id)\n",
    "    print(f\"Found {len(train_ds)} training and {len(val_ds)} validation images\")\n",
    "\n",
    "    val_it = new_iterator(val_ds,\n",
    "        n_jobs=args.n_jobs,\n",
    "        batch_size=args.batch_size,\n",
    "        repeat=False,\n",
    "        shuffle=False\n",
    "    )\n",
    "\n",
    "    try:\n",
    "        # evaluation only: test-mode configuration and no computational graph\n",
    "        with chainer.using_config(\"train\", False), chainer.no_backprop_mode():\n",
    "            accu = evaluate(model, val_it, device_id=args.device_id)\n",
    "    finally:\n",
    "        # release the iterator's resources (the worker processes of a\n",
    "        # MultiprocessIterator) even if the evaluation fails\n",
    "        val_it.finalize()\n",
    "\n",
    "    print(f\"Accuracy: {accu:.2%}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Arguments:\n",
      " dataset: data/eu_moths\n",
      " weights: data/2021-03-16-15.12.12.019294678_clf_final.npz\n",
      " split_id: 0\n",
      " device_id: 0\n",
      " batch_size: 32\n",
      " n_jobs: 4\n",
      "\n",
      "Created InceptionV3 model with weights from \"data/2021-03-16-15.12.12.019294678_clf_final.npz\"\n",
      "Found 1653 training and 552 
validation images\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f3c8e8a2dfb74b3d984b6b1853174f58", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/18 [00:00