# Installation

1. Install Miniconda by following the [instructions](https://docs.conda.io/en/latest/miniconda.html) for your operating system.

2. Create and activate a conda environment:
```bash
conda create -n myenv python~=3.8.0 matplotlib jupyter opencv
conda activate myenv
```

3. Install CUDA / cuDNN and required libraries:
```bash
conda install -c conda-forge cudatoolkit~=11.0.0 cudnn~=8.0.0
pip install chainer~=7.0 cupy_cuda110~=7.0 chainercv~=0.13.0
pip install cvmodelz pyaml tabulate tqdm

```


In [1]:
import chainer
import cupy
import numpy as np
import pyaml
import cv2
cv2.setNumThreads(0)

from dataclasses import dataclass
from pathlib import Path
from typing import Callable
from tqdm.auto import tqdm
from imageio import imread

from chainer import datasets
from chainer import iterators
from chainer.dataset import concat_examples
from chainercv import transforms as tr
from cvmodelz.models import ModelFactory
from cvmodelz.models import BaseModel


# Select OpenCV ("cv2") as the image-resize backend; the setting shows up
# as `cv_resize_backend` in the configuration dump printed below.
chainer.config.cv_resize_backend = "cv2"
# Dump the active Chainer configuration and the CuPy/CUDA build information
# so that the environment used for this run is recorded next to the results.
print("=== Chainer info ===")
chainer.config.show()
print("=== CuPy info ===")
cupy.show_config()

=== Chainer info ===
_will_recompute                False
autotune                       False
compute_mode                   None
cudnn_deterministic            False
cudnn_fast_batch_normalization False
cv_read_image_backend          None
cv_resize_backend              cv2
cv_rotate_backend              None
debug                          False
dtype                          float32
enable_backprop                True
in_recomputing                 False
keep_graph_on_report           False
lazy_grad_sum                  False
schedule_func                  None
train                          True
type_check                     True
use_cudnn                      auto
use_cudnn_tensor_core          auto
use_ideep                      never
use_static_graph               True
warn_nondeterministic          False
=== CuPy info ===
CuPy Version          : 7.8.0
CUDA Root             : None
CUDA Build Version    : 11000
CUDA Driver Version   : 11020
CUDA Runtime Version  : 11000
cuBLAS V

In [2]:
@dataclass
class Arguments:
    """Settings for a single evaluation run.

    Printing an instance renders all fields as a YAML document nested
    under a top-level ``Arguments`` key, in declaration order.
    """

    # dataset root folder, handed to `load_datasets`
    dataset: str = "eu_moths"
    # file with the trained model weights, handed to `load_model`
    weights: str = "clf_final.npz"

    # ID of the split that is used as validation set
    split_id: int = 0

    # device passed to `evaluate`: IDs >= 0 select a GPU, negative IDs the CPU
    device_id: int = 0
    # number of samples per batch of the validation iterator
    batch_size: int = 32
    # number of worker processes; values below 1 yield a serial iterator
    n_jobs: int = 4

    def __str__(self):
        payload = {"Arguments": vars(self)}
        return pyaml.dump(payload, sort_dicts=False)
        

In [3]:
class Dataset(chainer.dataset.DatasetMixin):
    """Image classification dataset read from a folder of annotation files.

    The folder ``root`` has to contain ``class_names.txt``, ``images.txt``
    (pairs of image ID and file name), ``labels.txt``, ``tr_ID.txt`` (one
    split ID per image) and an ``images`` sub-folder with the image files.

    Args:
        root: path of the dataset folder.
        split_id: ID of the split that is held out for validation.
        is_train: if true, keep every image whose split ID differs from
            ``split_id``; otherwise keep only the images of that split.
    """

    def __init__(self, root: str, split_id: int, is_train: bool = True):
        super().__init__()

        self._root = root = Path(root)
        self.class_names = np.loadtxt(root / "class_names.txt", dtype="U255")

        image_table = np.loadtxt(
            root / "images.txt",
            dtype=[("id", np.int32), ("fname", "U255")],
        )
        all_labels = np.loadtxt(root / "labels.txt", dtype=np.int32)
        split_of_image = np.loadtxt(root / "tr_ID.txt", dtype=np.int32)

        # training: all other splits; validation: only the requested split
        keep = (split_of_image != split_id) if is_train else (split_of_image == split_id)

        self.images = image_table["fname"][keep]
        self.labels = all_labels[keep]

    def __len__(self):
        return len(self.images)

    def get_example(self, i):
        """Return the ``i``-th sample as a pair of RGB image and label."""
        image_file = self._root / "images" / self.images[i]
        target = self.labels[i]
        pixels = imread(image_file, pilmode="RGB")
        return pixels, target

class DataTransformer(object):
    """Callable that preprocesses ``(image, label)`` samples for the model.

    Args:
        prepare: function called as ``prepare(image, size)``; its result is
            passed on to the center crop.
        size: edge length of the square crop; also forwarded to ``prepare``.
    """

    def __init__(self, prepare: Callable, size: int):
        super().__init__()
        self.size = size
        self.prepare = prepare

    def __call__(self, data):
        """Return the prepared, cropped and rescaled image with its label."""
        raw_image, target = data
        prepared = self.prepare(raw_image, self.size)

        crop_shape = (self.size, self.size)
        cropped = tr.center_crop(prepared, size=crop_shape)

        # map the pixel range 0..1 to -1..1
        # (assumes `prepare` yields values in 0..1 - TODO confirm)
        return cropped * 2 - 1, target

In [4]:
def load_model(weights: str, n_classes: int = 200):
    """Create an InceptionV3 classifier and load trained weights into it.

    Args:
        weights: path of the file holding the trained weights.
        n_classes: number of output classes of the classifier. Defaults to
            200, the value that was previously hard-coded, so that weights
            trained for another number of classes can be loaded as well.

    Returns:
        The model created by ``ModelFactory.new`` with the weights loaded.
    """
    model = ModelFactory.new("cvmodelz.InceptionV3", n_classes=n_classes)
    # NOTE(review): "model/" is presumably the prefix under which the
    # parameters are stored inside the weights file - confirm against cvmodelz.
    model.load(weights, path="model/")
    return model

def load_datasets(root: Path, model_input_size: int, prepare: Callable, split_id: int):
    """Build the preprocessed training and validation datasets.

    Args:
        root: dataset folder, forwarded to ``Dataset``.
        model_input_size: image size handed to the ``DataTransformer``.
        prepare: preparation function handed to the ``DataTransformer``.
        split_id: ID of the split used for validation; all remaining
            splits form the training set.

    Returns:
        A pair ``(train_ds, val_ds)`` of ``TransformDataset`` objects that
        share one transformer instance.
    """
    # first the training part (is_train=True), then the validation part
    raw_train, raw_val = (
        Dataset(root, split_id=split_id, is_train=flag) for flag in (True, False)
    )

    preprocess = DataTransformer(prepare, model_input_size)

    return (
        datasets.TransformDataset(raw_train, preprocess),
        datasets.TransformDataset(raw_val, preprocess),
    )

def new_iterator(dataset, n_jobs: int = -1,  **kwargs):
    """Create an iterator over ``dataset``.

    Args:
        dataset: the dataset to iterate over.
        n_jobs: number of worker processes. For values of at least 1 a
            ``MultiprocessIterator`` with ``n_processes=n_jobs`` is created
            (overriding any ``n_processes`` in ``kwargs``); otherwise a
            ``SerialIterator``.
        **kwargs: further keyword arguments for the iterator constructor.

    Returns:
        The created iterator.
    """
    use_workers = n_jobs >= 1

    factory = iterators.MultiprocessIterator if use_workers else iterators.SerialIterator
    options = {**kwargs, "n_processes": n_jobs} if use_workers else kwargs

    return factory(dataset, **options)
    

In [5]:
def evaluate(model: chainer.Chain, iterator, device_id: int = -1) -> float:
    """Compute the classification accuracy of ``model`` on an iterator.

    Args:
        model: the classifier; calling it on a batch has to yield scores
            of shape (batch_size, #classes).
        iterator: iterator over the evaluation data; it is reset before
            use and has to provide ``dataset`` and ``batch_size``.
        device_id: IDs >= 0 select the GPU with that ID, negative IDs
            select CPU mode.

    Returns:
        The fraction of samples whose highest-scoring class equals the label.
    """
    # CPU mode keeps the plain (negative) ID as device specifier
    device = device_id
    if device_id >= 0:
        device = chainer.backends.cuda.get_device_from_id(device_id)
        device.use()

    model.to_device(device)

    # only needed for the length of the progress bar
    total_batches = int(np.ceil(len(iterator.dataset) / iterator.batch_size))

    predicted, expected = [], []

    iterator.reset()
    for minibatch in tqdm(iterator, total=total_batches):
        inputs, targets = concat_examples(minibatch, device=device)

        # scores of shape (batch_size, #classes)
        scores = model(inputs)
        scores.to_cpu()

        # the predicted class is the one with the highest score
        predicted.extend(scores.array.argmax(axis=-1))
        expected.extend(chainer.cuda.to_cpu(targets))

    hits = np.array(predicted) == np.array(expected)
    return np.mean(hits)


In [6]:
def main(args: Arguments):
    """Load the model and the data, then print the validation accuracy.

    Args:
        args: run settings; ``weights``, ``dataset``, ``split_id``,
            ``n_jobs``, ``batch_size`` and ``device_id`` are read.
    """
    model = load_model(args.weights)

    print(f"Created {model.meta.name} model with weights from \"{args.weights}\"")

    train_ds, val_ds = load_datasets(args.dataset, model.meta.input_size, model.prepare, split_id=args.split_id)
    print(f"Found {len(train_ds)} training and {len(val_ds)} validation images")

    val_it = new_iterator(val_ds,
                          n_jobs=args.n_jobs,
                          batch_size=args.batch_size,
                          repeat=False,
                          shuffle=False
                         )

    try:
        # evaluation only: switch to test mode and skip building the graph
        with chainer.using_config("train", False), chainer.no_backprop_mode():
            accu = evaluate(model, val_it, device_id=args.device_id)
    finally:
        # release the iterator's resources (e.g. the worker processes of a
        # multiprocess iterator), also when the evaluation fails
        val_it.finalize()

    print(f"Accuracy: {accu:.2%}")

In [7]:
# Settings of this run: dataset folder and trained weights are given
# explicitly, every other field keeps its default from `Arguments`.
args = Arguments(
    dataset="data/eu_moths",
    weights="data/2021-03-16-15.12.12.019294678_clf_final.npz",
)
# show the settings (rendered as YAML by `Arguments.__str__`)
print(args)

# run the evaluation; the accuracy is printed by `main`
main(args)

Arguments:
  dataset: data/eu_moths
  weights: data/2021-03-16-15.12.12.019294678_clf_final.npz
  split_id: 0
  device_id: 0
  batch_size: 32
  n_jobs: 4

Created InceptionV3 model with weights from "data/2021-03-16-15.12.12.019294678_clf_final.npz"
Found 1653 training and 552 validation images


  0%|          | 0/18 [00:00<?, ?it/s]

  cuda.cudnn.convolution_forward(


Accuracy: 92.21%
