# LocalFeatures.py
# Functions related to approach 3 (local features).
# For training and evaluation scripts, see ./train_bow.py and ./eval_bow.py.
import cv2 as cv
import numpy as np
from tqdm import tqdm

from py.Session import SessionImage
  7. def dense_keypoints(img, step=30, off=(15, 12)):
  8. """Generates a list of densely sampled keypoints on img.
  9. Args:
  10. img (_type_): Image to sample from. (only the shape is relevant)
  11. step (int, optional): Vertical and horizontal step size between and size of keypoints. Defaults to 30.
  12. off (tuple, optional): y and x offset of the first keypoint in the grid. Defaults to (15, 12).
  13. Returns:
  14. list[cv.KeyPoint]: List of keypoints
  15. """
  16. border_dist = (step + 1) // 2
  17. return [cv.KeyPoint(x, y, step) for y in range(border_dist + off[0], img.shape[0] - border_dist, step)
  18. for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
  19. def extract_descriptors(images: list[SessionImage]):
  20. """Extracts DSIFT descriptors from the provided images and returns them in a single array.
  21. Args:
  22. images (list[SessionImage]): List of images to read and compute descriptors from.
  23. Returns:
  24. np.array, shape=(len(images)*keypoints_per_image, 128): DSIFT descriptors.
  25. """
  26. sift = cv.SIFT_create()
  27. dscs = []
  28. for image in tqdm(images):
  29. img = image.read_opencv(gray=True)
  30. kp = dense_keypoints(img)
  31. kp, des = sift.compute(img, kp)
  32. dscs.append(des)
  33. return np.array(dscs)
  34. def generate_dictionary_from_descriptors(dscs, dictionary_size: int):
  35. """Clusters the given (D)SIFT descriptors using k-means.
  36. This may take a while depending on the number of descriptors.
  37. Args:
  38. dscs (np.array, shape(-1, 128)): (D)SIFT descriptors for clustering.
  39. dictionary_size (int): Number of k-means clusters.
  40. Returns:
  41. np.array, shape=(dictionary_size, 128): BOW dictionary.
  42. """
  43. BOW = cv.BOWKMeansTrainer(dictionary_size)
  44. for dsc in dscs:
  45. BOW.add(dsc)
  46. dictionary = BOW.cluster()
  47. return dictionary
  48. def generate_bow_features(images: list[SessionImage], dictionary):
  49. """Calculates the BOW features for the provided images using dictionary.
  50. Yields a feature vector for every image.
  51. Args:
  52. images (list[SessionImage]): List of images to read and compute feature vectors from.
  53. dictionary (np.array, shape=(-1, 128)): BOW dictionary.
  54. Yields:
  55. (str, np.array of shape=(dictionary.shape[0])): (filename, feature vector)
  56. """
  57. flann = cv.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {"checks": 50})
  58. sift = cv.SIFT_create()
  59. bow_extractor = cv.BOWImgDescriptorExtractor(sift, flann) # or cv.BFMatcher(cv.NORM_L2)
  60. bow_extractor.setVocabulary(dictionary)
  61. for image in tqdm(images):
  62. img = image.read_opencv(gray=True)
  63. kp = dense_keypoints(img)
  64. feat = bow_extractor.compute(img, kp)
  65. yield image.filename, feat