# Copyright (c) 2023 Felix Kleinsteuber and Computer Vision Group, Friedrich Schiller University Jena

# Functions related to approach 3 (local features).
# For training and evaluation scripts, see ./train_bow.py and ./eval_bow.py.

import cv2 as cv
import numpy as np
from tqdm import tqdm
from sklearn.cluster import KMeans

from py.Session import SessionImage


def dense_keypoints(img, step=30, size=60):
    """Generates a list of densely sampled keypoints on img.

    The keypoints are laid out on a regular grid with ``step`` pixels between
    neighbouring centers. Each axis holds ``dimension // step`` keypoints and
    the leftover pixels (``dimension % step``) are split between both borders
    so that the whole grid is centered in the image.

    Args:
        img: Image to sample from (only ``img.shape[0]`` and ``img.shape[1]``
            are read).
        step (int, optional): Vertical and horizontal step size between
            keypoints. Defaults to 30.
        size (int, optional): Size of keypoints. Defaults to 60.

    Returns:
        list[cv.KeyPoint]: Keypoints in row-major order (all x positions for
        the first y, then the next y, ...).
    """
    height, width = img.shape[0], img.shape[1]
    half_step = (step + 1) // 2

    # First center on each axis: half a cell in, plus half of the leftover
    # pixels so the grid is centered.
    y_first = half_step + (height % step) // 2
    x_first = half_step + (width % step) // 2

    # Derive the stop value from the number of whole cells instead of from
    # the image border. Using ``dimension - half_step`` as an exclusive stop
    # silently dropped the last row/column whenever the dimension was an
    # exact multiple of ``step`` (e.g. height 60, step 30 gave only y=15),
    # which left the grid off-center.
    y_stop = y_first + (height // step) * step
    x_stop = x_first + (width // step) * step

    return [
        cv.KeyPoint(x, y, size)
        for y in range(y_first, y_stop, step)
        for x in range(x_first, x_stop, step)
    ]


def extract_descriptors(images: list[SessionImage], kp_step: int = 30, kp_size: int = 60):
    """Extracts DSIFT descriptors from the provided images and returns them in a single array.

    Args:
        images (list[SessionImage]): List of images to read and compute
            descriptors from.
        kp_step (int, optional): Keypoint step size, see dense_keypoints.
            Defaults to 30.
        kp_size (int, optional): Keypoint size, see dense_keypoints.
            Defaults to 60.

    Returns:
        np.array, shape=(total_descriptors, 128): DSIFT descriptors of all
        images stacked in input order. Images that yield no descriptors
        contribute no rows; an empty input gives shape (0, 128).
    """
    sift = cv.SIFT_create()
    per_image = []
    count_reported = False
    for image in tqdm(images):
        img = image.read_opencv(gray=True)
        kp = dense_keypoints(img, kp_step, kp_size)
        # output number of keypoints once (taken from the first image)
        if not count_reported:
            print(f"{len(kp)} keypoints per image.")
            count_reported = True
        _, des = sift.compute(img, kp)
        # compute() hands back None instead of an array when there is nothing
        # to describe (e.g. the image is smaller than one step); skip those
        # images rather than failing on them.
        if des is not None:
            per_image.append(des)

    if not per_image:
        # Same empty (0, 128) result the row-list based version produced.
        return np.empty((0, 128))
    # One concatenate over whole per-image blocks instead of growing a Python
    # list descriptor by descriptor.
    return np.concatenate(per_image).reshape(-1, 128)


def generate_dictionary_from_descriptors(dscs, dictionary_size: int):
    """Clusters the given (D)SIFT descriptors using k-means.

    This may take a while depending on the number of descriptors.

    Args:
        dscs (np.array, shape(-1, 128)): (D)SIFT descriptors for clustering.
        dictionary_size (int): Number of k-means clusters. Must be between 1
            and the number of descriptors.

    Returns:
        np.array, shape=(dictionary_size, 128): BOW dictionary (the k-means
        cluster centers).

    Raises:
        AssertionError: If dscs is not a 2-D array with 128 columns or
            dictionary_size is out of range.
    """
    assert len(dscs.shape) == 2 and dscs.shape[1] == 128, \
        "dscs must have shape (n, 128)"
    assert dictionary_size > 0 and dictionary_size <= dscs.shape[0], \
        "dictionary_size must be in [1, number of descriptors]"

    kmeans = KMeans(dictionary_size, verbose=1).fit(dscs)
    dictionary = kmeans.cluster_centers_
    assert dictionary.shape == (dictionary_size, 128)
    return dictionary


def pick_random_descriptors(dscs, dictionary_size: int):
    """Picks dictionary_size random descriptors to use as a vocabulary.

    Much faster but less accurate alternative to kmeans clustering. The rows
    are drawn without replacement using numpy's global random state.

    Args:
        dscs (np.array, shape(-1, 128)): (D)SIFT descriptors to pick from.
        dictionary_size (int): Number of clusters / vocabulary size. Must be
            between 1 and the number of descriptors.

    Returns:
        np.array, shape=(dictionary_size, 128): Randomly picked BOW dictionary.

    Raises:
        AssertionError: If dscs is not a 2-D array with 128 columns or
            dictionary_size is out of range.
    """
    assert len(dscs.shape) == 2 and dscs.shape[1] == 128, \
        "dscs must have shape (n, 128)"
    assert dictionary_size > 0 and dictionary_size <= dscs.shape[0], \
        "dictionary_size must be in [1, number of descriptors]"

    picked_rows = np.random.choice(len(dscs), size=dictionary_size, replace=False)
    return dscs[picked_rows]


def generate_bow_features(images: list[SessionImage], dictionaries, kp_step: int = 30, kp_size: int = 60):
    """Calculates the BOW features for the provided images for every dictionary.

    This is a generator: nothing (including the argument checks) runs until
    iteration starts. One BOW extractor is built per dictionary up front, then
    every image is read once and passed through all extractors.

    Args:
        images (list[SessionImage]): List of images to read and compute
            feature vectors from.
        dictionaries (np.array of shape=(num_dicts, dict_size, 128)): List of
            BOW dictionaries.
        kp_step (int, optional): Keypoint step size, see dense_keypoints. Must
            be identical to the step size used for vocabulary generation.
            Defaults to 30.
        kp_size (int, optional): Keypoint size, see dense_keypoints. Must be
            identical to the size used for vocabulary generation.
            Defaults to 60.

    Yields:
        (str, np.array): (filename, feature vectors). The array stacks the
        unmodified output of each extractor along a new first axis of length
        num_dicts. NOTE(review): OpenCV documents that output as a single
        1 x dict_size row, which would make the yielded shape
        (num_dicts, 1, dict_size) rather than (num_dicts, dict_size) --
        confirm against the callers before relying on either.

    Raises:
        AssertionError: If dictionaries is not 3-D with a last axis of 128, or
            kp_size / kp_step is not positive.
    """
    assert len(dictionaries.shape) == 3 and dictionaries.shape[2] == 128, \
        "dictionaries must have shape (num_dicts, dict_size, 128)"
    assert kp_size > 0 and kp_step > 0, "kp_size and kp_step must be positive"

    extractors = []
    for dictionary in dictionaries:
        # Every extractor gets its own matcher because the vocabulary is
        # attached per extractor.
        # NOTE(review): FLANN index id 0 appears to be the linear (exhaustive)
        # index, with the KD-tree being id 1; if so, "trees" has no effect
        # here. Confirm before changing it -- switching the index type would
        # alter matching results.
        flann = cv.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {"checks": 50})
        sift = cv.SIFT_create()
        bow_extractor = cv.BOWImgDescriptorExtractor(sift, flann)  # or cv.BFMatcher(cv.NORM_L2)
        bow_extractor.setVocabulary(dictionary)
        extractors.append(bow_extractor)

    for image in tqdm(images):
        img = image.read_opencv(gray=True)
        kp = dense_keypoints(img, kp_step, kp_size)
        feat = np.array([ext.compute(img, kp) for ext in extractors])
        yield image.filename, feat