# Copyright (c) 2023 Felix Kleinsteuber and Computer Vision Group, Friedrich Schiller University Jena
# Functions related to approach 3 (local features).
# For training and evaluation scripts, see ./train_bow.py and ./eval_bow.py.
import cv2 as cv
import numpy as np
from sklearn.cluster import KMeans
from tqdm import tqdm

from py.Session import SessionImage
def dense_keypoints(img, step=30, size=60):
    """Builds a dense, centered grid of keypoints covering img.

    Keypoints are packed tightly (every `step` pixels, no extra spacing) and the
    whole grid is shifted so it is centered within the image bounds.

    Args:
        img (_type_): Image to sample from. (only the shape is relevant)
        step (int, optional): Vertical and horizontal step size between keypoints. Defaults to 30.
        size (int, optional): Size of keypoints. Defaults to 60.

    Returns:
        list[cv.KeyPoint]: List of keypoints, in row-major (top-to-bottom, left-to-right) order.
    """
    rows, cols = img.shape[0], img.shape[1]
    # distribute the leftover pixels (shape mod step) evenly on both sides to center the grid
    off_y = (rows % step) // 2
    off_x = (cols % step) // 2
    # keep at least half a step of distance from the image border
    margin = (step + 1) // 2
    keypoints = []
    for y in range(margin + off_y, rows - margin, step):
        for x in range(margin + off_x, cols - margin, step):
            keypoints.append(cv.KeyPoint(x, y, size))
    return keypoints
def extract_descriptors(images: list[SessionImage], kp_step: int = 30, kp_size: int = 60):
    """Extracts DSIFT descriptors from the provided images and returns them in a single array.

    Args:
        images (list[SessionImage]): List of images to read and compute descriptors from.
        kp_step (int, optional): Keypoint step size, see dense_keypoints. Defaults to 30.
        kp_size (int, optional): Keypoint size, see dense_keypoints. Defaults to 60.

    Returns:
        np.array, shape=(total_descriptors, 128): DSIFT descriptors of all images stacked.
    """
    sift = cv.SIFT_create()
    dscs = []
    kp_count_reported = False
    for image in tqdm(images):
        img = image.read_opencv(gray=True)
        kp = dense_keypoints(img, kp_step, kp_size)
        # output number of keypoints once (identical for all same-sized images)
        if not kp_count_reported:
            print(f"{len(kp)} keypoints per image.")
            kp_count_reported = True
        kp, des = sift.compute(img, kp)
        # sift.compute returns des=None when the keypoint list is empty
        # (image smaller than the grid step) — skip instead of crashing on extend(None)
        if des is not None:
            dscs.extend(des)
    # reshape also normalizes the empty case to shape (0, 128)
    return np.array(dscs).reshape(-1, 128)
def generate_dictionary_from_descriptors(dscs, dictionary_size: int):
    """Builds a BOW vocabulary by k-means clustering of (D)SIFT descriptors.

    This may take a while depending on the number of descriptors.

    Args:
        dscs (np.array, shape(-1, 128)): (D)SIFT descriptors for clustering.
        dictionary_size (int): Number of k-means clusters.

    Returns:
        np.array, shape=(dictionary_size, 128): BOW dictionary (the cluster centers).
    """
    assert dscs.ndim == 2 and dscs.shape[1] == 128
    assert 0 < dictionary_size <= dscs.shape[0]
    # verbose=1 so long-running clustering shows progress
    centers = KMeans(dictionary_size, verbose=1).fit(dscs).cluster_centers_
    assert centers.shape == (dictionary_size, 128)
    return centers
def pick_random_descriptors(dscs, dictionary_size: int):
    """Samples dictionary_size descriptors uniformly at random as a BOW vocabulary.

    Much faster but less accurate alternative to kmeans clustering.

    Args:
        dscs (np.array, shape(-1, 128)): (D)SIFT descriptors to pick from.
        dictionary_size (int): Number of clusters / vocabulary size.

    Returns:
        np.array, shape=(dictionary_size, 128): Randomly picked BOW dictionary.
    """
    assert dscs.ndim == 2 and dscs.shape[1] == 128
    assert 0 < dictionary_size <= dscs.shape[0]
    # draw distinct row indices, then gather those rows
    chosen = np.random.choice(len(dscs), size=dictionary_size, replace=False)
    return dscs[chosen]
def generate_bow_features(images: list[SessionImage], dictionaries, kp_step: int = 30, kp_size: int = 60):
    """Calculates the BOW features for the provided images for every dictionary.

    Yields a list of feature vectors for every image.

    Args:
        images (list[SessionImage]): List of images to read and compute feature vectors from.
        dictionaries (np.array of shape=(num_dicts, dict_size, 128)): List of BOW dictionaries.
        kp_step (int, optional): Keypoint step size, see dense_keypoints. Must be identical to the step size used for vocabulary generation. Defaults to 30.
        kp_size (int, optional): Keypoint size, see dense_keypoints. Must be identical to the size used for vocabulary generation. Defaults to 60.

    Yields:
        (str, np.array): (filename, stacked per-dictionary BOW feature vectors).
            NOTE(review): each extractor's compute() appears to return a (1, dict_size)
            row, so the stacked array is likely (num_dicts, 1, dict_size) rather than
            (num_dicts, dict_size) — confirm against the consumers in eval_bow.py.
    """
    assert dictionaries.ndim == 3 and dictionaries.shape[2] == 128
    assert kp_size > 0 and kp_step > 0

    def make_extractor(vocabulary):
        # FLANN KD-tree matcher; cv.BFMatcher(cv.NORM_L2) would work as well
        matcher = cv.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {"checks": 50})
        extractor = cv.BOWImgDescriptorExtractor(cv.SIFT_create(), matcher)
        extractor.setVocabulary(vocabulary)
        return extractor

    extractors = [make_extractor(vocabulary) for vocabulary in dictionaries]

    for image in tqdm(images):
        img = image.read_opencv(gray=True)
        grid = dense_keypoints(img, kp_step, kp_size)
        features = np.array([extractor.compute(img, grid) for extractor in extractors])
        yield image.filename, features