# LocalFeatures.py
# Functions related to approach 3 (local features).
# For training and evaluation scripts, see ./train_bow.py and ./eval_bow.py.
import cv2 as cv
import numpy as np
from tqdm import tqdm

from py.Session import SessionImage
  7. def dense_keypoints(img, step=30, off=(15, 12)):
  8. """Generates a list of densely sampled keypoints on img.
  9. Args:
  10. img (_type_): Image to sample from. (only the shape is relevant)
  11. step (int, optional): Vertical and horizontal step size between and size of keypoints. Defaults to 30.
  12. off (tuple, optional): y and x offset of the first keypoint in the grid. Defaults to (15, 12).
  13. Returns:
  14. list[cv.KeyPoint]: List of keypoints
  15. """
  16. border_dist = (step + 1) // 2
  17. return [cv.KeyPoint(x, y, step) for y in range(border_dist + off[0], img.shape[0] - border_dist, step)
  18. for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
  19. def extract_descriptors(images: list[SessionImage]):
  20. """Extracts DSIFT descriptors from the provided images and returns them in a single array.
  21. Args:
  22. images (list[SessionImage]): List of images to read and compute descriptors from.
  23. Returns:
  24. np.array, shape=(len(images)*keypoints_per_image, 128): DSIFT descriptors.
  25. """
  26. sift = cv.SIFT_create()
  27. dscs = []
  28. for image in tqdm(images):
  29. img = image.read_opencv(gray=True)
  30. kp = dense_keypoints(img)
  31. kp, des = sift.compute(img, kp)
  32. dscs.append(des)
  33. return np.array(dscs)
  34. def generate_dictionary_from_descriptors(dscs, dictionary_size: int):
  35. """Clusters the given (D)SIFT descriptors using k-means.
  36. This may take a while depending on the number of descriptors.
  37. Args:
  38. dscs (np.array, shape(-1, 128)): (D)SIFT descriptors for clustering.
  39. dictionary_size (int): Number of k-means clusters.
  40. Returns:
  41. np.array, shape=(dictionary_size, 128): BOW dictionary.
  42. """
  43. BOW = cv.BOWKMeansTrainer(dictionary_size)
  44. for dsc in dscs:
  45. BOW.add(dsc)
  46. dictionary = BOW.cluster()
  47. return dictionary
  48. def generate_bow_features(images: list[SessionImage], dictionary):
  49. """Calculates the BOW features for the provided images using dictionary.
  50. Yields a feature vector for every image.
  51. Args:
  52. images (list[SessionImage]): List of images to read and compute feature vectors from.
  53. dictionary (np.array, shape=(-1, 128)): BOW dictionary.
  54. Yields:
  55. (str, np.array of shape=(dictionary.shape[0])): (filename, feature vector)
  56. """
  57. flann = cv.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {"checks": 50})
  58. sift = cv.SIFT_create()
  59. bow_extractor = cv.BOWImgDescriptorExtractor(sift, flann) # or cv.BFMatcher(cv.NORM_L2)
  60. bow_extractor.setVocabulary(dictionary)
  61. for image in tqdm(images):
  62. img = image.read_opencv(gray=True)
  63. kp = dense_keypoints(img)
  64. feat = bow_extractor.compute(img, kp)
  65. yield image.filename, feat