# LocalFeatures.py
# Copyright (c) 2023 Felix Kleinsteuber and Computer Vision Group, Friedrich Schiller University Jena
# Functions related to approach 3 (local features).
# For training and evaluation scripts, see ./train_bow.py and ./eval_bow.py.
import cv2 as cv
import numpy as np
from tqdm import tqdm
from sklearn.cluster import KMeans

from py.Session import SessionImage
  9. def dense_keypoints(img, step=30, size=60):
  10. """Generates a list of densely sampled keypoints on img. The keypoints are arranged tightly
  11. next to each other without spacing. The group of all keypoints is centered in the image.
  12. Args:
  13. img (_type_): Image to sample from. (only the shape is relevant)
  14. step (int, optional): Vertical and horizontal step size between keypoints. Defaults to 30.
  15. size (int, optional): Size of keypoints. Defaults to 60.
  16. Returns:
  17. list[cv.KeyPoint]: List of keypoints
  18. """
  19. # calculate offset to center keypoints
  20. off = ((img.shape[0] % step) // 2, (img.shape[1] % step) // 2)
  21. border_dist = (step + 1) // 2
  22. return [cv.KeyPoint(x, y, size) for y in range(border_dist + off[0], img.shape[0] - border_dist, step)
  23. for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
  24. def extract_descriptors(images: list[SessionImage], kp_step: int = 30, kp_size: int = 60):
  25. """Extracts DSIFT descriptors from the provided images and returns them in a single array.
  26. Args:
  27. images (list[SessionImage]): List of images to read and compute descriptors from.
  28. kp_step (int, optional): Keypoint step size, see dense_keypoints. Defaults to 30.
  29. kp_size (int, optional): Keypoint size, see dense_keypoints. Defaults to 60.
  30. Returns:
  31. np.array, shape=(len(images)*keypoints_per_image, 128): DSIFT descriptors.
  32. """
  33. sift = cv.SIFT_create()
  34. dscs = []
  35. output_kp = False
  36. for image in tqdm(images):
  37. img = image.read_opencv(gray=True)
  38. kp = dense_keypoints(img, kp_step, kp_size)
  39. # output number of keypoints once
  40. if not output_kp:
  41. print(f"{len(kp)} keypoints per image.")
  42. output_kp = True
  43. kp, des = sift.compute(img, kp)
  44. dscs.extend(des)
  45. return np.array(dscs).reshape(-1, 128)
  46. def generate_dictionary_from_descriptors(dscs, dictionary_size: int):
  47. """Clusters the given (D)SIFT descriptors using k-means.
  48. This may take a while depending on the number of descriptors.
  49. Args:
  50. dscs (np.array, shape(-1, 128)): (D)SIFT descriptors for clustering.
  51. dictionary_size (int): Number of k-means clusters.
  52. Returns:
  53. np.array, shape=(dictionary_size, 128): BOW dictionary.
  54. """
  55. assert len(dscs.shape) == 2 and dscs.shape[1] == 128
  56. assert dictionary_size > 0 and dictionary_size <= dscs.shape[0]
  57. kmeans = KMeans(dictionary_size, verbose=1).fit(dscs)
  58. dictionary = kmeans.cluster_centers_
  59. assert dictionary.shape == (dictionary_size, 128)
  60. return dictionary
  61. def pick_random_descriptors(dscs, dictionary_size: int):
  62. """Picks dictionary_size random descriptors to use as a vocabulary.
  63. Much faster but less accurate alternative to kmeans clustering.
  64. Args:
  65. dscs (np.array, shape(-1, 128)): (D)SIFT descriptors to pick from.
  66. dictionary_size (int): Number of clusters / vocabulary size.
  67. Returns:
  68. np.array, shape=(dictionary_size, 128): Randomly picked BOW dictionary.
  69. """
  70. assert len(dscs.shape) == 2 and dscs.shape[1] == 128
  71. assert dictionary_size > 0 and dictionary_size <= dscs.shape[0]
  72. return dscs[np.random.choice(len(dscs), size=dictionary_size, replace=False)]
  73. def generate_bow_features(images: list[SessionImage], dictionaries, kp_step: int = 30, kp_size: int = 60):
  74. """Calculates the BOW features for the provided images for every dictionary.
  75. Yields a list of feature vectors for every image.
  76. Args:
  77. images (list[SessionImage]): List of images to read and compute feature vectors from.
  78. dictionaries (np.array of shape=(num_dicts, dict_size, 128)): List of BOW dictionaries.
  79. kp_step (int, optional): Keypoint step size, see dense_keypoints. Must be identical to the step size used for vocabulary generation. Defaults to 30.
  80. kp_size (int, optional): Keypoint size, see dense_keypoints. Must be identical to the size used for vocabulary generation. Defaults to 60.
  81. Yields:
  82. (str, np.array of shape=(num_dicts, dict_size)): (filename, feature vectors)
  83. """
  84. assert len(dictionaries.shape) == 3 and dictionaries.shape[2] == 128
  85. assert kp_size > 0 and kp_step > 0
  86. extractors = []
  87. for dictionary in dictionaries:
  88. flann = cv.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {"checks": 50})
  89. sift = cv.SIFT_create()
  90. bow_extractor = cv.BOWImgDescriptorExtractor(sift, flann) # or cv.BFMatcher(cv.NORM_L2)
  91. bow_extractor.setVocabulary(dictionary)
  92. extractors.append(bow_extractor)
  93. for image in tqdm(images):
  94. img = image.read_opencv(gray=True)
  95. kp = dense_keypoints(img, kp_step, kp_size)
  96. feat = np.array([ext.compute(img, kp) for ext in extractors])
  97. yield image.filename, feat