LocalFeatures.py

# Functions related to approach 3 (local features).
# For training and evaluation scripts, see ./train_bow.py and ./eval_bow.py.

import cv2 as cv
import numpy as np
from tqdm import tqdm
from sklearn.cluster import KMeans

from py.Session import SessionImage


def dense_keypoints(img, step=30, size=60):
    """Generates a list of densely sampled keypoints on img. The keypoints are laid out
    in a regular grid with no gaps between them, and the grid is centered in the image.

    Args:
        img (np.ndarray): Image to sample from (only its shape is used).
        step (int, optional): Vertical and horizontal step size between keypoints. Defaults to 30.
        size (int, optional): Size of the keypoints. Defaults to 60.

    Returns:
        list[cv.KeyPoint]: List of keypoints.
    """
    # calculate offset to center the keypoint grid in the image
    off = ((img.shape[0] % step) // 2, (img.shape[1] % step) // 2)
    border_dist = (step + 1) // 2
    return [cv.KeyPoint(x, y, size)
            for y in range(border_dist + off[0], img.shape[0] - border_dist, step)
            for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
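
# Worked example (illustrative sketch, not used by the module): for a 480x640 image with
# the defaults step=30 and size=60, off = (0, 5) and border_dist = 15, so keypoints sit at
# y in range(15, 465, 30) (15 rows) and x in range(20, 625, 30) (21 columns), i.e.
# 15 * 21 = 315 keypoints in total.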


def extract_descriptors(images: list[SessionImage], kp_step: int = 30, kp_size: int = 60):
    """Extracts DSIFT descriptors from the provided images and returns them in a single array.

    Args:
        images (list[SessionImage]): List of images to read and compute descriptors from.
        kp_step (int, optional): Keypoint step size, see dense_keypoints. Defaults to 30.
        kp_size (int, optional): Keypoint size, see dense_keypoints. Defaults to 60.

    Returns:
        np.array, shape=(len(images) * keypoints_per_image, 128): DSIFT descriptors.
    """
    sift = cv.SIFT_create()
    dscs = []
    output_kp = False
    for image in tqdm(images):
        img = image.read_opencv(gray=True)
        kp = dense_keypoints(img, kp_step, kp_size)
        # output number of keypoints once
        if not output_kp:
            print(f"{len(kp)} keypoints per image.")
            output_kp = True
        kp, des = sift.compute(img, kp)
        dscs.extend(des)
    return np.array(dscs).reshape(-1, 128)


def generate_dictionary_from_descriptors(dscs, dictionary_size: int):
    """Clusters the given (D)SIFT descriptors using k-means.
    This may take a while depending on the number of descriptors.

    Args:
        dscs (np.array, shape=(-1, 128)): (D)SIFT descriptors for clustering.
        dictionary_size (int): Number of k-means clusters.

    Returns:
        np.array, shape=(dictionary_size, 128): BOW dictionary.
    """
    assert len(dscs.shape) == 2 and dscs.shape[1] == 128
    assert dictionary_size > 0 and dictionary_size <= dscs.shape[0]
    kmeans = KMeans(dictionary_size, verbose=1).fit(dscs)
    dictionary = kmeans.cluster_centers_
    assert dictionary.shape == (dictionary_size, 128)
    return dictionary
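
# Shape sketch (synthetic input, illustration only; real descriptors would come from
# extract_descriptors above):
#
#   fake_dscs = np.random.default_rng(0).random((5000, 128)).astype(np.float32)
#   vocab = generate_dictionary_from_descriptors(fake_dscs, 64)  # -> shape (64, 128)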


def generate_bow_features(images: list[SessionImage], dictionary, kp_step: int = 30, kp_size: int = 60):
    """Calculates the BOW features for the provided images using the given dictionary.
    Yields a feature vector for every image.

    Args:
        images (list[SessionImage]): List of images to read and compute feature vectors from.
        dictionary (np.array, shape=(-1, 128)): BOW dictionary.
        kp_step (int, optional): Keypoint step size, see dense_keypoints. Must be identical to the step size used for vocabulary generation. Defaults to 30.
        kp_size (int, optional): Keypoint size, see dense_keypoints. Must be identical to the size used for vocabulary generation. Defaults to 60.

    Yields:
        (str, np.array of shape (1, dictionary.shape[0])): (filename, feature vector)
    """
    assert len(dictionary.shape) == 2 and dictionary.shape[1] == 128
    assert kp_size > 0 and kp_step > 0

    flann = cv.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {"checks": 50})
    sift = cv.SIFT_create()
    bow_extractor = cv.BOWImgDescriptorExtractor(sift, flann)  # or cv.BFMatcher(cv.NORM_L2)
    bow_extractor.setVocabulary(dictionary)

    for image in tqdm(images):
        img = image.read_opencv(gray=True)
        kp = dense_keypoints(img, kp_step, kp_size)
        feat = bow_extractor.compute(img, kp)
        yield image.filename, feat
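

# Hedged usage sketch of the full pipeline (not executed here). How the SessionImage list
# is obtained is project-specific, see ./train_bow.py and ./eval_bow.py; `images` below is
# assumed to be such a list[SessionImage], and the dictionary size is an arbitrary example.
#
#   dscs = extract_descriptors(images, kp_step=30, kp_size=60)
#   vocab = generate_dictionary_from_descriptors(dscs, dictionary_size=1024)
#   features = dict(generate_bow_features(images, vocab, kp_step=30, kp_size=60))
#
# kp_step and kp_size must match between descriptor extraction and feature generation,
# as noted in the generate_bow_features docstring.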