# train_bow.py
# Approach 3: Local features
# This script is used for generating a BOW vocabulary using
# densely sampled SIFT features on Lapse images.
# See eval_bow.py for evaluation.
  5. import argparse
  6. import os
  7. import numpy as np
  8. from timeit import default_timer as timer
  9. from datetime import timedelta
  10. from py.Dataset import Dataset
  11. from py.LocalFeatures import extract_descriptors, generate_dictionary_from_descriptors, generate_bow_features, pick_random_descriptors
  12. def main():
  13. parser = argparse.ArgumentParser(description="BOW train script")
  14. parser.add_argument("dataset_dir", type=str, help="Directory of the dataset containing all session folders")
  15. parser.add_argument("session_name", type=str, help="Name of the session to use for Lapse images (e.g. marten_01)")
  16. parser.add_argument("--clusters", type=int, help="Number of clusters / BOW vocabulary size", default=1024)
  17. parser.add_argument("--step_size", type=int, help="DSIFT keypoint step size. Smaller step size = more keypoints.", default=30)
  18. parser.add_argument("--keypoint_size", type=int, help="DSIFT keypoint size. Defaults to step_size.", default=-1)
  19. parser.add_argument("--include_motion", action="store_true", help="Include motion images for training.")
  20. parser.add_argument("--random_prototypes", action="store_true", help="Pick random prototype vectors instead of doing kmeans.")
  21. args = parser.parse_args()
  22. if args.keypoint_size <= 0:
  23. args.keypoint_size = args.step_size
  24. print(f"Using keypoint size {args.keypoint_size} with step size {args.step_size}.")
  25. ds = Dataset(args.dataset_dir)
  26. session = ds.create_session(args.session_name)
  27. save_dir = f"./bow_train_NoBackup/{session.name}"
  28. suffix = ""
  29. if args.include_motion:
  30. suffix += "_motion"
  31. print("Including motion data for prototype selection!")
  32. if args.random_prototypes:
  33. suffix += "_random"
  34. print("Picking random prototypes instead of using kmeans!")
  35. lapse_dscs_file = os.path.join(save_dir, f"lapse_dscs_{args.step_size}_{args.keypoint_size}.npy")
  36. motion_dscs_file = os.path.join(save_dir, f"motion_dscs_{args.step_size}_{args.keypoint_size}.npy")
  37. dictionary_file = os.path.join(save_dir, f"bow_dict_{args.step_size}_{args.keypoint_size}_{args.clusters}{suffix}.npy")
  38. train_feat_file = os.path.join(save_dir, f"bow_train_{args.step_size}_{args.keypoint_size}_{args.clusters}{suffix}.npy")
  39. # Lapse DSIFT descriptors
  40. if os.path.isfile(lapse_dscs_file):
  41. if os.path.isfile(dictionary_file):
  42. # if dictionary file already exists, we don't need the lapse descriptors
  43. print(f"{dictionary_file} already exists, skipping lapse descriptor extraction...")
  44. else:
  45. print(f"{lapse_dscs_file} already exists, loading lapse descriptors from file... ", end="")
  46. lapse_dscs = np.load(lapse_dscs_file)
  47. assert lapse_dscs.shape[-1] == 128
  48. lapse_dscs = lapse_dscs.reshape(-1, 128)
  49. print(f"Loaded {len(lapse_dscs)} lapse descriptors!")
  50. else:
  51. # Step 1 - extract dense SIFT descriptors
  52. print("Extracting lapse descriptors...")
  53. lapse_dscs = extract_descriptors(list(session.generate_lapse_images()), kp_step=args.step_size, kp_size=args.keypoint_size)
  54. os.makedirs(save_dir, exist_ok=True)
  55. np.save(lapse_dscs_file, lapse_dscs)
  56. # Motion DSIFT descriptors
  57. if args.include_motion:
  58. if os.path.isfile(motion_dscs_file):
  59. if os.path.isfile(dictionary_file):
  60. # if dictionary file already exists, we don't need the descriptors
  61. print(f"{dictionary_file} already exists, skipping motion descriptor extraction...")
  62. else:
  63. print(f"{motion_dscs_file} already exists, loading motion descriptors from file...", end="")
  64. motion_dscs = np.load(motion_dscs_file)
  65. assert motion_dscs.shape[-1] == 128
  66. motion_dscs = motion_dscs.reshape(-1, 128)
  67. print(f"Loaded {len(motion_dscs)} motion descriptors!")
  68. lapse_dscs = np.concatenate([lapse_dscs, motion_dscs])
  69. else:
  70. # Step 1b - extract dense SIFT descriptors from motion images
  71. print("Extracting motion descriptors...")
  72. motion_dscs = extract_descriptors(list(session.generate_motion_images()), kp_step=args.step_size, kp_size=args.keypoint_size)
  73. os.makedirs(save_dir, exist_ok=True)
  74. np.save(motion_dscs_file, motion_dscs)
  75. lapse_dscs = np.concatenate([lapse_dscs, motion_dscs])
  76. # BOW dictionary
  77. if os.path.isfile(dictionary_file):
  78. print(f"{dictionary_file} already exists, loading BOW dictionary from file...")
  79. dictionary = np.load(dictionary_file)
  80. else:
  81. # Step 2 - create BOW dictionary from Lapse SIFT descriptors
  82. print(f"Creating BOW vocabulary with {args.clusters} clusters from {len(lapse_dscs)} descriptors...")
  83. start_time = timer()
  84. if args.random_prototypes:
  85. dictionary = pick_random_descriptors(lapse_dscs, args.clusters)
  86. else:
  87. dictionary = generate_dictionary_from_descriptors(lapse_dscs, args.clusters)
  88. end_time = timer()
  89. delta_time = timedelta(seconds=end_time-start_time)
  90. print(f"Clustering took {delta_time}.")
  91. np.save(dictionary_file, dictionary)
  92. # Extract Lapse BOW features using vocabulary (train data)
  93. if os.path.isfile(train_feat_file):
  94. print(f"{train_feat_file} already exists, skipping lapse BOW feature extraction...")
  95. else:
  96. # Step 3 - calculate training data (BOW features of Lapse images)
  97. print(f"Extracting BOW features from Lapse images...")
  98. features = [feat for _, feat in generate_bow_features(list(session.generate_lapse_images()), dictionary, kp_step=args.step_size, kp_size=args.keypoint_size)]
  99. np.save(train_feat_file, features)
  100. print("Complete!")
  101. if __name__ == "__main__":
  102. main()