sift.h 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. // file: sift.hpp
  2. // author: Andrea Vedaldi
  3. // description: Sift declaration
  4. // AUTORIGHTS
  5. // Copyright (c) 2006 The Regents of the University of California
  6. // All Rights Reserved.
  7. //
  8. // Created by Andrea Vedaldi (UCLA VisionLab)
  9. //
  10. // Permission to use, copy, modify, and distribute this software and its
  11. // documentation for educational, research and non-profit purposes,
  12. // without fee, and without a written agreement is hereby granted,
  13. // provided that the above copyright notice, this paragraph and the
  14. // following three paragraphs appear in all copies.
  15. //
  16. // This software program and documentation are copyrighted by The Regents
  17. // of the University of California. The software program and
  18. // documentation are supplied "as is", without any accompanying services
  19. // from The Regents. The Regents does not warrant that the operation of
  20. // the program will be uninterrupted or error-free. The end-user
  21. // understands that the program was developed for research purposes and
  22. // is advised not to rely exclusively on the program for any reason.
  23. //
  24. // This software embodies a method for which the following patent has
  25. // been issued: "Method and apparatus for identifying scale invariant
  26. // features in an image and use of same for locating an object in an
  27. // image," David G. Lowe, US Patent 6,711,293 (March 23,
  28. // 2004). Provisional application filed March 8, 1999. Asignee: The
  29. // University of British Columbia.
  30. //
  31. // IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
  32. // FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
  33. // INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND
  34. // ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN
  35. // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF
  36. // CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
  37. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  38. // A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
  39. // BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE
  40. // MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  41. #ifndef VL_SIFT_HPP
  42. #define VL_SIFT_HPP
  #include<cmath>
  #include<istream>
  #include<limits>
  #include<ostream>
  #include<sstream>
  #include<string>
  #include<valarray>
  #include<vector>
  // Floating point type used for the library's internal computations
  // (VL::float_t aliases it): double only when VL_USEFASTMATH is defined
  // and VL_MAC is not, float in every other configuration.
  #if defined(VL_USEFASTMATH) && !defined(VL_MAC)
  #  define VL_FASTFLOAT double
  #else
  #  define VL_FASTFLOAT float
  #endif

  // Two-step stringification. VL_XEAS turns its argument into a string
  // literal as written; VL_EXPAND_AND_STRINGIFY adds one level of
  // indirection so that a macro argument is expanded first and the
  // result of the expansion is what gets stringified.
  #define VL_XEAS(x) #x
  #define VL_EXPAND_AND_STRINGIFY(x) VL_XEAS(x)
  59. /** @brief VisionLab namespace */
  60. namespace VL {
  61. /** @brief Pixel data type */
  62. typedef float pixel_t ;
  63. /** @brief Floating point data type
  64. **
  65. ** Although floats are precise enough for this applicatgion, on Intel
  66. ** based architecture using doubles for floating point computations
  67. ** turns out to be much faster.
  68. **/
  69. typedef VL_FASTFLOAT float_t ;
  70. /** @brief 32-bit floating data type */
  71. typedef float float32_t ;
  72. /** @brief 64-bit floating data type */
  73. typedef double float64_t ;
  74. /** @brief 32-bit integer data type */
  75. typedef int int32_t ;
  76. /** @brief 64-bit integer data type */
  77. typedef long long int int64_t ;
  78. /** @brief 32-bit unsigned integer data type */
  79. typedef int uint32_t ;
  80. /** @brief 8-bit unsigned integer data type */
  81. typedef char unsigned uint8_t ;
  82. /** @name Fast math
  83. **
  84. ** We provide approximate mathematical functions. These are usually
  85. ** rather faster than the corresponding standard library functions.
  86. **/
  87. /*@{*/
  88. float fast_resqrt(float x) ;
  89. double fast_resqrt(double x) ;
  90. float_t fast_expn(float_t x) ;
  91. float_t fast_abs(float_t x) ;
  92. float_t fast_mod_2pi(float_t x) ;
  93. float_t fast_atan2(float_t y, float_t x) ;
  94. float_t fast_sqrt(float_t x) ;
  95. int32_t fast_floor(float_t x) ;
  96. /*@}*/
  /** @brief Generic exception
   **
   ** Simple value type that carries a textual error message in the
   ** public Exception::msg member. The GCC-style attribute gives the
   ** type default symbol visibility (NOTE(review): presumably so that
   ** the exception can be caught across shared library boundaries —
   ** confirm against the build settings).
   **/
  struct
  __attribute__ ((__visibility__("default")))
  Exception
  {
    /** @brief Build generic exception with message
     **
     ** The message can be accessed as the Exception::msg data member.
     ** The argument is taken by const reference and copied once into
     ** Exception::msg, instead of being copied a second time through a
     ** by-value parameter.
     **
     ** @param _msg message.
     **/
    Exception(std::string const& _msg) : msg(_msg) { }

    /** Exception message */
    std::string msg ;
  } ;
  /** @brief Throw generic exception
   **
   ** The macro executes the stream operations @a x to obtain an error
   ** message. The message is then wrapped in a generic exception
   ** VL::Exception and thrown.
   **
   ** The temporary stream uses a macro-specific name so that an
   ** identifier appearing in @a x (for instance a caller variable named
   ** @c oss) can never be captured by the macro's own local.
   **
   ** The expansion uses std::ostringstream, so @c <sstream> must have
   ** been included at the point of use.
   **
   ** The macro expands to a brace-enclosed block. A semicolon written
   ** after the invocation is therefore an extra empty statement: do not
   ** use the macro as the unbraced body of an @c if that is followed by
   ** an @c else.
   **
   ** @param x sequence of stream operations.
   **/
  #define VL_THROW(x)                                 \
    {                                                 \
      std::ostringstream vl_throw_stream_ ;           \
      vl_throw_stream_ << x ;                         \
      throw VL::Exception(vl_throw_stream_.str()) ;   \
    }
  126. /** @name PGM input/output */
  127. /*@{*/
  128. /** @brief PGM buffer descriptor
  129. **
  130. ** The structure describes a gray scale image and it is used by the
  131. ** PGM input/output functions. The fileds are self-explanatory.
  132. **/
  133. struct PgmBuffer
  134. {
  135. int width ; ///< Image width
  136. int height ; ///< Image hegith
  137. pixel_t* data ; ///< Image data
  138. } ;
  139. std::ostream& insertPgm(std::ostream&, pixel_t const* im, int width, int height) ;
  140. std::istream& extractPgm(std::istream&, PgmBuffer& buffer) ;
  141. /*@}*/
  142. /** @brief SIFT filter
  143. **
  144. ** This class is a filter computing the Scale Invariant Feature
  145. ** Transform (SIFT).
  146. **/
  147. class Sift
  148. {
  149. public:
  150. /** @brief SIFT keypoint
  151. **
  152. ** A SIFT keypoint is charactedized by a location x,y and a scale
  153. ** @c sigma. The scale is obtained from the level index @c s and
  154. ** the octave index @c o through a simple formula (see the PDF
  155. ** documentation).
  156. **
  157. ** In addition to the location, scale indexes and scale, we also
  158. ** store the integer location and level. The integer location is
  159. ** unnormalized, i.e. relative to the resolution of the octave
  160. ** containing the keypoint (octaves are downsampled).
  161. **/
  162. struct Keypoint
  163. {
  164. int o ; ///< Keypoint octave index
  165. int ix ; ///< Keypoint integer X coordinate (unnormalized)
  166. int iy ; ///< Keypoint integer Y coordinate (unnormalized)
  167. int is ; ///< Keypoint integer scale indiex
  168. float_t x ; ///< Keypoint fractional X coordinate
  169. float_t y ; ///< Keypoint fractional Y coordinate
  170. float_t s ; ///< Keypoint fractional scale index
  171. float_t sigma ; ///< Keypoint scale
  172. } ;
  173. typedef std::vector<Keypoint> Keypoints ; ///< Keypoint list datatype
  174. typedef Keypoints::iterator KeypointsIter ; ///< Keypoint list iter datatype
  175. typedef Keypoints::const_iterator KeypointsConstIter ; ///< Keypoint list const iter datatype
  176. /** @brief Constructors and destructors */
  177. /*@{*/
  178. Sift(const pixel_t* _im_pt, int _width, int _height,
  179. float_t _sigman,
  180. float_t _sigma0,
  181. int _O, int _S,
  182. int _omin, int _smin, int _smax) ;
  183. ~Sift() ;
  184. /*@}*/
  185. void process(const pixel_t* _im_pt, int _width, int _height) ;
  186. /** @brief Querying the Gaussian scale space */
  187. /*@{*/
  188. VL::pixel_t* getOctave(int o) ;
  189. VL::pixel_t* getLevel(int o, int s) ;
  190. int getWidth() const ;
  191. int getHeight() const ;
  192. int getOctaveWidth(int o) const ;
  193. int getOctaveHeight(int o) const ;
  194. VL::float_t getOctaveSamplingPeriod(int o) const ;
  195. VL::float_t getScaleFromIndex(VL::float_t o, VL::float_t s) const ;
  196. Keypoint getKeypoint(VL::float_t x, VL::float_t y, VL::float_t s) const ;
  197. /*@}*/
  198. /** @brief Descriptor parameters */
  199. /*@{*/
  200. bool getNormalizeDescriptor() const ;
  201. void setNormalizeDescriptor(bool) ;
  202. void setMagnification(VL::float_t) ;
  203. VL::float_t getMagnification() const ;
  204. /*@}*/
  205. /** @brief Detector and descriptor */
  206. /*@{*/
  207. void detectKeypoints(VL::float_t threshold, VL::float_t edgeThreshold) ;
  208. int computeKeypointOrientations(VL::float_t angles [4], Keypoint keypoint) ;
  209. void computeKeypointDescriptor(VL::float_t* descr_pt, Keypoint keypoint, VL::float_t angle) ;
  210. KeypointsIter keypointsBegin() ;
  211. KeypointsIter keypointsEnd() ;
  212. /*@}*/
  213. private:
  214. void prepareBuffers() ;
  215. void freeBuffers() ;
  216. void smooth(VL::pixel_t * dst,
  217. VL::pixel_t * temp,
  218. VL::pixel_t const * src, int width, int height,
  219. VL::float_t s) ;
  220. void prepareGrad(int o) ;
  221. // scale space parameters
  222. VL::float_t sigman ;
  223. VL::float_t sigma0 ;
  224. VL::float_t sigmak ;
  225. int O ;
  226. int S ;
  227. int omin ;
  228. int smin ;
  229. int smax ;
  230. int width ;
  231. int height ;
  232. // descriptor parameters
  233. VL::float_t magnif ;
  234. bool normalizeDescriptor ;
  235. // buffers
  236. VL::pixel_t* temp ;
  237. int tempReserved ;
  238. bool tempIsGrad ;
  239. int tempOctave ;
  240. VL::pixel_t** octaves ;
  241. VL::pixel_t* filter ;
  242. int filterReserved ;
  243. Keypoints keypoints ;
  244. } ;
  245. }
  246. // Include inline functions definitions
  247. #include<vislearning/features/localfeatures/sift.ipp>
  248. // VL_SIFT_HPP
  249. #endif