sift.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391
  1. // file: sift.cpp
  2. // author: Andrea Vedaldi
  3. // description: Sift definition
  4. // AUTORIGHTS
  5. // Copyright (c) 2006 The Regents of the University of California
  6. // All Rights Reserved.
  7. //
  8. // Created by Andrea Vedaldi (UCLA VisionLab)
  9. //
  10. // Permission to use, copy, modify, and distribute this software and its
  11. // documentation for educational, research and non-profit purposes,
  12. // without fee, and without a written agreement is hereby granted,
  13. // provided that the above copyright notice, this paragraph and the
  14. // following three paragraphs appear in all copies.
  15. //
  16. // This software program and documentation are copyrighted by The Regents
  17. // of the University of California. The software program and
  18. // documentation are supplied "as is", without any accompanying services
  19. // from The Regents. The Regents does not warrant that the operation of
  20. // the program will be uninterrupted or error-free. The end-user
  21. // understands that the program was developed for research purposes and
  22. // is advised not to rely exclusively on the program for any reason.
  23. //
  24. // This software embodies a method for which the following patent has
  25. // been issued: "Method and apparatus for identifying scale invariant
  26. // features in an image and use of same for locating an object in an
  27. // image," David G. Lowe, US Patent 6,711,293 (March 23,
  28. // 2004). Provisional application filed March 8, 1999. Asignee: The
  29. // University of British Columbia.
  30. //
  31. // IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
  32. // FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
  33. // INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND
  34. // ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN
  35. // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF
  36. // CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
  37. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  38. // A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
  39. // BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE
  40. // MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  41. /** @name Scale Invariant Feature Transform
  42. **
  43. ** The algorithm is implemented by the class VL::Sift.
  44. **/
  45. #include<vislearning/features/localfeatures/sift.h>
  46. #include<vislearning/features/localfeatures/sift-conv.tpp>
  47. #include<algorithm>
  48. #include<iostream>
  49. #include<sstream>
  50. #include<cassert>
  51. extern "C" {
  52. #if defined (VL_MAC)
  53. #include<libgen.h>
  54. #else
  55. #include<string.h>
  56. }
  57. #endif
  58. using namespace VL ;
  59. // on startup, pre-compute expn(x) = exp(-x)
  60. namespace VL {
  61. namespace Detail {
  62. int const expnTableSize = 256 ;
  63. VL::float_t const expnTableMax = VL::float_t(25.0) ;
  64. VL::float_t expnTable [ expnTableSize + 1 ] ;
  65. struct buildExpnTable
  66. {
  67. buildExpnTable() {
  68. for (int k = 0 ; k < expnTableSize + 1 ; ++k) {
  69. expnTable[k] = exp( - VL::float_t(k) / expnTableSize * expnTableMax ) ;
  70. }
  71. }
  72. } _buildExpnTable ;
  73. }
  74. }
  75. namespace VL {
  76. namespace Detail {
  77. /** Comment eater istream manipulator */
  78. class _cmnt {} cmnt ;
  79. /** @brief Extract a comment from a stream
  80. **
  81. ** The function extracts a block of consecutive comments from an
  82. ** input stream. A comment is a sequence of whitespaces, followed by
  83. ** a `#' character, other characters and terminated at the next line
  84. ** ending. A block of comments is just a sequence of comments.
  85. **/
  86. std::istream&
  87. operator>>(std::istream& is, _cmnt& manip)
  88. {
  89. char c ;
  90. char b [1024] ;
  91. is>>c ;
  92. if ( c != '#' )
  93. return is.putback(c) ;
  94. is.getline(b,1024) ;
  95. return is ;
  96. }
  97. }
  98. /** @brief Insert PGM file into stream
  99. **
  100. ** The function iserts into the stream @a os the grayscale image @a
  101. ** im encoded as a PGM file. The immage is assumed to be normalized
  102. ** in the range 0.0 - 1.0.
  103. **
  104. ** @param os output stream.
  105. ** @param im pointer to image data.
  106. ** @param width image width.
  107. ** @param height image height.
  108. ** @return the stream @a os.
  109. **/
  110. std::ostream&
  111. insertPgm(std::ostream& os, pixel_t const* im, int width, int height)
  112. {
  113. os<< "P5" << "\n"
  114. << width << " "
  115. << height << "\n"
  116. << "255" << "\n" ;
  117. for (int y = 0 ; y < height ; ++y) {
  118. for (int x = 0 ; x < width ; ++x) {
  119. unsigned char v =
  120. (unsigned char)
  121. (std::max(std::min(*im++, 1.0f),0.f) * 255.0f) ;
  122. os << v ;
  123. }
  124. }
  125. return os ;
  126. }
  127. /** @brief Extract PGM file from stream.
  128. **
  129. ** The function extracts from the stream @a in a grayscale image
  130. ** encoded as a PGM file. The function fills the structure @a buffer,
  131. ** containing the image dimensions and a pointer to the image data.
  132. **
  133. ** The image data is an array of floats and is owned by the caller,
  134. ** which should erase it as in
  135. **
  136. ** @code
  137. ** delete [] buffer.data.
  138. ** @endcode
  139. **
  140. ** When the function encouters an error it throws a generic instance
  141. ** of VL::Exception.
  142. **
  143. ** @param in input stream.
  144. ** @param buffer buffer descriptor to be filled.
  145. ** @return the stream @a in.
  146. **/
  147. std::istream&
  148. extractPgm(std::istream& in, PgmBuffer& buffer)
  149. {
  150. pixel_t* im_pt ;
  151. int width ;
  152. int height ;
  153. int maxval ;
  154. char c ;
  155. in>>c ;
  156. if ( c != 'P') VL_THROW("File is not in PGM format") ;
  157. bool is_ascii ;
  158. in>>c ;
  159. switch ( c ) {
  160. case '2' :
  161. is_ascii = true ;
  162. break ;
  163. case '5' :
  164. is_ascii = false ;
  165. break ;
  166. default :
  167. VL_THROW("File is not in PGM format") ;
  168. }
  169. in >> Detail::cmnt
  170. >> width
  171. >> Detail::cmnt
  172. >> height
  173. >> Detail::cmnt
  174. >> maxval ;
  175. // after maxval no more comments, just a whitespace or newline
  176. {
  177. char trash ;
  178. in.get(trash) ;
  179. }
  180. if (maxval > 255)
  181. VL_THROW("Only <= 8-bit per channel PGM files are supported") ;
  182. if (! in.good())
  183. VL_THROW("PGM header parsing error") ;
  184. im_pt = new pixel_t [ width*height ];
  185. try {
  186. if ( is_ascii ) {
  187. pixel_t* start = im_pt ;
  188. pixel_t* end = start + width*height ;
  189. pixel_t norm = pixel_t( maxval ) ;
  190. while ( start != end ) {
  191. int i ;
  192. in >> i ;
  193. if ( ! in.good() ) VL_THROW
  194. ("PGM parsing error file (width="<<width
  195. <<" height="<<height
  196. <<" maxval="<<maxval
  197. <<" at pixel="<<start-im_pt<<")") ;
  198. *start++ = pixel_t( i ) / norm ;
  199. }
  200. } else {
  201. std::streampos beg = in.tellg() ;
  202. char* buffer = new char [width*height] ;
  203. in.read(buffer, width*height) ;
  204. if ( ! in.good() ) VL_THROW
  205. ("PGM parsing error file (width="<<width
  206. <<" height="<<height
  207. <<" maxval="<<maxval
  208. <<" at pixel="<<in.tellg()-beg<<")") ;
  209. pixel_t* start = im_pt ;
  210. pixel_t* end = start + width*height ;
  211. uint8_t* src = reinterpret_cast<uint8_t*>(buffer) ;
  212. while ( start != end ) *start++ = *src++ / 255.0f ;
  213. }
  214. } catch (...) {
  215. delete [] im_pt ;
  216. throw ;
  217. }
  218. buffer.width = width ;
  219. buffer.height = height ;
  220. buffer.data = im_pt ;
  221. return in ;
  222. }
  223. // ===================================================================
  224. // Low level image operations
  225. // -------------------------------------------------------------------
  226. namespace Detail {
  227. /** @brief Copy an image
  228. ** @param dst output imgage buffer.
  229. ** @param src input image buffer.
  230. ** @param width input image width.
  231. ** @param height input image height.
  232. **/
  233. void
  234. copy(pixel_t* dst, pixel_t const* src, int width, int height)
  235. {
  236. memcpy(dst, src, sizeof(pixel_t)*width*height) ;
  237. }
  238. /** @brief Copy an image upsampling two times
  239. **
  240. ** The destination buffer must be at least as big as two times the
  241. ** input buffer. Bilinear interpolation is used.
  242. **
  243. ** @param dst output imgage buffer.
  244. ** @param src input image buffer.
  245. ** @param width input image width.
  246. ** @param height input image height.
  247. **/
  248. void
  249. copyAndUpsampleRows
  250. (pixel_t* dst, pixel_t const* src, int width, int height)
  251. {
  252. for (int y = 0 ; y < height ; ++y) {
  253. pixel_t b, a ;
  254. b = a = *src++ ;
  255. for (int x = 0 ; x < width-1 ; ++x) {
  256. b = *src++ ;
  257. *dst = a ;
  258. dst += height ;
  259. *dst = 0.5*(a+b) ;
  260. dst += height ;
  261. a = b ;
  262. }
  263. *dst = b ;
  264. dst += height ;
  265. *dst = b ;
  266. dst += height ;
  267. dst += 1 - width * 2 * height ;
  268. }
  269. }
  270. /** @brief Copy and downasample an image
  271. **
  272. ** The image is downsampled @a d times, i.e. reduced to @c 1/2^d of
  273. ** its original size. The parameters @a width and @a height are the
  274. ** size of the input image. The destination image is assumed to be @c
  275. ** floor(width/2^d) pixels wide and @c floor(height/2^d) pixels high.
  276. **
  277. ** @param dst output imgage buffer.
  278. ** @param src input image buffer.
  279. ** @param width input image width.
  280. ** @param height input image height.
  281. ** @param d downsampling factor.
  282. **/
  283. void
  284. copyAndDownsample(pixel_t* dst, pixel_t const* src,
  285. int width, int height, int d)
  286. {
  287. for (int y = 0 ; y < height ; y+=d) {
  288. pixel_t const * srcrowp = src + y * width ;
  289. for (int x = 0 ; x < width - (d-1) ; x+=d) {
  290. *dst++ = *srcrowp ;
  291. srcrowp += d ;
  292. }
  293. }
  294. }
  295. }
  296. /** @brief Smooth an image
  297. **
  298. ** The function convolves the image @a src by a Gaussian kernel of
  299. ** variance @a s and writes the result to @a dst. The function also
  300. ** needs a scratch buffer @a dst of the same size of @a src and @a
  301. ** dst.
  302. **
  303. ** @param dst output image buffer.
  304. ** @param temp scratch image buffer.
  305. ** @param src input image buffer.
  306. ** @param width width of the buffers.
  307. ** @param height height of the buffers.
  308. ** @param s standard deviation of the Gaussian kernel.
  309. **/
  310. void
  311. Sift::smooth
  312. (pixel_t* dst, pixel_t* temp,
  313. pixel_t const* src, int width, int height,
  314. VL::float_t s)
  315. {
  316. // make sure a buffer larege enough has been allocated
  317. // to hold the filter
  318. int W = int( ceil( VL::float_t(4.0) * s ) ) ;
  319. if ( ! filter ) {
  320. filterReserved = 0 ;
  321. }
  322. if ( filterReserved < W ) {
  323. filterReserved = W ;
  324. if ( filter ) delete [] filter ;
  325. filter = new pixel_t [ 2* filterReserved + 1 ] ;
  326. }
  327. // pre-compute filter
  328. for (int j = 0 ; j < 2*W+1 ; ++j)
  329. filter[j] = VL::pixel_t
  330. (std::exp
  331. (VL::float_t
  332. (-0.5 * (j-W) * (j-W) / (s*s) ))) ;
  333. // normalize to one
  334. normalize(filter, W) ;
  335. // convolve
  336. econvolve(temp, src, width, height, filter, W) ;
  337. econvolve(dst, temp, height, width, filter, W) ;
  338. }
  339. // ===================================================================
  340. // Sift(), ~Sift()
  341. // -------------------------------------------------------------------
  342. /** @brief Initialize Gaussian scale space parameters
  343. **
  344. ** @param _im_pt Source image data
  345. ** @param _width Soruce image width
  346. ** @param _height Soruce image height
  347. ** @param _sigman Nominal smoothing value of the input image.
  348. ** @param _sigma0 Base smoothing level.
  349. ** @param _O Number of octaves.
  350. ** @param _S Number of levels per octave.
  351. ** @param _omin First octave.
  352. ** @param _smin First level in each octave.
  353. ** @param _smax Last level in each octave.
  354. **/
  355. Sift::Sift(const pixel_t* _im_pt, int _width, int _height,
  356. VL::float_t _sigman,
  357. VL::float_t _sigma0,
  358. int _O, int _S,
  359. int _omin, int _smin, int _smax) : sigman( _sigman ), sigma0( _sigma0 ), O( _O ), S( _S ), omin( _omin ), smin( _smin ), smax( _smax ), magnif( 3.0f ), normalizeDescriptor( true ),
  360. temp( NULL ),
  361. octaves( NULL ),
  362. filter( NULL )
  363. {
  364. process(_im_pt, _width, _height) ;
  365. }
  366. /** @brief Destroy SIFT filter.
  367. **/
  368. Sift::~Sift()
  369. {
  370. freeBuffers() ;
  371. }
  372. /** Allocate buffers. Buffer sizes depend on the image size and the
  373. ** value of omin.
  374. **/
  375. void
  376. Sift::
  377. prepareBuffers()
  378. {
  379. // compute buffer size
  380. int w = (omin >= 0) ? (width >> omin) : (width << -omin) ;
  381. int h = (omin >= 0) ? (height >> omin) : (height << -omin) ;
  382. int size = w*h* std::max
  383. ((smax - smin), 2*((smax+1) - (smin-2) +1)) ;
  384. if ( temp && tempReserved == size ) return ;
  385. freeBuffers() ;
  386. // allocate
  387. temp = new pixel_t [ size ] ;
  388. tempReserved = size ;
  389. tempIsGrad = false ;
  390. tempOctave = 0 ;
  391. octaves = new pixel_t* [ O ] ;
  392. for (int o = 0 ; o < O ; ++o) {
  393. octaves[o] = new pixel_t [ (smax - smin + 1) * w * h ] ;
  394. w >>= 1 ;
  395. h >>= 1 ;
  396. }
  397. }
  398. /** @brief Free buffers.
  399. **
  400. ** This function releases any buffer allocated by prepareBuffers().
  401. **
  402. ** @sa prepareBuffers().
  403. **/
  404. void
  405. Sift::
  406. freeBuffers()
  407. {
  408. if ( filter ) {
  409. delete [] filter ;
  410. }
  411. filter = 0 ;
  412. if ( octaves ) {
  413. for (int o = 0 ; o < O ; ++o) {
  414. delete [] octaves[ o ] ;
  415. }
  416. delete [] octaves ;
  417. }
  418. octaves = 0 ;
  419. if ( temp ) {
  420. delete [] temp ;
  421. }
  422. temp = 0 ;
  423. }
  424. // ===================================================================
  425. // getKeypoint
  426. // -------------------------------------------------------------------
  427. /** @brief Get keypoint from position and scale
  428. **
  429. ** The function returns a keypoint with a given position and
  430. ** scale. Note that the keypoint structure contains fields that make
  431. ** sense only in conjunction with a specific scale space. Therefore
  432. ** the keypoint structure should be re-calculated whenever the filter
  433. ** is applied to a new image, even if the parameters @a x, @a y and
  434. ** @a sigma do not change.
  435. **
  436. ** @param x x coordinate of the center.
  437. ** @peram y y coordinate of the center.
  438. ** @param sigma scale.
  439. ** @return Corresponing keypoint.
  440. **/
  441. Sift::Keypoint
  442. Sift::getKeypoint(VL::float_t x, VL::float_t y, VL::float_t sigma) const
  443. {
  444. /*
  445. The formula linking the keypoint scale sigma to the octave and
  446. scale index is
  447. (1) sigma(o,s) = sigma0 2^(o+s/S)
  448. for which
  449. (2) o + s/S = log2 sigma/sigma0 == phi.
  450. In addition to the scale index s (which can be fractional due to
  451. scale interpolation) a keypoint has an integer scale index is too
  452. (which is the index of the scale level where it was detected in
  453. the DoG scale space). We have the constraints:
  454. - o and is are integer
  455. - is is in the range [smin+1, smax-2 ]
  456. - o is in the range [omin, omin+O-1]
  457. - is = rand(s) most of the times (but not always, due to the way s
  458. is obtained by quadratic interpolation of the DoG scale space).
  459. Depending on the values of smin and smax, often (2) has multiple
  460. solutions is,o that satisfy all constraints. In this case we
  461. choose the one with biggest index o (this saves a bit of
  462. computation).
  463. DETERMINING THE OCTAVE INDEX O
  464. From (2) we have o = phi - s/S and we want to pick the biggest
  465. possible index o in the feasible range. This corresponds to
  466. selecting the smallest possible index s. We write s = is + ds
  467. where in most cases |ds|<.5 (but in general |ds|<1). So we have
  468. o = phi - s/S, s = is + ds , |ds| < .5 (or |ds| < 1).
  469. Since is is in the range [smin+1,smax-2], s is in the range
  470. [smin+.5,smax-1.5] (or [smin,smax-1]), the number o is an integer
  471. in the range phi+[-smax+1.5,-smin-.5] (or
  472. phi+[-smax+1,-smin]). Thus the maximum value of o is obtained for
  473. o = floor(phi-smin-.5) (or o = floor(phi-smin)).
  474. Finally o is clamped to make sure it is contained in the feasible
  475. range.
  476. DETERMINING THE SCALE INDEXES S AND IS
  477. Given o we can derive is by writing (2) as
  478. s = is + ds = S(phi - o).
  479. We then take is = round(s) and clamp its value to be in the
  480. feasible range.
  481. */
  482. int o,ix,iy,is ;
  483. VL::float_t s,phi ;
  484. phi = log2(sigma/sigma0) ;
  485. o = fast_floor( phi - (VL::float_t(smin)+.5)/S ) ;
  486. o = std::min(o, omin+O-1) ;
  487. o = std::max(o, omin ) ;
  488. s = S * (phi - o) ;
  489. is = int(s + 0.5) ;
  490. is = std::min(is, smax - 2) ;
  491. is = std::max(is, smin + 1) ;
  492. VL::float_t per = getOctaveSamplingPeriod(o) ;
  493. ix = int(x / per + 0.5) ;
  494. iy = int(y / per + 0.5) ;
  495. Keypoint key ;
  496. key.o = o ;
  497. key.ix = ix ;
  498. key.iy = iy ;
  499. key.is = is ;
  500. key.x = x ;
  501. key.y = y ;
  502. key.s = s ;
  503. key.sigma = sigma ;
  504. return key ;
  505. }
  506. // ===================================================================
  507. // process()
  508. // -------------------------------------------------------------------
  509. /** @brief Compute Gaussian Scale Space
  510. **
  511. ** The method computes the Gaussian scale space of the specified
  512. ** image. The scale space data is managed internally and can be
  513. ** accessed by means of getOctave() and getLevel().
  514. **
  515. ** @remark Calling this method will delete the list of keypoints
  516. ** constructed by detectKeypoints().
  517. **
  518. ** @param _im_pt pointer to image data.
  519. ** @param _width image width.
  520. ** @param _height image height .
  521. **/
  522. void
  523. Sift::
  524. process(const pixel_t* _im_pt, int _width, int _height)
  525. {
  526. using namespace Detail ;
  527. width = _width ;
  528. height = _height ;
  529. prepareBuffers() ;
  530. VL::float_t sigmak = powf(2.0f, 1.0 / S) ;
  531. VL::float_t dsigma0 = sigma0 * sqrt (1.0f - 1.0f / (sigmak*sigmak) ) ;
  532. // -----------------------------------------------------------------
  533. // Make pyramid base
  534. // -----------------------------------------------------------------
  535. if ( omin < 0 ) {
  536. copyAndUpsampleRows(temp, _im_pt, width, height ) ;
  537. copyAndUpsampleRows(octaves[0], temp, height, 2*width ) ;
  538. for (int o = -1 ; o > omin ; --o) {
  539. copyAndUpsampleRows(temp, octaves[0], width << -o, height << -o) ;
  540. copyAndUpsampleRows(octaves[0], temp, height << -o, 2*(width << -o)) ;
  541. }
  542. } else if ( omin > 0 ) {
  543. copyAndDownsample(octaves[0], _im_pt, width, height, 1 << omin) ;
  544. } else {
  545. copy(octaves[0], _im_pt, width, height) ;
  546. }
  547. {
  548. VL::float_t sa = sigma0 * powf(sigmak, smin) ;
  549. VL::float_t sb = sigman / powf(2.0f, omin) ; // review this
  550. if ( sa > sb ) {
  551. VL::float_t sd = sqrt ( sa*sa - sb*sb ) ;
  552. smooth( octaves[0], temp, octaves[0],
  553. getOctaveWidth(omin),
  554. getOctaveHeight(omin),
  555. sd ) ;
  556. }
  557. }
  558. // -----------------------------------------------------------------
  559. // Make octaves
  560. // -----------------------------------------------------------------
  561. for (int o = omin ; o < omin+O ; ++o) {
  562. // Prepare octave base
  563. if ( o > omin ) {
  564. int sbest = std::min(smin + S, smax) ;
  565. copyAndDownsample(getLevel(o, smin ),
  566. getLevel(o-1, sbest),
  567. getOctaveWidth(o-1),
  568. getOctaveHeight(o-1), 2 ) ;
  569. VL::float_t sa = sigma0 * powf(sigmak, smin ) ;
  570. VL::float_t sb = sigma0 * powf(sigmak, sbest - S ) ;
  571. if (sa > sb ) {
  572. VL::float_t sd = sqrt ( sa*sa - sb*sb ) ;
  573. smooth( getLevel(o,0), temp, getLevel(o,0),
  574. getOctaveWidth(o), getOctaveHeight(o),
  575. sd ) ;
  576. }
  577. }
  578. // Make other levels
  579. for (int s = smin+1 ; s <= smax ; ++s) {
  580. VL::float_t sd = dsigma0 * powf(sigmak, s) ;
  581. smooth( getLevel(o,s), temp, getLevel(o,s-1),
  582. getOctaveWidth(o), getOctaveHeight(o),
  583. sd ) ;
  584. }
  585. }
  586. }
  587. /** @brief Sift detector
  588. **
  589. ** The function runs the SIFT detector on the stored Gaussian scale
  590. ** space (see process()). The detector consists in three steps
  591. **
  592. ** - local maxima detection;
  593. ** - subpixel interpolation;
  594. ** - rejection of weak keypoints (@a threhsold);
  595. ** - rejection of keypoints on edge-like structures (@a edgeThreshold).
  596. **
  597. ** As they are found, keypoints are added to an internal list. This
  598. ** list can be accessed by means of the member functions
  599. ** getKeypointsBegin() and getKeypointsEnd(). The list is ordered by
  600. ** octave, which is usefult to speed-up computeKeypointOrientations()
  601. ** and computeKeypointDescriptor().
  602. **/
  603. void
  604. Sift::detectKeypoints(VL::float_t threshold, VL::float_t edgeThreshold)
  605. {
  606. keypoints.clear() ;
  607. int nValidatedKeypoints = 0 ;
  608. // Process one octave per time
  609. for (int o = omin ; o < omin + O ; ++o) {
  610. int const xo = 1 ;
  611. int const yo = getOctaveWidth(o) ;
  612. int const so = getOctaveWidth(o) * getOctaveHeight(o) ;
  613. int const ow = getOctaveWidth(o) ;
  614. int const oh = getOctaveHeight(o) ;
  615. VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
  616. // -----------------------------------------------------------------
  617. // Difference of Gaussians
  618. // -----------------------------------------------------------------
  619. pixel_t* dog = temp ;
  620. tempIsGrad = false ;
  621. {
  622. pixel_t* pt = dog ;
  623. for (int s = smin ; s <= smax-1 ; ++s) {
  624. pixel_t* srca = getLevel(o, s ) ;
  625. pixel_t* srcb = getLevel(o, s+1) ;
  626. pixel_t* enda = srcb ;
  627. while ( srca != enda ) {
  628. *pt++ = *srcb++ - *srca++ ;
  629. }
  630. }
  631. }
  632. // -----------------------------------------------------------------
  633. // Find points of extremum
  634. // -----------------------------------------------------------------
  635. {
  636. pixel_t* pt = dog + xo + yo + so ;
  637. for (int s = smin+1 ; s <= smax-2 ; ++s) {
  638. for (int y = 1 ; y < oh - 1 ; ++y) {
  639. for (int x = 1 ; x < ow - 1 ; ++x) {
  640. pixel_t v = *pt ;
  641. // assert( (pt - x*xo - y*yo - (s-smin)*so) - dog == 0 ) ;
  642. #define CHECK_NEIGHBORS(CMP,SGN) \
  643. ( v CMP ## = SGN 0.8 * threshold && \
  644. v CMP *(pt + xo) && \
  645. v CMP *(pt - xo) && \
  646. v CMP *(pt + so) && \
  647. v CMP *(pt - so) && \
  648. v CMP *(pt + yo) && \
  649. v CMP *(pt - yo) && \
  650. \
  651. v CMP *(pt + yo + xo) && \
  652. v CMP *(pt + yo - xo) && \
  653. v CMP *(pt - yo + xo) && \
  654. v CMP *(pt - yo - xo) && \
  655. \
  656. v CMP *(pt + xo + so) && \
  657. v CMP *(pt - xo + so) && \
  658. v CMP *(pt + yo + so) && \
  659. v CMP *(pt - yo + so) && \
  660. v CMP *(pt + yo + xo + so) && \
  661. v CMP *(pt + yo - xo + so) && \
  662. v CMP *(pt - yo + xo + so) && \
  663. v CMP *(pt - yo - xo + so) && \
  664. \
  665. v CMP *(pt + xo - so) && \
  666. v CMP *(pt - xo - so) && \
  667. v CMP *(pt + yo - so) && \
  668. v CMP *(pt - yo - so) && \
  669. v CMP *(pt + yo + xo - so) && \
  670. v CMP *(pt + yo - xo - so) && \
  671. v CMP *(pt - yo + xo - so) && \
  672. v CMP *(pt - yo - xo - so) )
  673. if ( CHECK_NEIGHBORS(>,+) || CHECK_NEIGHBORS(<,-) ) {
  674. Keypoint k ;
  675. k.ix = x ;
  676. k.iy = y ;
  677. k.is = s ;
  678. keypoints.push_back(k) ;
  679. }
  680. pt += 1 ;
  681. }
  682. pt += 2 ;
  683. }
  684. pt += 2*yo ;
  685. }
  686. }
  687. // -----------------------------------------------------------------
  688. // Refine local maxima
  689. // -----------------------------------------------------------------
  690. { // refine
  691. KeypointsIter siter ;
  692. KeypointsIter diter ;
  693. for (diter = siter = keypointsBegin() + nValidatedKeypoints ;
  694. siter != keypointsEnd() ;
  695. ++siter) {
  696. int x = int( siter->ix ) ;
  697. int y = int( siter->iy ) ;
  698. int s = int( siter->is ) ;
  699. VL::float_t Dx=0,Dy=0,Ds=0,Dxx=0,Dyy=0,Dss=0,Dxy=0,Dxs=0,Dys=0 ;
  700. VL::float_t b [3] ;
  701. pixel_t* pt ;
  702. int dx = 0 ;
  703. int dy = 0 ;
  704. // must be exec. at least once
  705. for (int iter = 0 ; iter < 5 ; ++iter) {
  706. VL::float_t A[3*3] ;
  707. x += dx ;
  708. y += dy ;
  709. pt = dog
  710. + xo * x
  711. + yo * y
  712. + so * (s - smin) ;
  713. #define at(dx,dy,ds) (*( pt + (dx)*xo + (dy)*yo + (ds)*so))
  714. #define Aat(i,j) (A[(i)+(j)*3])
  715. /* Compute the gradient. */
  716. Dx = 0.5 * (at(+1,0,0) - at(-1,0,0)) ;
  717. Dy = 0.5 * (at(0,+1,0) - at(0,-1,0));
  718. Ds = 0.5 * (at(0,0,+1) - at(0,0,-1)) ;
  719. /* Compute the Hessian. */
  720. Dxx = (at(+1,0,0) + at(-1,0,0) - 2.0 * at(0,0,0)) ;
  721. Dyy = (at(0,+1,0) + at(0,-1,0) - 2.0 * at(0,0,0)) ;
  722. Dss = (at(0,0,+1) + at(0,0,-1) - 2.0 * at(0,0,0)) ;
  723. Dxy = 0.25 * ( at(+1,+1,0) + at(-1,-1,0) - at(-1,+1,0) - at(+1,-1,0) ) ;
  724. Dxs = 0.25 * ( at(+1,0,+1) + at(-1,0,-1) - at(-1,0,+1) - at(+1,0,-1) ) ;
  725. Dys = 0.25 * ( at(0,+1,+1) + at(0,-1,-1) - at(0,-1,+1) - at(0,+1,-1) ) ;
  726. /* Solve linear system. */
  727. Aat(0,0) = Dxx ;
  728. Aat(1,1) = Dyy ;
  729. Aat(2,2) = Dss ;
  730. Aat(0,1) = Aat(1,0) = Dxy ;
  731. Aat(0,2) = Aat(2,0) = Dxs ;
  732. Aat(1,2) = Aat(2,1) = Dys ;
  733. b[0] = - Dx ;
  734. b[1] = - Dy ;
  735. b[2] = - Ds ;
  736. // Gauss elimination
  737. for (int j = 0 ; j < 3 ; ++j) {
  738. // look for leading pivot
  739. VL::float_t maxa = 0 ;
  740. VL::float_t maxabsa = 0 ;
  741. int maxi = -1 ;
  742. int i ;
  743. for (i = j ; i < 3 ; ++i) {
  744. VL::float_t a = Aat(i,j) ;
  745. VL::float_t absa = fabsf( a ) ;
  746. if ( absa > maxabsa ) {
  747. maxa = a ;
  748. maxabsa = absa ;
  749. maxi = i ;
  750. }
  751. }
  752. // singular?
  753. if ( maxabsa < 1e-10f ) {
  754. b[0] = 0 ;
  755. b[1] = 0 ;
  756. b[2] = 0 ;
  757. break ;
  758. }
  759. i = maxi ;
  760. // swap j-th row with i-th row and
  761. // normalize j-th row
  762. for (int jj = j ; jj < 3 ; ++jj) {
  763. std::swap( Aat(j,jj) , Aat(i,jj) ) ;
  764. Aat(j,jj) /= maxa ;
  765. }
  766. std::swap( b[j], b[i] ) ;
  767. b[j] /= maxa ;
  768. // elimination
  769. for (int ii = j+1 ; ii < 3 ; ++ii) {
  770. VL::float_t x = Aat(ii,j) ;
  771. for (int jj = j ; jj < 3 ; ++jj) {
  772. Aat(ii,jj) -= x * Aat(j,jj) ;
  773. }
  774. b[ii] -= x * b[j] ;
  775. }
  776. }
  777. // backward substitution
  778. for (int i = 2 ; i > 0 ; --i) {
  779. VL::float_t x = b[i] ;
  780. for (int ii = i-1 ; ii >= 0 ; --ii) {
  781. b[ii] -= x * Aat(ii,i) ;
  782. }
  783. }
  784. /* If the translation of the keypoint is big, move the keypoint
  785. * and re-iterate the computation. Otherwise we are all set.
  786. */
  787. dx= ((b[0] > 0.6 && x < ow-2) ? 1 : 0 )
  788. + ((b[0] < -0.6 && x > 1 ) ? -1 : 0 ) ;
  789. dy= ((b[1] > 0.6 && y < oh-2) ? 1 : 0 )
  790. + ((b[1] < -0.6 && y > 1 ) ? -1 : 0 ) ;
  791. /*
  792. std::cout<<x<<","<<y<<"="<<at(0,0,0)
  793. <<"("
  794. <<at(0,0,0)+0.5 * (Dx * b[0] + Dy * b[1] + Ds * b[2])<<")"
  795. <<" "<<std::flush ;
  796. */
  797. if ( dx == 0 && dy == 0 ) break ;
  798. }
  799. /* std::cout<<std::endl ; */
  800. // Accept-reject keypoint
  801. {
  802. VL::float_t val = at(0,0,0) + 0.5 * (Dx * b[0] + Dy * b[1] + Ds * b[2]) ;
  803. VL::float_t score = (Dxx+Dyy)*(Dxx+Dyy) / (Dxx*Dyy - Dxy*Dxy) ;
  804. VL::float_t xn = x + b[0] ;
  805. VL::float_t yn = y + b[1] ;
  806. VL::float_t sn = s + b[2] ;
  807. if (fast_abs(val) > threshold &&
  808. score < (edgeThreshold+1)*(edgeThreshold+1)/edgeThreshold &&
  809. score >= 0 &&
  810. fast_abs(b[0]) < 1.5 &&
  811. fast_abs(b[1]) < 1.5 &&
  812. fast_abs(b[2]) < 1.5 &&
  813. xn >= 0 &&
  814. xn <= ow-1 &&
  815. yn >= 0 &&
  816. yn <= oh-1 &&
  817. sn >= smin &&
  818. sn <= smax ) {
  819. diter->o = o ;
  820. diter->ix = x ;
  821. diter->iy = y ;
  822. diter->is = s ;
  823. diter->x = xn * xperiod ;
  824. diter->y = yn * xperiod ;
  825. diter->s = sn ;
  826. diter->sigma = getScaleFromIndex(o,sn) ;
  827. ++diter ;
  828. }
  829. }
  830. } // next candidate keypoint
  831. // prepare for next octave
  832. keypoints.resize( diter - keypoints.begin() ) ;
  833. nValidatedKeypoints = keypoints.size() ;
  834. } // refine block
  835. } // next octave
  836. }
  837. // ===================================================================
  838. // computeKeypointOrientations()
  839. // -------------------------------------------------------------------
  840. /** @brief Compute modulus and phase of the gradient
  841. **
  842. ** The function computes the modulus and the angle of the gradient of
  843. ** the specified octave @a o. The result is stored in a temporary
  844. ** internal buffer accessed by computeKeypointDescriptor() and
  845. ** computeKeypointOrientations().
  846. **
  847. ** The SIFT detector provides keypoint with scale index s in the
  848. ** range @c smin+1 and @c smax-2. As such, the buffer contains only
  849. ** these levels.
  850. **
  851. ** If called mutliple time on the same data, the function exits
  852. ** immediately.
  853. **
  854. ** @param o octave of interest.
  855. **/
  856. void
  857. Sift::prepareGrad(int o)
  858. {
  859. int const ow = getOctaveWidth(o) ;
  860. int const oh = getOctaveHeight(o) ;
  861. int const xo = 1 ;
  862. int const yo = ow ;
  863. int const so = oh*ow ;
  864. if ( ! tempIsGrad || tempOctave != o ) {
  865. // compute dx/dy
  866. for (int s = smin+1 ; s <= smax-2 ; ++s) {
  867. for (int y = 1 ; y < oh-1 ; ++y ) {
  868. pixel_t* src = getLevel(o, s) + xo + yo*y ;
  869. pixel_t* end = src + ow - 1 ;
  870. pixel_t* grad = 2 * (xo + yo*y + (s - smin -1)*so) + temp ;
  871. while (src != end) {
  872. VL::float_t Gx = 0.5 * ( *(src+xo) - *(src-xo) ) ;
  873. VL::float_t Gy = 0.5 * ( *(src+yo) - *(src-yo) ) ;
  874. VL::float_t m = fast_sqrt( Gx*Gx + Gy*Gy ) ;
  875. VL::float_t t = fast_mod_2pi( fast_atan2(Gy, Gx) + VL::float_t(2*M_PI) );
  876. *grad++ = pixel_t( m ) ;
  877. *grad++ = pixel_t( t ) ;
  878. ++src ;
  879. }
  880. }
  881. }
  882. }
  883. tempIsGrad = true ;
  884. tempOctave = o ;
  885. }
  886. /** @brief Compute the orientation(s) of a keypoint
  887. **
  888. ** The function computes the orientation of the specified keypoint.
  889. ** The function returns up to four different orientations, obtained
  890. ** as strong peaks of the histogram of gradient orientations (a
  891. ** keypoint can theoretically generate more than four orientations,
  892. ** but this is very unlikely).
  893. **
  894. ** @remark The function needs to compute the gradient modululs and
  895. ** orientation of the Gaussian scale space octave to which the
  896. ** keypoint belongs. The result is cached, but discarded if different
  897. ** octaves are visited. Thereofre it is much quicker to evaluate the
  898. ** keypoints in their natural octave order.
  899. **
  900. ** The keypoint must lie within the scale space. In particular, the
  901. ** scale index is supposed to be in the range @c smin+1 and @c smax-1
  902. ** (this is from the SIFT detector). If this is not the case, the
  903. ** computation is silently aborted and no orientations are returned.
  904. **
  905. ** @param angles buffers to store the resulting angles.
  906. ** @param keypoint keypoint to process.
  907. ** @return number of orientations found.
  908. **/
  909. int
  910. Sift::computeKeypointOrientations(VL::float_t angles [4], Keypoint keypoint)
  911. {
  912. int const nbins = 36 ;
  913. VL::float_t const winFactor = 1.5 ;
  914. VL::float_t hist [nbins] ;
  915. // octave
  916. int o = keypoint.o ;
  917. VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
  918. // offsets to move in the Gaussian scale space octave
  919. const int ow = getOctaveWidth(o) ;
  920. const int oh = getOctaveHeight(o) ;
  921. const int xo = 2 ;
  922. const int yo = xo * ow ;
  923. const int so = yo * oh ;
  924. // keypoint fractional geometry
  925. VL::float_t x = keypoint.x / xperiod ;
  926. VL::float_t y = keypoint.y / xperiod ;
  927. VL::float_t sigma = keypoint.sigma / xperiod ;
  928. // shall we use keypoints.ix,iy,is here?
  929. int xi = ((int) (x+0.5)) ;
  930. int yi = ((int) (y+0.5)) ;
  931. int si = keypoint.is ;
  932. VL::float_t const sigmaw = winFactor * sigma ;
  933. int W = (int) floor(3.0 * sigmaw) ;
  934. // skip the keypoint if it is out of bounds
  935. if (o < omin ||
  936. o >=omin+O ||
  937. xi < 0 ||
  938. xi > ow-1 ||
  939. yi < 0 ||
  940. yi > oh-1 ||
  941. si < smin+1 ||
  942. si > smax-2 ) {
  943. std::cerr<<"!"<<std::endl ;
  944. return 0 ;
  945. }
  946. // make sure that the gradient buffer is filled with octave o
  947. prepareGrad(o) ;
  948. // clear the SIFT histogram
  949. std::fill(hist, hist + nbins, 0) ;
  950. // fill the SIFT histogram
  951. pixel_t* pt = temp + xi * xo + yi * yo + (si - smin -1) * so ;
  952. #undef at
  953. #define at(dx,dy) (*(pt + (dx)*xo + (dy)*yo))
  954. for (int ys = std::max(-W, 1-yi) ; ys <= std::min(+W, oh -2 -yi) ; ++ys) {
  955. for (int xs = std::max(-W, 1-xi) ; xs <= std::min(+W, ow -2 -xi) ; ++xs) {
  956. VL::float_t dx = xi + xs - x;
  957. VL::float_t dy = yi + ys - y;
  958. VL::float_t r2 = dx*dx + dy*dy ;
  959. // limit to a circular window
  960. if (r2 >= W*W+0.5) continue ;
  961. VL::float_t wgt = VL::fast_expn( r2 / (2*sigmaw*sigmaw) ) ;
  962. VL::float_t mod = *(pt + xs*xo + ys*yo) ;
  963. VL::float_t ang = *(pt + xs*xo + ys*yo + 1) ;
  964. // int bin = (int) floor( nbins * ang / (2*M_PI) ) ;
  965. int bin = (int) floor( nbins * ang / (2*M_PI) ) ;
  966. hist[bin] += mod * wgt ;
  967. }
  968. }
  969. // smooth the histogram
  970. #if defined VL_LOWE_STRICT
  971. // Lowe's version apparently has a little issue with orientations
  972. // around + or - pi, which we reproduce here for compatibility
  973. for (int iter = 0; iter < 6; iter++) {
  974. VL::float_t prev = hist[nbins/2] ;
  975. for (int i = nbins/2-1; i >= -nbins/2 ; --i) {
  976. int const j = (i + nbins) % nbins ;
  977. int const jp = (i - 1 + nbins) % nbins ;
  978. VL::float_t newh = (prev + hist[j] + hist[jp]) / 3.0;
  979. prev = hist[j] ;
  980. hist[j] = newh ;
  981. }
  982. }
  983. #else
  984. // this is slightly more correct
  985. for (int iter = 0; iter < 6; iter++) {
  986. VL::float_t prev = hist[nbins-1] ;
  987. VL::float_t first = hist[0] ;
  988. int i ;
  989. for (i = 0; i < nbins - 1; i++) {
  990. VL::float_t newh = (prev + hist[i] + hist[(i+1) % nbins]) / 3.0;
  991. prev = hist[i] ;
  992. hist[i] = newh ;
  993. }
  994. hist[i] = (prev + hist[i] + first)/3.0 ;
  995. }
  996. #endif
  997. // find the histogram maximum
  998. VL::float_t maxh = * std::max_element(hist, hist + nbins) ;
  999. // find peaks within 80% from max
  1000. int nangles = 0 ;
  1001. for (int i = 0 ; i < nbins ; ++i) {
  1002. VL::float_t h0 = hist [i] ;
  1003. VL::float_t hm = hist [(i-1+nbins) % nbins] ;
  1004. VL::float_t hp = hist [(i+1+nbins) % nbins] ;
  1005. // is this a peak?
  1006. if ( h0 > 0.8*maxh && h0 > hm && h0 > hp ) {
  1007. // quadratic interpolation
  1008. // VL::float_t di = -0.5 * (hp - hm) / (hp+hm-2*h0) ;
  1009. VL::float_t di = -0.5 * (hp - hm) / (hp+hm-2*h0) ;
  1010. VL::float_t th = 2*M_PI * (i+di+0.5) / nbins ;
  1011. angles [ nangles++ ] = th ;
  1012. if ( nangles == 4 )
  1013. goto enough_angles ;
  1014. }
  1015. }
  1016. enough_angles:
  1017. return nangles ;
  1018. }
  1019. // ===================================================================
  1020. // computeKeypointDescriptor()
  1021. // -------------------------------------------------------------------
  1022. namespace Detail {
  1023. /** Normalizes in norm L_2 a descriptor. */
  1024. void
  1025. normalize_histogram(VL::float_t* L_begin, VL::float_t* L_end)
  1026. {
  1027. VL::float_t* L_iter ;
  1028. VL::float_t norm = 0.0 ;
  1029. for (L_iter = L_begin; L_iter != L_end ; ++L_iter)
  1030. norm += (*L_iter) * (*L_iter) ;
  1031. norm = fast_sqrt(norm) ;
  1032. for (L_iter = L_begin; L_iter != L_end ; ++L_iter)
  1033. *L_iter /= (norm + std::numeric_limits<VL::float_t>::epsilon() ) ;
  1034. }
  1035. }
  1036. /** @brief SIFT descriptor
  1037. **
  1038. ** The function computes the descriptor of the keypoint @a keypoint.
  1039. ** The function fills the buffer @a descr_pt which must be large
  1040. ** enough. The funciton uses @a angle0 as rotation of the keypoint.
  1041. ** By calling the function multiple times, different orientations can
  1042. ** be evaluated.
  1043. **
  1044. ** @remark The function needs to compute the gradient modululs and
  1045. ** orientation of the Gaussian scale space octave to which the
  1046. ** keypoint belongs. The result is cached, but discarded if different
  1047. ** octaves are visited. Thereofre it is much quicker to evaluate the
  1048. ** keypoints in their natural octave order.
  1049. **
  1050. ** The function silently abort the computations of keypoints without
  1051. ** the scale space boundaries. See also siftComputeOrientations().
  1052. **/
  1053. void
  1054. Sift::computeKeypointDescriptor
  1055. (VL::float_t* descr_pt,
  1056. Keypoint keypoint,
  1057. VL::float_t angle0)
  1058. {
  1059. /* The SIFT descriptor is a three dimensional histogram of the position
  1060. * and orientation of the gradient. There are NBP bins for each spatial
  1061. * dimesions and NBO bins for the orientation dimesion, for a total of
  1062. * NBP x NBP x NBO bins.
  1063. *
  1064. * The support of each spatial bin has an extension of SBP = 3sigma
  1065. * pixels, where sigma is the scale of the keypoint. Thus all the bins
  1066. * together have a support SBP x NBP pixels wide . Since weighting and
  1067. * interpolation of pixel is used, another half bin is needed at both
  1068. * ends of the extension. Therefore, we need a square window of SBP x
  1069. * (NBP + 1) pixels. Finally, since the patch can be arbitrarly rotated,
  1070. * we need to consider a window 2W += sqrt(2) x SBP x (NBP + 1) pixels
  1071. * wide.
  1072. */
  1073. // octave
  1074. int o = keypoint.o ;
  1075. VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
  1076. // offsets to move in Gaussian scale space octave
  1077. const int ow = getOctaveWidth(o) ;
  1078. const int oh = getOctaveHeight(o) ;
  1079. const int xo = 2 ;
  1080. const int yo = xo * ow ;
  1081. const int so = yo * oh ;
  1082. // keypoint fractional geometry
  1083. VL::float_t x = keypoint.x / xperiod;
  1084. VL::float_t y = keypoint.y / xperiod ;
  1085. VL::float_t sigma = keypoint.sigma / xperiod ;
  1086. VL::float_t st0 = sinf( angle0 ) ;
  1087. VL::float_t ct0 = cosf( angle0 ) ;
  1088. // shall we use keypoints.ix,iy,is here?
  1089. int xi = ((int) (x+0.5)) ;
  1090. int yi = ((int) (y+0.5)) ;
  1091. int si = keypoint.is ;
  1092. // const VL::float_t magnif = 3.0f ;
  1093. const int NBO = 8 ;
  1094. const int NBP = 4 ;
  1095. const VL::float_t SBP = magnif * sigma ;
  1096. const int W = (int) floor (sqrt(2.0) * SBP * (NBP + 1) / 2.0 + 0.5) ;
  1097. /* Offsets to move in the descriptor. */
  1098. /* Use Lowe's convention. */
  1099. const int binto = 1 ;
  1100. const int binyo = NBO * NBP ;
  1101. const int binxo = NBO ;
  1102. // const int bino = NBO * NBP * NBP ;
  1103. int bin ;
  1104. // check bounds
  1105. if (o < omin ||
  1106. o >=omin+O ||
  1107. xi < 0 ||
  1108. xi > ow-1 ||
  1109. yi < 0 ||
  1110. yi > oh-1 ||
  1111. si < smin+1 ||
  1112. si > smax-2 )
  1113. return ;
  1114. // make sure gradient buffer is up-to-date
  1115. prepareGrad(o) ;
  1116. std::fill( descr_pt, descr_pt + NBO*NBP*NBP, 0 ) ;
  1117. /* Center the scale space and the descriptor on the current keypoint.
  1118. * Note that dpt is pointing to the bin of center (SBP/2,SBP/2,0).
  1119. */
  1120. pixel_t const * pt = temp + xi*xo + yi*yo + (si - smin - 1)*so ;
  1121. VL::float_t * dpt = descr_pt + (NBP/2) * binyo + (NBP/2) * binxo ;
  1122. #define atd(dbinx,dbiny,dbint) *(dpt + (dbint)*binto + (dbiny)*binyo + (dbinx)*binxo)
  1123. /*
  1124. * Process pixels in the intersection of the image rectangle
  1125. * (1,1)-(M-1,N-1) and the keypoint bounding box.
  1126. */
  1127. for (int dyi = std::max(-W, 1-yi) ; dyi <= std::min(+W, oh-2-yi) ; ++dyi) {
  1128. for (int dxi = std::max(-W, 1-xi) ; dxi <= std::min(+W, ow-2-xi) ; ++dxi) {
  1129. // retrieve
  1130. VL::float_t mod = *( pt + dxi*xo + dyi*yo + 0 ) ;
  1131. VL::float_t angle = *( pt + dxi*xo + dyi*yo + 1 ) ;
  1132. VL::float_t theta = fast_mod_2pi(-angle + angle0) ; // lowe compatible ?
  1133. // fractional displacement
  1134. VL::float_t dx = xi + dxi - x;
  1135. VL::float_t dy = yi + dyi - y;
  1136. // get the displacement normalized w.r.t. the keypoint
  1137. // orientation and extension.
  1138. VL::float_t nx = ( ct0 * dx + st0 * dy) / SBP ;
  1139. VL::float_t ny = (-st0 * dx + ct0 * dy) / SBP ;
  1140. VL::float_t nt = NBO * theta / (2*M_PI) ;
  1141. // Get the gaussian weight of the sample. The gaussian window
  1142. // has a standard deviation equal to NBP/2. Note that dx and dy
  1143. // are in the normalized frame, so that -NBP/2 <= dx <= NBP/2.
  1144. VL::float_t const wsigma = NBP/2 ;
  1145. VL::float_t win = VL::fast_expn((nx*nx + ny*ny)/(2.0 * wsigma * wsigma)) ;
  1146. // The sample will be distributed in 8 adjacent bins.
  1147. // We start from the ``lower-left'' bin.
  1148. int binx = fast_floor( nx - 0.5 ) ;
  1149. int biny = fast_floor( ny - 0.5 ) ;
  1150. int bint = fast_floor( nt ) ;
  1151. VL::float_t rbinx = nx - (binx+0.5) ;
  1152. VL::float_t rbiny = ny - (biny+0.5) ;
  1153. VL::float_t rbint = nt - bint ;
  1154. int dbinx ;
  1155. int dbiny ;
  1156. int dbint ;
  1157. // Distribute the current sample into the 8 adjacent bins
  1158. for (dbinx = 0 ; dbinx < 2 ; ++dbinx) {
  1159. for (dbiny = 0 ; dbiny < 2 ; ++dbiny) {
  1160. for (dbint = 0 ; dbint < 2 ; ++dbint) {
  1161. if ( binx+dbinx >= -(NBP/2) &&
  1162. binx+dbinx < (NBP/2) &&
  1163. biny+dbiny >= -(NBP/2) &&
  1164. biny+dbiny < (NBP/2) ) {
  1165. VL::float_t weight = win
  1166. * mod
  1167. * fast_abs (1 - dbinx - rbinx)
  1168. * fast_abs (1 - dbiny - rbiny)
  1169. * fast_abs (1 - dbint - rbint) ;
  1170. atd(binx+dbinx, biny+dbiny, (bint+dbint) % NBO) += weight ;
  1171. }
  1172. }
  1173. }
  1174. }
  1175. }
  1176. }
  1177. /* Standard SIFT descriptors are normalized, truncated and normalized again */
  1178. if ( normalizeDescriptor ) {
  1179. /* Normalize the histogram to L2 unit length. */
  1180. Detail::normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
  1181. /* Truncate at 0.2. */
  1182. for (bin = 0; bin < NBO*NBP*NBP ; ++bin) {
  1183. if (descr_pt[bin] > 0.2) descr_pt[bin] = 0.2;
  1184. }
  1185. /* Normalize again. */
  1186. Detail::normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
  1187. }
  1188. }
  1189. // namespace VL
  1190. }