featuresHOGColor.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. #include <math.h>
  2. #include "mex.h"
  3. // small value, used to avoid division by zero
  4. #define eps 0.0001
  5. // unit vectors used to compute gradient orientation
  6. double uu[9] = {1.0000,
  7. 0.9397,
  8. 0.7660,
  9. 0.500,
  10. 0.1736,
  11. -0.1736,
  12. -0.5000,
  13. -0.7660,
  14. -0.9397
  15. };
  16. double vv[9] = {0.0000,
  17. 0.3420,
  18. 0.6428,
  19. 0.8660,
  20. 0.9848,
  21. 0.9848,
  22. 0.8660,
  23. 0.6428,
  24. 0.3420
  25. };
  26. static inline double min(double x, double y) { return (x <= y ? x : y); }
  27. static inline double max(double x, double y) { return (x <= y ? y : x); }
  28. static inline int min(int x, int y) { return (x <= y ? x : y); }
  29. static inline int max(int x, int y) { return (x <= y ? y : x); }
  30. // main function:
  31. // takes a double color image and a bin size
  32. // returns HOG features
  33. mxArray *process(const mxArray *mximage, const mxArray *mxsbin)
  34. {
  35. double *im = (double *)mxGetPr(mximage);
  36. const int *dims = mxGetDimensions(mximage);
  37. if (mxGetNumberOfDimensions(mximage) != 3 ||
  38. dims[2] != 3 ||
  39. mxGetClassID(mximage) != mxDOUBLE_CLASS)
  40. mexErrMsgTxt("Invalid input");
  41. // size of a cell in pixel, used for both dimensions
  42. int sbin = (int)mxGetScalar(mxsbin);
  43. // memory for caching orientation histograms & their norms
  44. int blocks[2];
  45. // compute number of cells fitting into current image given speficied cell size in pixel
  46. // use floor to prevent running over image borders
  47. blocks[0] = (int)floor( (double)dims[0] / (double)sbin );
  48. blocks[1] = (int)floor( (double)dims[1] / (double)sbin );
  49. // pre-allocate memory
  50. double *hist = (double *)mxCalloc(blocks[0]*blocks[1]*18, sizeof(double));
  51. double *norm = (double *)mxCalloc(blocks[0]*blocks[1], sizeof(double));
  52. // memory for HOG features
  53. int out[3];
  54. // we compute as many blocks as possible given the specified cell size and the current image
  55. out[0] = max(blocks[0], 0);
  56. out[1] = max(blocks[1], 0);
  57. // note: previously, a subtraction of 2 was done to guarantee identical bilinear interpolation behaviour in all cells
  58. // however, are more interested in obtaining a correct number of returned cells, rather than in slightly more consistent interpolation results...
  59. // apart from that, the only cells affected are the ones on the top and left border of the cell array
  60. out[2] = 27+4+1;
  61. mxArray *mxfeat = mxCreateNumericArray(3, out, mxDOUBLE_CLASS, mxREAL);
  62. double *feat = (double *)mxGetPr(mxfeat);
  63. int visible[2];
  64. visible[0] = blocks[0]*sbin;
  65. visible[1] = blocks[1]*sbin;
  66. // If you want to be 100% save, init output values here.
  67. // Note that this should not be needed, since every value will be set lateron explicitely.
  68. //
  69. //double *dst = feat;
  70. //for ( int tmp = 0; tmp < out[0]*out[1]*out[2]; tmp++, dst++ )
  71. //{
  72. // *dst = 0.0;
  73. //}
  74. // start by 1 and end by -1 to ensure safe gradient calculations on boundaries
  75. for (int x = 1; x < visible[1]-1; x++)
  76. {
  77. for (int y = 1; y < visible[0]-1; y++)
  78. {
  79. // first color channel
  80. // NOTE: why minimum check? boundary safety is given by for loop ends
  81. // col offset current row position
  82. double *s = im + min(x, dims[1]-1)*dims[0] + min(y, dims[0]-1);
  83. //gradient in y direction
  84. double dy = *(s+1) - *(s-1);
  85. // gradient in x direction
  86. double dx = *(s+dims[0]) - *(s-dims[0]);
  87. // squared gradient strength on current pixel
  88. double v = dx*dx + dy*dy;
  89. // second color channel
  90. s += dims[0]*dims[1];
  91. //gradient in y direction
  92. double dy2 = *(s+1) - *(s-1);
  93. // gradient in x direction
  94. double dx2 = *(s+dims[0]) - *(s-dims[0]);
  95. // squared gradient strength on current pixel
  96. double v2 = dx2*dx2 + dy2*dy2;
  97. // third color channel
  98. s += dims[0]*dims[1];
  99. //gradient in y direction
  100. double dy3 = *(s+1) - *(s-1);
  101. // gradient in x direction
  102. double dx3 = *(s+dims[0]) - *(s-dims[0]);
  103. // squared gradient strength on current pixel
  104. double v3 = dx3*dx3 + dy3*dy3;
  105. // pick channel with strongest gradient
  106. if (v2 > v)
  107. {
  108. v = v2;
  109. dx = dx2;
  110. dy = dy2;
  111. }
  112. if (v3 > v)
  113. {
  114. v = v3;
  115. dx = dx3;
  116. dy = dy3;
  117. }
  118. //
  119. // now, discretize gradient orientation into one of 18 possible (oriented) bins
  120. //
  121. // strength of strongest orientation in this pixel
  122. double best_dot = 0;
  123. // index of strongest orientation in this pixel
  124. int best_o = 0;
  125. for (int o = 0; o < 9; o++)
  126. {
  127. double dot = uu[o]*dx + vv[o]*dy;
  128. if (dot > best_dot)
  129. {
  130. best_dot = dot;
  131. best_o = o;
  132. }
  133. else if (-dot > best_dot)
  134. {
  135. best_dot = -dot;
  136. best_o = o+9;
  137. }
  138. }
  139. // add to 4 histograms around pixel using linear interpolation
  140. // current position normalized to cell scale, e.g. xp = 1.4 -> located in the left part of second cell
  141. // subtraction of 0.5 to move relative to cell center
  142. double xp = ((double)x+0.5)/(double)sbin - 0.5;
  143. double yp = ((double)y+0.5)/(double)sbin - 0.5;
  144. // that's the index of the cell, whose center is directly left of current position in x direction
  145. int ixp = (int)floor(xp);
  146. // that's the index of the cell, whose center is directly on top of current position in y direction
  147. int iyp = (int)floor(yp);
  148. // distance to left, used for weighting the gradient strength by 1-distance, guaranteed to be in [0,1]
  149. double vx0 = xp-ixp;
  150. // distance to top
  151. double vy0 = yp-iyp;
  152. // distance to right
  153. double vx1 = 1.0-vx0;
  154. // distance to bottom
  155. double vy1 = 1.0-vy0;
  156. // normalized gradient strength on current pixel
  157. v = sqrt(v);
  158. // if left upper cell exists
  159. if ( (ixp >= 0) && (iyp >= 0) )
  160. {
  161. *(hist + ixp*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) +=
  162. vx1*vy1*v; // i.e., (1-distX0)*(1-distY0)*v
  163. }
  164. // if right upper cell exists
  165. if ( (ixp+2 < blocks[1]) && (iyp >= 0) )
  166. {
  167. *(hist + (ixp+1)*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) +=
  168. vx0*vy1*v;
  169. }
  170. // if left lower cell exists
  171. if ( (ixp >= 0) && (iyp+2 < blocks[0]) )
  172. {
  173. *(hist + ixp*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) +=
  174. vx1*vy0*v;
  175. }
  176. // if right lower cell exists
  177. if ( (ixp+2 < blocks[1]) && (iyp+2 < blocks[0]) )
  178. {
  179. *(hist + (ixp+1)*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) +=
  180. vx0*vy0*v;
  181. }
  182. }
  183. }
  184. // compute energy in each block by summing over orientations
  185. for (int o = 0; o < 9; o++)
  186. {
  187. double *src1 = hist + o*blocks[0]*blocks[1];
  188. double *src2 = hist + (o+9)*blocks[0]*blocks[1];
  189. double *dst = norm;
  190. double *end = norm + blocks[1]*blocks[0];
  191. while (dst < end)
  192. {
  193. *(dst++) += (*src1 + *src2) * (*src1 + *src2);
  194. src1++;
  195. src2++;
  196. }
  197. }
  198. // compute features
  199. for (int x = 0; x < out[1]; x++)
  200. {
  201. for (int y = 0; y < out[0]; y++)
  202. {
  203. double *dst = feat + x*out[0] + y;
  204. double *src, *p, n1, n2, n3, n4;
  205. // compute normalization factors for all 4 possible blocks of 2x2 cells
  206. // block with current, right, lower, and lower right cell
  207. if ( ( (x+1) < out[1] ) && ( (y+1) < out[0] ) )
  208. {
  209. p = norm + x*blocks[0] + y;
  210. n1 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
  211. }
  212. else
  213. n1 = 1.0 / sqrt(eps);
  214. // block with current, upper, right, and upper right cell
  215. if ( ( (x+1) < out[1] ) && ( (y-1) >= 0 ) )
  216. {
  217. p = norm + x*blocks[0] + (y-1);
  218. n2 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
  219. }
  220. else
  221. n2 = 1.0 / sqrt(eps);
  222. // block with current, lower, left, and lower left cell
  223. if ( ( (x-1) >= 0 ) && ( (y+1) < out[0] ) )
  224. {
  225. p = norm + (x-1)*blocks[0] + y;
  226. n3 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
  227. }
  228. else
  229. n3 = 1.0 / sqrt(eps);
  230. // block with current, upper, left, and upper left cell
  231. if ( ( (x-1) >= 0 ) && ( (y-1) >= 0 ) )
  232. {
  233. p = norm + (x-1)*blocks[0] + (y-1);
  234. n4 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
  235. }
  236. else
  237. n4 = 1.0 / sqrt(eps);
  238. // copy normalization factors for blocks on boundaries
  239. // -----------------
  240. // | n4 | | n2 |
  241. // -----------------
  242. // | | x,y | |
  243. // -----------------
  244. // | n3 | | n1 |
  245. // -----------------
  246. if ( (x) == 0 ) // left boundary
  247. {
  248. if ( (y) == 0 ) // left top corner
  249. {
  250. n4 = n1; n3 = n1; n2 = n1;
  251. }
  252. else if ( (y) == (out[0]-1) ) // left lower corner
  253. {
  254. n4 = n2; n3 = n2; n1 = n2;
  255. }
  256. else
  257. {
  258. n4 = n2; n3 = n1;
  259. }
  260. }
  261. else if ( (x) == (out[1]-1) ) // right boundary
  262. {
  263. if ( (y) == 0 ) // right top corner
  264. {
  265. n4 = n3; n2 = n3; n1 = n3;
  266. }
  267. else if ( (y) == (out[0]-1) ) // right lower corner
  268. {
  269. n3 = n4; n2 = n4; n1 = n4;
  270. }
  271. else
  272. {
  273. n2 = n4; n1 = n3;
  274. }
  275. }
  276. if ( (y) == 0 ) // upper boundary ( corners already tackled)
  277. {
  278. if ( ( x > 0 ) && ( x < (out[1]-1) ) )
  279. {
  280. n4 = n3; n2 = n1;
  281. }
  282. }
  283. else if ( (y) == (out[0]-1) ) // lower boundary ( corners already tackled)
  284. {
  285. if ( ( x > 0 ) && ( x < (out[1]-1) ) )
  286. {
  287. n3 = n4; n1 = n2;
  288. }
  289. }
  290. double t1 = 0;
  291. double t2 = 0;
  292. double t3 = 0;
  293. double t4 = 0;
  294. // contrast-sensitive features
  295. src = hist + (x)*blocks[0] + (y);
  296. for (int o = 0; o < 18; o++)
  297. {
  298. double h1 = min(*src * n1, 0.2);
  299. double h2 = min(*src * n2, 0.2);
  300. double h3 = min(*src * n3, 0.2);
  301. double h4 = min(*src * n4, 0.2);
  302. *dst = 0.5 * (h1 + h2 + h3 + h4);
  303. t1 += h1;
  304. t2 += h2;
  305. t3 += h3;
  306. t4 += h4;
  307. // move pointers to next position
  308. dst += out[0]*out[1];
  309. src += blocks[0]*blocks[1];
  310. }
  311. // contrast-insensitive features
  312. src = hist + (x)*blocks[0] + (y);
  313. for (int o = 0; o < 9; o++)
  314. {
  315. double sum = *src + *(src + 9*blocks[0]*blocks[1]);
  316. double h1 = min(sum * n1, 0.2);
  317. double h2 = min(sum * n2, 0.2);
  318. double h3 = min(sum * n3, 0.2);
  319. double h4 = min(sum * n4, 0.2);
  320. *dst = 0.5 * (h1 + h2 + h3 + h4);
  321. dst += out[0]*out[1];
  322. src += blocks[0]*blocks[1];
  323. }
  324. // texture features
  325. //to be complicable to FFLD code
  326. *dst = 0.2357 * t4;
  327. //*dst = 0.2357 * t1;
  328. dst += out[0]*out[1];
  329. *dst = 0.2357 * t2;
  330. dst += out[0]*out[1];
  331. *dst = 0.2357 * t3;
  332. dst += out[0]*out[1];
  333. //to be complicable to FFLD code
  334. *dst = 0.2357 * t1;
  335. //*dst = 0.2357 * t4;
  336. // truncation feature
  337. dst += out[0]*out[1];
  338. *dst = 0;
  339. }
  340. }
  341. mxFree(hist);
  342. mxFree(norm);
  343. return mxfeat;
  344. }
  345. // matlab entry point
  346. // F = features(image, bin)
  347. // image should be color with double values
  348. void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
  349. {
  350. if (nrhs != 2)
  351. mexErrMsgTxt("Wrong number of inputs");
  352. if (nlhs != 1)
  353. mexErrMsgTxt("Wrong number of outputs");
  354. plhs[0] = process(prhs[0], prhs[1]);
  355. }