function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings )
% function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings )
%
% BRIEF:
%   Run a forward pass of a given net on a single image and grab the
%   features of a specified layer.
%   Requires the Caffe version from 17-07-2015 (hash: 6d92d8fcfe0eea9495ffbc).
%
% INPUT
%   i_image  -- 2d or 3d matrix
%   f_mean   -- the average image of your dataset. This should be the same
%               one that was used during training of the CNN model.
%               Required to be cropped to the input size of your network!
%               See caffe_load_network.m
%   net      -- a previously loaded network, see caffe_load_network.m
%   settings -- optional, (default []), struct with the following possible fields
%     .s_layer
%            -- optional (default: 'relu7'), string, specifies the layer
%               used for feature extraction
%     .b_apply_bilinear_pooling
%            -- optional (default: false), pool the activations of the
%               extraction layer into a bilinear (outer-product) feature
%     .b_skip_normalization_in_bilinear_pooling
%            -- optional (default: false), skip the response normalization
%               applied before the outer product
%     .b_apply_log_M
%            -- optional (default: false), map the bilinear matrix through
%               a matrix logarithm before vectorization
%     .f_sigma
%            -- optional (default: 1e-5), regularizer added to the diagonal
%               before computing the matrix logarithm
%
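% EXAMPLE (a minimal usage sketch; 'myImage.png' is a placeholder, and net /
% f_mean are assumed to come from caffe_load_network.m, see there for details):
%
%   i_image  = imread ( 'myImage.png' );
%   settings = struct ( 's_layer', 'relu7', 'b_apply_bilinear_pooling', true );
%   features = caffe_features_single_image ( i_image, f_mean, net, settings );
%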
%% parse inputs
if ( nargin < 2 )
    error ( 'no mean passed' );
end
if ( nargin < 3 )
    error ( 'no network passed' );
end
if ( nargin < 4 )
    settings = [];
end

s_layer                  = getFieldWithDefault ( settings, 's_layer', 'relu7' );
b_apply_bilinear_pooling = getFieldWithDefault ( settings, 'b_apply_bilinear_pooling', false );
b_skip_normalization_in_bilinear_pooling ...
                         = getFieldWithDefault ( settings, 'b_skip_normalization_in_bilinear_pooling', false );
b_apply_log_M            = getFieldWithDefault ( settings, 'b_apply_log_M', false );
f_sigma                  = getFieldWithDefault ( settings, 'f_sigma', 1e-5 );
%% old caffe layout
% % prepare image for caffe format
% batch_data = zeros(i_width,i_width,3,1,'single');
% batch_data(:,:,:,1) = caffe_prepare_image(i_image,f_mean,i_width);
% batch_data = repmat(batch_data, [1,1,1, batch_size] );
%
%
% %% grab output and adjust desired format
% features = caffe_('get_features',{batch_data},layer);
% features = reshape(features{1},size(features{1},1)*size(features{1},2)*size(features{1},3),size(features{1},4))';
%
% features = double(features(1,:)');
%% new caffe layout
% scale, permute dimensions, subtract mean
data = caffe_prepare_image( i_image, f_mean );
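
% a rough sketch of what caffe_prepare_image is assumed to do (the actual
% implementation lives in caffe_prepare_image.m):
%
%   data = single ( i_image );        % convert to single precision
%   data = data(:, :, [3, 2, 1] );    % RGB -> BGR as expected by caffe
%   data = permute ( data, [2 1 3] ); % flip width and height (column-major)
%   data = data - f_mean;             % subtract the dataset mean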

% check that the network was prepared to work on single images
net_input_shape = net.blobs('data').shape;
i_batch_size    = net_input_shape(4);
assert ( i_batch_size == 1, 'network not reshaped for passing only a single image' );
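
% hint: if the assert fails, the network can be adapted beforehand, e.g.
% (a sketch using the matcaffe reshape calls):
%
%   s_shape = net.blobs('data').shape;
%   net.blobs('data').reshape( [ s_shape(1:3), 1 ] );
%   net.reshape();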

% run a single forward pass
[~] = net.forward({data});

% fetch activations from the specified layer
features = net.blobs( s_layer ).get_data();
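
% note: matcaffe returns blob data in width x height x channel x num order
% (column-major), so a conv/relu layer yields a 3d array here (num == 1),
% while a fully connected layer yields a plain vector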

%% optional: bilinear pooling
if ( b_apply_bilinear_pooling )
    %% efficient version: reshape and sum
    %
    % compute the outer product with sum pooling;
    % this is consistent with the matlab code of Liu et al., ICCV 2015
    if ( ndims ( features ) == 3 )
        i_channelCount = size ( features, 3 );

        % reshape with [] automatically resizes to the correct number of
        % examples, i.e., this is equivalent to
        % reshape( features, size(features,1)*size(features,2), size(features,3) );
        features = reshape ( features, [], i_channelCount );

        % response normalization to increase the comparability of features;
        % this improves the condition of the bilinear matrix
        %
        if ( ~b_skip_normalization_in_bilinear_pooling )
            % the sqrt(sum(...).^2) term equals abs(sum(features,2))
            %
            % note: the max is only a safety measure to prevent division by
            % zero in case *all* values are zero or the signed sum equals zero
            %
            features = bsxfun ( @times, features, 1./( max( 10e-8, sqrt(sum(features,2).^2) ) ) );
        end

        % compute the outer product
        features = features'*features;
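
        % for illustration: the line above is the vectorized equivalent of
        % the naive sum-pooled outer product over all spatial positions p
        % (a sketch, not meant to be executed):
        %
        %   B = zeros ( i_channelCount );
        %   for p = 1:size ( features, 1 )
        %       B = B + features(p,:)' * features(p,:);
        %   end
        %   % B equals features'*features up to numerical precision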
    else
        % vector-shaped activations (e.g., from a fully connected layer):
        % normalize by the absolute value of the signed sum, then compute
        % the outer product
        features = features / sqrt(sum(features)^2);
        features = features*features';
    end
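
    % optional: map the bilinear matrix through a matrix logarithm;
    % features'*features is positive semi-definite, and adding f_sigma on
    % the diagonal makes it positive definite, so logm stays real-valued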
    if ( b_apply_log_M )
        features = logm( features + f_sigma*eye( size(features) ) );
    end

    % take the lower triangle only to remove redundant information
    % -> logical indexing automatically reshapes into a vector
    features = features ( logical(tril(ones(size(features)))) );
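
    % e.g., for a 3x3 bilinear matrix this keeps the 6 entries
    % [ b11 b21 b31 b22 b32 b33 ]', i.e., d*(d+1)/2 values for d channels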

    % pass through the signed square root step (see Lin et al., ICCV 2015)
    features = sign(features).*sqrt(abs(features));

    % apply L2 normalization (see Lin et al., ICCV 2015)
    features = features / sqrt(sum(features.^2));
else
    % vectorize and concatenate activation maps
    features = reshape( features, ...
                        size(features,1)*size(features,2)*size(features,3), ...
                        size(features,4) ...
                      );
end

% convert the output to double precision
features = double(features);

end