% caffe_features_multiple_images.m
  1. function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net, settings )
  2. % function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net, settings )
  3. %
  4. % BRIEF:
  5. % Run a forward pass of a given net on a set of images which are
  6. % listed in an external file and grep features of a specified layer.
  7. % Requires Caffe version from 17-07-2015 (hash: 6d92d8fcfe0eea9495ffbc)
  8. %
  9. % INPUT
  10. % s_filelist -- string, filename to an external list which contains
  11. % image names in each line. Alternatively, the variable is
  12. % given as cell array where each entry contains a loaded
  13. % image.
  14. % f_mean -- The average image of your dataset. This should be the same that was used during training of the CNN model.
  15. % Required to be cropped to the input size of your
  16. % network! See caffe_load_network.m
  17. % net -- a previously loaded network, see caffe_load_network.m
  18. % settings -- optional, (default []), struct with following possible fields
  19. % .s_layer -- optional (default: 'relu7'), string, specifies the layer used for feature exatraction
  20. % .b_apply_bilinear_pooling
  21. % -- optional (default: false),
  22. % .b_skip_normalization_in_bilinear_pooling
  23. % -- optional (default: false),
  24. % .b_apply_log_M
  25. % -- optional (default: false),
  26. % .f_sigma -- optional (default: 1e-5),
  27. %
  28. %% parse inputs
  29. if (nargin<2)
  30. error ( 'no mean passed');
  31. end
  32. if (nargin<3)
  33. error ( 'no network passed');
  34. end
  35. if (nargin<4)
  36. settings = [];
  37. end
  38. s_layer = getFieldWithDefault ( settings, 's_layer', 'relu7');
  39. b_apply_bilinear_pooling = getFieldWithDefault ( settings, 'b_apply_bilinear_pooling', false );
  40. b_skip_normalization_in_bilinear_pooling ...
  41. = getFieldWithDefault ( settings, 'b_skip_normalization_in_bilinear_pooling', false );
  42. b_apply_log_M = getFieldWithDefault ( settings, 'b_apply_log_M', false );
  43. f_sigma = getFieldWithDefault ( settings, 'f_sigma', 1e-5 );
  44. %% prepare list of filenames
  45. b_filelistmode = ischar( s_filelist );
  46. if (b_filelistmode)
  47. % load the file list
  48. fid = fopen( s_filelist );
  49. s_filelist_to_use = textscan(fid,'%s');
  50. s_filelist_to_use = s_filelist_to_use{1};
  51. fclose(fid);
  52. else
  53. % use the passed filelist
  54. s_filelist_to_use = s_filelist;
  55. end
  56. %% new caffe layout
  57. net_input_shape = net.blobs('data').shape;
  58. i_batch_size = net_input_shape(4);
  59. % create tmp for batch
  60. batch_data = {zeros(net_input_shape(1),... %height
  61. net_input_shape(2),... %width
  62. net_input_shape(3),... %width, ...%RGB
  63. i_batch_size,...
  64. 'single')};
  65. % Calculate the starting indices of every batch
  66. slices = 1:i_batch_size:size(s_filelist_to_use,1);
  67. slices(end+1)=size(s_filelist_to_use,1)+1;
  68. % crop the list of files into batches of adequate size
  69. % then run over every batch
  70. for i=1:numel(slices)-1
  71. % debug information for progress
  72. if ( ( i > 1 ) && ( mod(i,10) == 0 ) )
  73. fprintf('Running batch number %i of %i\n',i, numel(slices)-1);
  74. end
  75. % load the images of the next slice
  76. for j=slices(i):slices(i+1)-1;
  77. if (b_filelistmode)
  78. batch_data{1}(:,:,:,j-slices(i)+1) = caffe_prepare_image(imread( s_filelist_to_use{j} ), f_mean );
  79. else
  80. batch_data{1}(:,:,:,j-slices(i)+1) = caffe_prepare_image(s_filelist_to_use{j}, f_mean );
  81. end
  82. end
  83. % run a single forward pass
  84. [~] = net.forward( batch_data );
  85. % fetch activations from specified layer
  86. tmp_feat = net.blobs( s_layer ).get_data();
  87. %% optional: bilinear pooling
  88. if ( b_apply_bilinear_pooling )
  89. %% efficient version: reshape and sum
  90. %
  91. % compute outer product with sum pooling
  92. % this is consistent with the matlab code of liu et al. iccv 2015
  93. for i_img = 1:i_batch_size
  94. if ( i_batch_size ==1 )
  95. b_has_spatial_support = ( ndims ( tmp_feat ) == 3 );
  96. else
  97. b_has_spatial_support = ( ndims ( tmp_feat ) == 4 );
  98. end
  99. if ( b_has_spatial_support )
  100. i_channelCount = size ( tmp_feat, 3);
  101. % reshape with [] automatically resizes to correct number of examples,
  102. % this is equivalent to ...size(features,1)*size(features,2),size(features,3) );
  103. featImg = reshape ( tmp_feat(:,:,:,i_img), [],i_channelCount );% size(features,1)*size(features,2),size(features,3) , 'forder');
  104. % response normalization to increase comparability of features
  105. % this improves the condition of the bilinear matrix
  106. %
  107. if ( ~b_skip_normalization_in_bilinear_pooling )
  108. %this equals 1/abs(sum(features,2))...
  109. %
  110. % note: the max... is just for security reasons to
  111. % prevent division by zero in case that *all*
  112. % values should be zero or the signed sum equals zero
  113. %
  114. featImg = bsxfun(@times, featImg, 1./( max( 10e-8, sqrt(sum(featImg,2).^2) ) ) );
  115. end
  116. % compute outer product
  117. featImg = featImg'*featImg;
  118. else
  119. featImg = tmp_feat(:,i_img)*tmp_feat(:,i_img)';
  120. end
  121. if ( b_apply_log_M )
  122. %channel_count = size(b(ismember(b_struct(3,:)',layer_image)).data,3);
  123. %selection_matrix = logical(tril(ones(channel_count)));
  124. %
  125. %features = logm(features'*features+1e-5*eye(channel_count));
  126. featImg = logm( featImg + f_sigma*eye( size(featImg) ) );
  127. end
  128. % take lower tri-angle only to remove redundant information
  129. % -> logical automatically reshapes into vector
  130. featImg = featImg ( logical(tril(ones(size(featImg)))));
  131. % pass through signed square root step (see Lin et al 2015 ICCV)
  132. featImg = sign(featImg).*sqrt(abs(featImg));
  133. % apply L2 normalization (see Lin et al 2015 ICCV)
  134. featImg = featImg / sqrt(sum(featImg.^2));
  135. % allocate enough space in first run
  136. if ( ~exist('features','var') )
  137. features = zeros( size(featImg,1), size(s_filelist_to_use,1), 'single');
  138. end
  139. % store computed feature accordingly
  140. features( :, slices(i)+i_img-1 ) = featImg;
  141. end
  142. else
  143. % vectorize and concatenate activation maps
  144. if ( ndims( tmp_feat ) > 2 )
  145. tmp_feat = reshape( tmp_feat, ...
  146. size(tmp_feat,1)*size(tmp_feat,2)*size(tmp_feat,3), ...
  147. size(tmp_feat,4)...
  148. );
  149. end
  150. % allocate enough space in first run
  151. if ( ~exist('features','var') )
  152. features = zeros( size(tmp_feat,1), size(s_filelist_to_use,1), 'single');
  153. end
  154. % store computed feature accordingly
  155. features( :, slices(i):(slices(i+1)-1) ) = tmp_feat( :, 1:(slices(i+1)-slices(i)) );
  156. end
  157. end
  158. % convert output to double precision
  159. features = double(features);
  160. end