123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings )
% function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings )
%
% BRIEF:
%   Run a forward pass of a given net on a single image and fetch the
%   activations of a specified layer. Optionally, the activations are turned
%   into a bilinear-pooled descriptor (outer product, optional matrix
%   logarithm, signed square root, L2 normalization).
%   Requires Caffe version from 17-07-2015 (hash: 6d92d8fcfe0eea9495ffbc)
%
% INPUT
%   i_image  -- 2d or 3d matrix
%   f_mean   -- The average image of your dataset. This should be the same
%               that was used during training of the CNN model.
%               Required to be cropped to the input size of your
%               network! See caffe_load_network.m
%   net      -- a previously loaded network, see caffe_load_network.m
%               Its 'data' blob must be reshaped to a batch size of 1.
%   settings -- optional, (default []), struct with following possible fields
%     .s_layer -- optional (default: 'relu7'), string, specifies the layer
%                 used for feature extraction
%     .b_apply_bilinear_pooling
%              -- optional (default: false), if true, replace the raw
%                 activations by the lower triangle of their outer product
%     .b_skip_normalization_in_bilinear_pooling
%              -- optional (default: false), if true, the activations are
%                 NOT divided by the absolute value of their sum before the
%                 outer product is computed
%     .b_apply_log_M
%              -- optional (default: false), if true, apply the matrix
%                 logarithm to the (regularized) outer product matrix
%     .f_sigma -- optional (default: 1e-5), weight of the identity matrix
%                 added to the outer product before the matrix logarithm
%
% OUTPUT
%   features -- double precision.
%               Without bilinear pooling: activations of s_layer, with the
%               first three dimensions flattened into one.
%               With bilinear pooling: column vector holding the lower
%               triangle of the pooled matrix after signed square root and
%               L2 normalization.
%

    %% parse inputs
    if (nargin<2)
        error ( 'no mean passed');
    end
    if (nargin<3)
        error ( 'no network passed');
    end
    if (nargin<4)
        settings = [];
    end

    s_layer                  = getFieldWithDefault ( settings, 's_layer', 'relu7');
    b_apply_bilinear_pooling = getFieldWithDefault ( settings, 'b_apply_bilinear_pooling', false );
    b_skip_normalization_in_bilinear_pooling ...
                             = getFieldWithDefault ( settings, 'b_skip_normalization_in_bilinear_pooling', false );
    b_apply_log_M            = getFieldWithDefault ( settings, 'b_apply_log_M', false );
    f_sigma                  = getFieldWithDefault ( settings, 'f_sigma', 1e-5 );

    % lower bound for normalization denominators, prevents division by zero
    % in case that *all* values are zero or the signed sum equals zero
    f_minDenominator = 10e-8;

    %% old caffe layout (kept for reference only)
    % % prepare image for caffe format
    % batch_data = zeros(i_width,i_width,3,1,'single');
    % batch_data(:,:,:,1) = caffe_prepare_image(i_image,f_mean,i_width);
    % batch_data = repmat(batch_data, [1,1,1, batch_size] );
    %
    % %% grep output and adjust desired format
    % features = caffe_('get_features',{batch_data},layer);
    % features = reshape(features{1},size(features{1},1)*size(features{1},2)*size(features{1},3),size(features{1},4))';
    %
    % features = double(features(1,:)');

    %% new caffe layout
    % scale, permute dimensions, subtract mean
    data = caffe_prepare_image( i_image, f_mean );

    % check that network was prepared to work on single images
    net_input_shape = net.blobs('data').shape;
    i_batch_size    = net_input_shape(4);

    assert ( i_batch_size == 1, 'network not reshaped for passing only a single image' );

    % run a single forward pass (the returned output blobs are not needed)
    [~] = net.forward({data});

    % fetch activations from specified layer
    features = net.blobs( s_layer ).get_data();

    %% optional: bilinear pooling
    if ( b_apply_bilinear_pooling )

        %% efficient version: reshape and sum
        %
        % compute outer product with sum pooling
        % this is consistent with the matlab code of Lin et al. ICCV 2015
        if ( ndims ( features ) == 3 )
            i_channelCount = size ( features, 3);
            % reshape with [] automatically resizes to correct number of examples,
            % this is equivalent to ...size(features,1)*size(features,2),size(features,3) );
            features = reshape ( features, [], i_channelCount);

            % response normalization to increase comparability of features
            % this improves the condition of the bilinear matrix
            if ( ~b_skip_normalization_in_bilinear_pooling )
                % every row (one spatial location) is divided by the
                % absolute value of its sum over all channels
                features = bsxfun( @rdivide, features, ...
                                   max( f_minDenominator, abs( sum(features,2) ) ) );
            end

            % compute outer product, summed over all locations
            features = features'*features;
        else
            % activations without spatial layout (e.g. a fully connected
            % layer) are treated as a single descriptor; forcing a column
            % vector guarantees that the product below is an outer product
            % and not an inner product
            features = features(:);

            % same normalization (and same switch) as in the 3d case,
            % including the guard against a vanishing sum
            if ( ~b_skip_normalization_in_bilinear_pooling )
                features = features / max( f_minDenominator, abs( sum(features) ) );
            end

            % compute outer product
            features = features*features';
        end

        if ( b_apply_log_M )
            % regularize the diagonal before taking the matrix logarithm
            features = logm( features + f_sigma*eye( size(features) ) );
        end

        % take lower tri-angle only to remove redundant information
        % -> logical indexing automatically reshapes into a column vector
        features = features ( logical(tril(ones(size(features)))));

        % pass through signed square root step (see Lin et al 2015 ICCV)
        features = sign(features).*sqrt(abs(features));

        % apply L2 normalization (see Lin et al 2015 ICCV)
        % an all-zero descriptor is left untouched instead of becoming NaN
        f_norm = sqrt(sum(features.^2));
        if ( f_norm ~= 0 )
            features = features / f_norm;
        end

    else
        % vectorize and concatenate activation maps
        features = reshape( features, ...
                            size(features,1)*size(features,2)*size(features,3), ...
                            size(features,4)...
                          );
    end

    % convert output to double precision
    features = double(features);
end
|