ComputerVisionJena
/
Matlab_CaffeTools


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
							function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings )
% function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings )
%
%  BRIEF:
%   Run a forward pass of a given net on a single image and fetch the
%   activations of a specified layer, optionally followed by bilinear pooling.
%   Requires Caffe version from 17-07-2015 (hash: 6d92d8fcfe0eea9495ffbc)
%
%  INPUT
%   i_image     -- 2d or 3d matrix
%   f_mean      -- The average image of your dataset. This should be the same
%                  that was used during training of the CNN model.
%                  Required to be cropped to the input size of your
%                  network! See caffe_load_network.m
%   net         -- a previously loaded network, see caffe_load_network.m
%                  Its 'data' blob must have batch size 1.
%   settings    -- optional, (default []), struct with following possible fields
%     .s_layer     -- optional (default: 'relu7'), string, specifies the
%                     layer used for feature extraction
%     .b_apply_bilinear_pooling
%                  -- optional (default: false), if true, the activations are
%                     turned into their outer product, reduced to the lower
%                     triangle, passed through a signed square root and
%                     L2-normalized
%     .b_skip_normalization_in_bilinear_pooling
%                  -- optional (default: false), if true, the per-location
%                     response normalization before the outer product is
%                     skipped. Only honored for 3-d activations; vector-shaped
%                     activations are always normalized.
%     .b_apply_log_M
%                  -- optional (default: false), if true, the matrix logarithm
%                     of the (regularized) outer product is taken
%     .f_sigma     -- optional (default: 1e-5), weight of the identity matrix
%                     added to the outer product before the matrix logarithm
%
%  OUTPUT
%   features    -- double, extracted features. Without bilinear pooling the
%                  activations are vectorized; with bilinear pooling this is
%                  the vector of lower-triangle entries of the pooled matrix.
%

    %% parse inputs
    if (nargin<2)
        error ( 'no mean passed');
    end
    if (nargin<3)
        error ( 'no network passed');
    end
    if (nargin<4)
        settings = [];
    end


    s_layer                  = getFieldWithDefault ( settings, 's_layer',                  'relu7');
    b_apply_bilinear_pooling = getFieldWithDefault ( settings, 'b_apply_bilinear_pooling', false );
    b_skip_normalization_in_bilinear_pooling ...
                             = getFieldWithDefault ( settings, 'b_skip_normalization_in_bilinear_pooling', false );
    b_apply_log_M            = getFieldWithDefault ( settings, 'b_apply_log_M',            false );
    f_sigma                  = getFieldWithDefault ( settings, 'f_sigma',                  1e-5 );

    % lower bound for normalization denominators, prevents division by zero
    f_min_denominator        = 10e-8;


    %% old caffe layout
%     % prepare image for caffe format
%     batch_data          = zeros(i_width,i_width,3,1,'single');
%     batch_data(:,:,:,1) = caffe_prepare_image(i_image,f_mean,i_width);
%     batch_data          = repmat(batch_data, [1,1,1, batch_size] );
%
%
%     %% grep output and adjust desired format
%     features = caffe_('get_features',{batch_data},layer);
%     features = reshape(features{1},size(features{1},1)*size(features{1},2)*size(features{1},3),size(features{1},4))';
%
%     features = double(features(1,:)');

    %% new caffe layout
    % scale, permute dimensions, subtract mean
    data             = caffe_prepare_image( i_image, f_mean );

    % check that network was prepared to work on single images
    net_input_shape  = net.blobs('data').shape;
    i_batch_size     = net_input_shape(4);

    assert (  i_batch_size == 1, 'network not reshaped for passing only a single image' );

    % run a single forward pass, the returned outputs are not needed
    [~] = net.forward({data});

    % fetch activations from specified layer
    features = net.blobs( s_layer ).get_data();

    %% optional: bilinear pooling
    if ( b_apply_bilinear_pooling )

        %% efficient version: reshape and sum
        %
        % compute outer product with sum pooling
        % this is consistent with the matlab code of liu et al. iccv 2015
        if ( ndims ( features ) == 3 )
            i_channelCount = size ( features, 3);
            % reshape with [] automatically resizes to correct number of examples,
            % this is equivalent to ...size(features,1)*size(features,2),size(features,3) );
            features  = reshape ( features, [],i_channelCount);

            % response normalization to increase comparability of features
            % this improves the condition of the bilinear matrix
            %
            if ( ~b_skip_normalization_in_bilinear_pooling )
                % divide every row by the absolute value of its signed sum
                %
                % note: the max... is just for security reasons to
                % prevent division by zero in case that *all*
                % values should be zero or the signed sum equals zero
                %
                f_row_norm = max( f_min_denominator, abs( sum(features,2) ) );
                features   = bsxfun(@times, features, 1./f_row_norm );
            end
            % compute outer product
            features = features'*features;
        else
            % same normalization as above for non-3-d activations:
            % divide by the absolute value of the signed sum of all entries,
            % bounded from below to prevent division by zero.
            % summing over features(:) keeps the denominator a scalar
            f_norm   = max( f_min_denominator, abs( sum(features(:)) ) );
            features = features / f_norm;
            features = features*features';
        end


        if ( b_apply_log_M )
            % regularize the diagonal before taking the matrix logarithm
            features = logm( features + f_sigma*eye( size(features) ) );
        end

        % take lower tri-angle only to remove redundant information
        % -> logical automatically reshapes into vector
        features     = features ( logical(tril(ones(size(features)))));

        % pass through signed square root step  (see Lin et al 2015 ICCV)
        features     = sign(features).*sqrt(abs(features));

        % apply L2 normalization (see Lin et al 2015 ICCV)
        % an all-zero descriptor is left untouched instead of turning into NaN
        f_l2_norm    = sqrt(sum(features.^2));
        if ( f_l2_norm > 0 )
            features = features / f_l2_norm;
        end

    else
        % vectorize and concatenate activation maps
        features = reshape( features, ...
                            size(features,1)*size(features,2)*size(features,3), ...
                            size(features,4)...
                          );
    end



    % convert output to double precision
    features = double(features);
end