feature extraction supports bilinear pooling and logm transformation

Alexander Freytag 9 years ago
parent
commit
c21c1976ce
3 changed files with 193 additions and 30 deletions
  1. caffe_features_multiple_images.m (+93, -19)
  2. caffe_features_single_image.m (+74, -11)
  3. getFieldWithDefault.m (+26, -0)

+ 93 - 19
caffe_features_multiple_images.m

@@ -1,5 +1,5 @@
-function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net, s_layer)
-% function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net, s_layer)
+function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net, settings ) 
+% function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net, settings ) 
 % 
 %  BRIEF:
 %   Run a forward pass of a given net on a set of images which are 
@@ -15,7 +15,15 @@ function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net,
 %                  Required to be cropped to the input size of your
 %                  network! See caffe_load_network.m
 %   net         -- a previously loaded network, see caffe_load_network.m
-%   s_layer     -- optional (default: 'relu7'), string, specifies the layer used for feature extraction
+%   settings    -- optional (default: []), struct with the following possible fields
+%     .s_layer     -- optional (default: 'relu7'), string, specifies the layer used for feature extraction
+%     .b_apply_bilinear_pooling
+%                  -- optional (default: false), apply bilinear (outer product) pooling to the activation maps
+%     .b_skip_normalization_in_bilinear_pooling
+%                  -- optional (default: false), skip the response normalization before the outer product
+%     .b_apply_log_M
+%                  -- optional (default: false), apply the matrix logarithm to the pooled feature matrix
+%     .f_sigma     -- optional (default: 1e-5), regularizer added to the diagonal before logm
 %
 
 
@@ -27,8 +35,16 @@ function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net,
         error ( 'no network passed');
     end
     if (nargin<4)
-        s_layer = 'relu7';
-    end 
+        settings = [];
+    end    
+    
+    
+    s_layer                  = getFieldWithDefault ( settings, 's_layer',                  'relu7');
+    b_apply_bilinear_pooling = getFieldWithDefault ( settings, 'b_apply_bilinear_pooling', false );
+    b_skip_normalization_in_bilinear_pooling ...
+                             = getFieldWithDefault ( settings, 'b_skip_normalization_in_bilinear_pooling', false );
+    b_apply_log_M            = getFieldWithDefault ( settings, 'b_apply_log_M',            false );
+    f_sigma                  = getFieldWithDefault ( settings, 'f_sigma',                  1e-5 );
     
     %% prepare list of filenames
     b_filelistmode = ischar( s_filelist );
@@ -81,23 +97,81 @@ function [ features ] = caffe_features_multiple_images( s_filelist, f_mean, net,
         [~] = net.forward( batch_data );
         
         % fetch activations from specified layer
-        tmp_feat = net.blobs( s_layer ).get_data();    
+        tmp_feat = net.blobs( s_layer ).get_data();
         
-        % vectorize and concatenate activation maps
-        if ( ndims( tmp_feat ) > 2 )
-            tmp_feat = reshape( tmp_feat, ...
-                                size(tmp_feat,1)*size(tmp_feat,2)*size(tmp_feat,3), ...
-                                size(tmp_feat,4)...
-                              );    
-        end
-                      
-        % allocate enough space in first run       
-        if ( ~exist('features','var') )
-            features = zeros( size(tmp_feat,1), size(s_filelist_to_use,1), 'single');
+        %% optional: bilinear pooling
+        if ( b_apply_bilinear_pooling )    
+            %% efficient version: reshape and sum
+            %
+            % compute outer product with sum pooling
+            % this is consistent with the matlab code of Lin et al. ICCV 2015
+            for i_img = 1:i_batch_size
+                if ( ndims ( tmp_feat ) == 4 )
+                    i_channelCount = size ( tmp_feat, 3);   
+                    % reshape with [] automatically resizes to correct number of examples,
+                    % this is equivalent to ...size(features,1)*size(features,2),size(features,3) );                    
+                    featImg = reshape ( tmp_feat(:,:,:,i_img), [],i_channelCount );% size(features,1)*size(features,2),size(features,3) , 'forder');
+                    
+                    % response normalization to increase comparability of features 
+                    % this improves the condition of the bilinear matrix 
+                    %
+                    if ( ~b_skip_normalization_in_bilinear_pooling )
+                        %FIXME this equals 1/abs(sum(features,2))...         
+                        featImg = bsxfun(@times, featImg, 1./sqrt(sum(featImg,2).^2)); 
+                    end
+                    % compute outer product
+                    featImg = featImg'*featImg;
+                else
+                    featImg = tmp_feat(:,i_img)*tmp_feat(:,i_img)';
+                end                    
+
+                if ( b_apply_log_M )
+                    %channel_count = size(b(ismember(b_struct(3,:)',layer_image)).data,3);
+                    %selection_matrix = logical(tril(ones(channel_count)));            
+                    %
+                    %features = logm(features'*features+1e-5*eye(channel_count));
+
+                    featImg = logm( featImg + f_sigma*eye( size(featImg) ) );
+                end
+
+                % take lower triangle only to remove redundant information
+                % -> logical automatically reshapes into vector
+                featImg = featImg ( logical(tril(ones(size(featImg)))));     
+
+                % pass through signed square root step  (see Lin et al 2015 ICCV)
+                featImg = sign(featImg).*sqrt(abs(featImg));
+
+                % apply L2 normalization (see Lin et al 2015 ICCV)
+                featImg = featImg / sqrt(sum(featImg.^2));    
+                
+                % allocate enough space in first run       
+                if ( ~exist('features','var') )
+                    features = zeros( size(featImg,1), size(s_filelist_to_use,1), 'single');
+                end  
+                
+                % store computed feature accordingly
+                features( :, slices(i)+i_img-1 ) = featImg; 
+            end
+        else
+            % vectorize and concatenate activation maps
+            if ( ndims( tmp_feat ) > 2 )
+                tmp_feat = reshape( tmp_feat, ...
+                                    size(tmp_feat,1)*size(tmp_feat,2)*size(tmp_feat,3), ...
+                                    size(tmp_feat,4)...
+                                  );    
+            end 
+            
+            % allocate enough space in first run       
+            if ( ~exist('features','var') )
+                features = zeros( size(tmp_feat,1), size(s_filelist_to_use,1), 'single');
+            end   
+            
+            % store computed feature accordingly
+            features( :, slices(i):(slices(i+1)-1) ) = tmp_feat( :, 1:(slices(i+1)-slices(i)) );            
         end
         
-        % store computed feature accordingly
-        features( :, slices(i):(slices(i+1)-1) ) = tmp_feat( :, 1:(slices(i+1)-slices(i)) );
+       
+
     end
     
     % convert output to double precision
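Note: the settings struct replaces the former s_layer string argument. A minimal usage sketch (illustrative values; assumes net and f_mean were obtained via caffe_load_network.m and that filelist.txt lists one image per line):

    settings = [];
    settings.s_layer                  = 'relu5';
    settings.b_apply_bilinear_pooling = true;
    settings.b_apply_log_M            = true;
    settings.f_sigma                  = 1e-5;
    features = caffe_features_multiple_images( 'filelist.txt', f_mean, net, settings );

Passing [] (or omitting the fourth argument) reproduces the previous behavior, i.e., plain 'relu7' activations.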

+ 74 - 11
caffe_features_single_image.m

@@ -1,5 +1,5 @@
-function [ features ] = caffe_features_single_image( i_image, f_mean, net, s_layer)
-% function [ features ] = caffe_features_single_image( i_image, f_mean, net, s_layer)
+function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings ) 
+% function [ features ] = caffe_features_single_image( i_image, f_mean, net, settings ) 
 % 
 %  BRIEF:
 %   Run a forward pass of a given net on a single image and grep features of a specified layer
@@ -11,7 +11,15 @@ function [ features ] = caffe_features_single_image( i_image, f_mean, net, s_lay
 %                  Required to be cropped to the input size of your
 %                  network! See caffe_load_network.m
 %   net         -- a previously loaded network, see caffe_load_network.m
-%   s_layer     -- optional (default: 'relu7'), string, specifies the layer used for feature extraction
+%   settings    -- optional (default: []), struct with the following possible fields
+%     .s_layer     -- optional (default: 'relu7'), string, specifies the layer used for feature extraction
+%     .b_apply_bilinear_pooling
+%                  -- optional (default: false), apply bilinear (outer product) pooling to the activation maps
+%     .b_skip_normalization_in_bilinear_pooling
+%                  -- optional (default: false), skip the response normalization before the outer product
+%     .b_apply_log_M
+%                  -- optional (default: false), apply the matrix logarithm to the pooled feature matrix
+%     .f_sigma     -- optional (default: 1e-5), regularizer added to the diagonal before logm
 %
 
     %% parse inputs
@@ -22,10 +30,17 @@ function [ features ] = caffe_features_single_image( i_image, f_mean, net, s_lay
         error ( 'no network passed');
     end
     if (nargin<4)
-        s_layer = 'relu7';
+        settings = [];
     end    
     
-
+    
+    s_layer                  = getFieldWithDefault ( settings, 's_layer',                  'relu7');
+    b_apply_bilinear_pooling = getFieldWithDefault ( settings, 'b_apply_bilinear_pooling', false );
+    b_skip_normalization_in_bilinear_pooling ...
+                             = getFieldWithDefault ( settings, 'b_skip_normalization_in_bilinear_pooling', false );
+    b_apply_log_M            = getFieldWithDefault ( settings, 'b_apply_log_M',            false );
+    f_sigma                  = getFieldWithDefault ( settings, 'f_sigma',                  1e-5 );
+    
 
     %% old caffe layout
 %     % prepare image for caffe format
@@ -42,7 +57,7 @@ function [ features ] = caffe_features_single_image( i_image, f_mean, net, s_lay
 
     %% new caffe layout
     % scale, permute dimensions, subtract mean
-    data         = caffe_prepare_image( i_image, f_mean );
+    data             = caffe_prepare_image( i_image, f_mean );
     
     % check that network was prepared to work on single images
     net_input_shape  = net.blobs('data').shape;    
@@ -56,11 +71,59 @@ function [ features ] = caffe_features_single_image( i_image, f_mean, net, s_lay
     % fetch activations from specified layer
     features = net.blobs( s_layer ).get_data();
     
-    % vectorize and concatenate activation maps
-    features = reshape( features, ...
-                        size(features,1)*size(features,2)*size(features,3), ...
-                        size(features,4)...
-                      );
+    %% optional: bilinear pooling
+    if ( b_apply_bilinear_pooling )
+               
+        %% efficient version: reshape and sum
+        %
+        % compute outer product with sum pooling
+        % this is consistent with the matlab code of Lin et al. ICCV 2015
+        if ( ndims ( features ) == 3 )
+            i_channelCount = size ( features, 3);            
+            % reshape with [] automatically resizes to correct number of examples,
+            % this is equivalent to ...size(features,1)*size(features,2),size(features,3) );
+            features  = reshape ( features, [],i_channelCount);
+            
+            % response normalization to increase comparability of features 
+            % this improves the condition of the bilinear matrix 
+            %
+            if ( ~b_skip_normalization_in_bilinear_pooling )
+                %FIXME this equals 1/abs(sum(features,2))...
+                features = bsxfun(@times, features, 1./sqrt(sum(features,2).^2)); 
+            end
+            % compute outer product
+            features = features'*features;
+        else
+            features = features / sqrt(sum(features)^2); 
+            features = features*features';
+        end                    
+                    
+
+        if ( b_apply_log_M )
+            features = logm( features + f_sigma*eye( size(features) ) );
+        end
+        
+        % take lower triangle only to remove redundant information
+        % -> logical automatically reshapes into vector
+        features     = features ( logical(tril(ones(size(features)))));     
+        
+        % pass through signed square root step  (see Lin et al 2015 ICCV)
+        features     = sign(features).*sqrt(abs(features));
+        
+        % apply L2 normalization (see Lin et al 2015 ICCV)
+        features     = features / sqrt(sum(features.^2));
+        
+        
+
+    else
+        % vectorize and concatenate activation maps
+        features = reshape( features, ...
+                            size(features,1)*size(features,2)*size(features,3), ...
+                            size(features,4)...
+                          );        
+    end
+    
+
     
     % convert output to double precision
     features = double(features);
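For reference, the bilinear pooling branch above boils down to a few lines of linear algebra. A self-contained sketch on a random h x w x c activation map (dummy data, not part of the commit; the logm input is cast to double since logm expects a double matrix, and the +1e-5 on the diagonal keeps it positive definite):

    A = rand( 13, 13, 256, 'single' );        % dummy conv activation map
    c = size( A, 3 );
    X = reshape( A, [], c );                  % (h*w) x c
    B = X' * X;                               % c x c sum-pooled outer product
    B = logm( double(B) + 1e-5*eye(c) );      % optional matrix logarithm, regularized
    b = B( logical( tril( ones(c) ) ) );      % lower triangle -> vector
    b = sign(b) .* sqrt( abs(b) );            % signed square root
    b = b / sqrt( sum( b.^2 ) );              % L2 normalization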

+ 26 - 0
getFieldWithDefault.m

@@ -0,0 +1,26 @@
+function myOut = getFieldWithDefault ( myStruct, myField, myDefault )
+% function myOut = getFieldWithDefault ( myStruct, myField, myDefault )
+% 
+%  BRIEF:
+%    Get the content of a named field of a struct if existing, or return a
+%    specified default value instead
+%    ...inspired by NICE::Config.gI ('section','name', default )
+% 
+%  INPUT:
+%    myStruct  -- a struct
+%    myField   -- string with desired field name
+%    myDefault -- default value to use if field is non-existing or empty
+% 
+%  OUTPUT:
+%    myOut     -- content of field or default value
+% 
+% 
+% author: Alexander Freytag
+% date  : 04-03-2014 ( dd-mm-yyyy )
+
+    if ( ~isempty(myStruct) && isfield(myStruct, myField) && ~isempty( myStruct.( myField ) ))
+        myOut = myStruct.( myField );
+    else
+        myOut = myDefault;
+    end
+end
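
A quick usage example for the helper (illustrative field names):

    s = struct( 'i_batchsize', 10 );
    getFieldWithDefault( s, 'i_batchsize', 1 )    % -> 10 (field exists and is non-empty)
    getFieldWithDefault( s, 's_layer', 'relu7' )  % -> 'relu7' (field missing, default used)
    getFieldWithDefault( [], 's_layer', 'relu7' ) % -> 'relu7' (empty input, default used)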