
First init - multi-class gp without parameter optimization

Alexander Freytag 11 years ago
commit 0f054fe75d
12 changed files with 792 additions and 0 deletions
  1. 32 0
      README
  2. 292 0
      evaluateIncrementalLearning.m
  3. 1 0
      init.m
  4. 35 0
      learn_multiclass_gp.m
  5. 51 0
      misc/covSEiso.m
  6. 31 0
      misc/covmin.m
  7. 33 0
      misc/min_kernel.m
  8. 53 0
      misc/sq_dist.m
  9. 61 0
      multiclass_gp_1D_example.m
  10. 110 0
      simulateIncrementalLearning.m
  11. 48 0
      test_multiclass_gp.m
  12. 45 0
      update_multiclass_gp.m

+ 32 - 0
README

@@ -0,0 +1,32 @@
+(1) 'Installing everything'
+This package is self-contained and does not depend on external sources. You only need to add the path accordingly:
+     Setup path: init
+
+
+(2) GP models
+  Get to know GP models on a simple 1D example: multiclass_gp_1D_example
+
+(3) Incremental learning
+  Compare efficient incremental learning and training from scratch: evaluateIncrementalLearning
+
+  What you should expect:
+  - figure 1: computation times for the model updates while samples are added one by one. The blue line (efficient updates)
+     should be below the red one (learning from scratch). Results are averaged over several runs.
+  - figure 2: accuracies obtained while incrementally increasing the training set. Both curves should be identical - if not, something
+     went wrong within the process of model updates. Since the toy example only uses artificial data, the accuracy curve is not
+     expected to look super fancy.
+
+NOTE:
+The technique for incrementally updating GP models updates 'the whole model', although for classification, only the alpha vectors actually matter. Therefore, if you are only interested
+in classifying samples irrespective of their predictive variance, you might want to use more sophisticated tricks for updating
+the alpha vectors only, which can be found exemplarily in
+
+  Alexander Freytag  and Erik Rodner and Paul Bodesheim and Joachim Denzler:
+  "Labeling examples that matter: Relevance-Based Active Learning with Gaussian Processes".
+  Proceedings of the German Conference on Pattern Recognition (GCPR), 2013.
+
+If you are further interested in GPs equipped with histogram intersection kernels, you might want to use our GPHIK implementations instead, which avoid
+explicit covariance construction and are algorithmically much more efficient (e.g., O(n log n) in training compared to O(n^3)). Refer to the following source for further details:
+  Erik Rodner and Alexander Freytag and Paul Bodesheim and Joachim Denzler:
+  " Large-Scale Gaussian Process Classification with Flexible Adaptive Histogram Kernels".
+  Proceedings of the European Conference on Computer Vision (ECCV), 2012.
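
A minimal sketch of the complete workflow described above, assuming MATLAB is started in the package root:

    % make the helper functions in misc/ available
    init;

    % (2) visualize per-task GP means, predicted labels, and variances on 1D toy data
    multiclass_gp_1D_example;

    % (3) compare efficient model updates against batch re-training;
    % called without arguments, the built-in default settings are used
    evaluateIncrementalLearning;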

+ 292 - 0
evaluateIncrementalLearning.m

@@ -0,0 +1,292 @@
+function evaluateIncrementalLearning ( settings )
+% compare multi-class incremental learning with Gaussian processes using
+% either efficient model updates or batch training from scratch
+%
+% Copyright (c) by Alexander Freytag, 2013-11-13.
+
+
+    if ( (nargin < 1) || isempty ( settings) )
+        settings = [];
+    end
+
+    %setup variables for experiments
+
+    if ( ~isfield(  settings, 'i_numDim') || isempty(settings.i_numDim) )
+        settings.i_numDim = 10;
+    end
+    if ( ~isfield(  settings, 'd_mean') || isempty(settings.d_mean) )
+        settings.d_mean = 3.0;
+    end
+    if ( ~isfield(  settings, 'd_var') || isempty(settings.d_var) )
+        settings.d_var = 1.0;
+    end
+
+    if ( ~isfield(  settings, 'offsetSpecific') || isempty(settings.offsetSpecific) )
+        settings.offsetSpecific = [ 0.0, 2.0, 4.0 ];
+    end
+    if ( ~isfield(  settings, 'varSpecific') || isempty(settings.varSpecific) )
+        settings.varSpecific = [2.0, 0.5, 1.0];
+    end
+    if ( ~isfield(  settings, 'dimToChange') || isempty(settings.dimToChange) )
+        % one dimension per class that receives the class-specific offset and variance
+        settings.dimToChange = [ 1, 3, 2, 4, 5 ];
+    end
+
+    if ( ~isfield(  settings, 'i_numClasses') || isempty(settings.i_numClasses) )
+        settings.i_numClasses = 3;
+    end
+    
+    if ( ~isfield(  settings, 'i_numSamplesPerClassTrain') || isempty(settings.i_numSamplesPerClassTrain) )
+        settings.i_numSamplesPerClassTrain = 10;    
+    end
+    if ( ~isfield(  settings, 'i_numSamplesPerClassUnlabeled') || isempty(settings.i_numSamplesPerClassUnlabeled) )
+        settings.i_numSamplesPerClassUnlabeled = 50;
+    end
+    if ( ~isfield(  settings, 'i_numSamplesPerClassTest') || isempty(settings.i_numSamplesPerClassTest) )
+        settings.i_numSamplesPerClassTest = 10;
+    end
+
+    % how many experiments do you want to average over?
+    if ( ~isfield(  settings, 'i_numRepetitions') || isempty(settings.i_numRepetitions) )
+        settings.i_numRepetitions = 10;
+    end
+    i_numRepetitions = settings.i_numRepetitions;
+
+
+    timesBatch = zeros ( i_numRepetitions, (settings.i_numClasses*settings.i_numSamplesPerClassUnlabeled+1) );
+    timesEfficient = zeros ( i_numRepetitions, (settings.i_numClasses*settings.i_numSamplesPerClassUnlabeled+1) );
+    
+    arrBatch = zeros ( i_numRepetitions, (settings.i_numClasses*settings.i_numSamplesPerClassUnlabeled+1) );
+    arrEfficient = zeros ( i_numRepetitions, (settings.i_numClasses*settings.i_numSamplesPerClassUnlabeled+1) );    
+    
+    for i_idxRep = 1:i_numRepetitions
+        
+        %%  (1) sample some features for initial train set, separate test set, and additional separate set to be added incrementally
+
+        % sample training data
+        settings.i_numSamplesPerClass = settings.i_numSamplesPerClassTrain;
+        labeledData = sampleData( settings );
+
+
+        % sample unlabeled data
+        settings.i_numSamplesPerClass = settings.i_numSamplesPerClassUnlabeled;
+        unlabeledData = sampleData( settings );
+
+
+        % sample test data
+        settings.i_numSamplesPerClass = settings.i_numSamplesPerClassTest;
+        testData = sampleData( settings );
+
+        % in which order do we want to add the 'unlabeled' samples?
+        idxToAdd = randperm( length(unlabeledData.y) ); 
+
+
+
+        %%  (2) run both techniques : efficient updates versus batch training
+
+        b_efficientUpdates = false;
+        resultsBatch = simulateIncrementalLearning ( labeledData, unlabeledData, testData, b_efficientUpdates, idxToAdd );
+        
+        timesBatch(i_idxRep, :) = resultsBatch.times;
+        arrBatch(i_idxRep, :) = resultsBatch.ARR;
+        clear ( 'resultsBatch' );
+        
+        
+        
+
+        b_efficientUpdates = true;
+        resultsEfficient = simulateIncrementalLearning ( labeledData, unlabeledData, testData, b_efficientUpdates, idxToAdd );
+        
+        timesEfficient(i_idxRep, :) = resultsEfficient.times;
+        arrEfficient(i_idxRep, :) = resultsEfficient.ARR;
+        clear ( 'resultsEfficient' );
+        
+        
+    end
+
+
+    %%  (3) evaluation
+    
+    % compute relevant values
+    timesBatch = mean ( timesBatch ) ;
+    timesEfficient = mean ( timesEfficient ) ;
+    
+    arrBatch = mean( arrBatch );
+    arrEfficient = mean ( arrEfficient );      
+    
+    % setup variables
+    if ( ( ~isfield(settings,'s_legendLocation'))  || isempty(settings.s_legendLocation) )
+       s_legendLocation = 'NorthEast';
+    else
+       s_legendLocation = settings.s_legendLocation;
+    end     
+  
+    if ( ( ~isfield(settings,'i_fontSize'))  || isempty(settings.i_fontSize) )
+       i_fontSize = 12;
+    else
+       i_fontSize = settings.i_fontSize;
+    end  
+  
+    if ( ( ~isfield(settings,'i_fontSizeAxis'))  || isempty(settings.i_fontSizeAxis) )
+       i_fontSizeAxis = 16;
+    else
+       i_fontSizeAxis = settings.i_fontSizeAxis;
+    end
+    
+    if ( ( ~isfield(settings,'c'))  || isempty(settings.c) )
+        c={[0,0,1], [1,0,0]};
+    else
+        c = settings.c;
+    end  
+  
+    if ( ( ~isfield(settings,'lineStyle'))  || isempty(settings.lineStyle) )
+        lineStyle={'-',   '--' };
+    else
+        lineStyle = settings.lineStyle;
+    end 
+  
+    if ( ( ~isfield(settings,'marker'))  || isempty(settings.marker) )
+        marker={'none', 'none'};
+    else
+        marker = settings.marker;
+    end 
+    
+    if ( ( ~isfield(settings,'linewidth'))  || isempty(settings.linewidth) )
+        linewidth=3;
+    else
+        linewidth = settings.linewidth;
+    end      
+    
+    %% plot computation times (blue should be lower than red)
+    fig_timeComp = figure;
+    set ( fig_timeComp, 'name', 'Computation Times for Model Updates');
+    hold on;
+
+    plot ( timesEfficient, 'Color', c{ 1 }, 'LineStyle', lineStyle{ 1 },  'Marker', marker{ 1 }, ...
+                'LineWidth', linewidth, 'MarkerSize',8 );
+    plot ( timesBatch, 'Color', c{ 2 }, 'LineStyle', lineStyle{ 2 },  'Marker', marker{ 2 }, ...
+                'LineWidth', linewidth, 'MarkerSize',8);
+    
+    leg=legend( {'Efficient', 'Batch'}, 'Location', s_legendLocation,'fontSize', i_fontSize,'LineWidth', 3);
+    xlabel('Number of samples added');
+    ylabel({'Time spent for model update [s]'});    
+    
+    text_h=findobj(gca,'type','text');  
+    set(text_h,'FontSize',i_fontSize);
+    set(gca, 'FontSize', i_fontSize);
+    set(get(gca,'YLabel'), 'FontSize', i_fontSizeAxis);
+    set(get(gca,'XLabel'), 'FontSize', i_fontSizeAxis);    
+
+    hold off;
+
+
+    %% plot accuracies (blue and red should be identical)
+    fig_accuracyComp = figure;
+    set ( fig_accuracyComp, 'name', 'Accuracy over time');
+    hold on;
+
+    plot ( arrEfficient, 'Color', c{ 1 }, 'LineStyle', lineStyle{ 1 },  'Marker', marker{ 1 }, ...
+                'LineWidth', linewidth, 'MarkerSize',8 );
+    plot ( arrBatch, 'Color', c{ 2 }, 'LineStyle', lineStyle{ 2 },  'Marker', marker{ 2 }, ...
+                'LineWidth', linewidth, 'MarkerSize',8);
+    leg=legend( {'Efficient', 'Batch'}, 'Location', s_legendLocation,'fontSize', i_fontSize,'LineWidth', 3);
+    xlabel('Number of samples added');
+    ylabel({'Multi-class accuracy (ARR)'});
+    
+    text_h=findobj(gca,'type','text');  
+    set(text_h,'FontSize',i_fontSize);
+    set(gca, 'FontSize', i_fontSize);
+    set(get(gca,'YLabel'), 'FontSize', i_fontSizeAxis);
+    set(get(gca,'XLabel'), 'FontSize', i_fontSizeAxis);    
+
+    hold off;
+
+
+    pause;
+    close ( fig_timeComp );
+    close ( fig_accuracyComp );
+    
+end    
+    
+function data = sampleData( settings )
+
+    if ( isfield(  settings, 'i_numDim') && ~isempty(settings.i_numDim) )
+        i_numDim = settings.i_numDim;
+    else
+        i_numDim = 5;
+    end
+    
+
+    if ( isfield(  settings, 'd_mean') && ~isempty(settings.d_mean) )
+        d_mean = settings.d_mean;
+    else
+        d_mean = 3.0;
+    end
+    
+
+    if ( isfield(  settings, 'd_var') && ~isempty(settings.d_var) )
+        d_var = settings.d_var;
+    else
+        d_var = 1.0;
+    end
+    
+    if ( isfield(  settings, 'i_numClasses') && ~isempty(settings.i_numClasses) )
+        i_numClasses = settings.i_numClasses;
+    else
+        i_numClasses = 3;
+    end
+    
+    if ( isfield(  settings, 'offsetSpecific') && ~isempty(settings.offsetSpecific) )
+        offsetSpecific = settings.offsetSpecific;
+    else
+        offsetSpecific = [ 0.0, 2.0, 4.0 ];
+    end
+ 
+
+    if ( isfield(  settings, 'varSpecific') && ~isempty(settings.varSpecific) )
+        varSpecific = settings.varSpecific;
+    else
+        varSpecific = [2.0, 0.5, 1.0];
+    end
+    
+
+    if ( isfield(  settings, 'dimToChange') && ~isempty(settings.dimToChange) )
+        dimToChange = settings.dimToChange;
+    else
+        % one dimension per class that receives the class-specific offset and variance
+        dimToChange = [ 1, 3, 2, 4, 5 ];
+    end
+    
+
+
+    
+    if ( isfield(  settings, 'i_numSamplesPerClass') && ~isempty(settings.i_numSamplesPerClass) )
+        i_numSamplesPerClass = settings.i_numSamplesPerClass;
+    else
+        i_numSamplesPerClass = 2;
+    end    
+    
+
+    % randomly compute some features
+    data.X = abs ( d_mean + d_var.*randn(i_numClasses*i_numSamplesPerClass,i_numDim) );
+
+    % disturb features for specific classes
+    for i_clIdx = 1:i_numClasses
+
+        i_idxStart = (i_clIdx-1)*i_numSamplesPerClass+1;
+        i_idxEnd = i_clIdx*i_numSamplesPerClass;
+
+        data.X(  i_idxStart:i_idxEnd,  dimToChange(i_clIdx) ) = ...
+                    abs ( data.X(i_idxStart:i_idxEnd,1)  + ... 
+                    offsetSpecific( i_clIdx) + ... %add class offset
+                    varSpecific ( i_clIdx) .*randn(i_numSamplesPerClass,1) ... % add class variance
+                 );
+    end
+
+
+    % normalize features, thereby simulating histograms, which commonly occur in
+    % computer vision applications
+    data.X = bsxfun(@times, data.X, 1./(sum(data.X, 2)));
+
+    % adapt class labels
+    data.y =  bsxfun(@times, ones(i_numSamplesPerClass,1), 1:i_numClasses );
+    data.y = data.y(:);
+
+end    
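
As a usage sketch for the function above (the field names are the defaults defined in evaluateIncrementalLearning; the values are only illustrative), a smaller and faster configuration can be run like this:

    settings = struct();
    settings.i_numRepetitions              = 3;    % average over fewer runs
    settings.i_numSamplesPerClassTrain     = 5;    % smaller initial training set
    settings.i_numSamplesPerClassUnlabeled = 20;   % fewer incremental additions
    settings.i_numSamplesPerClassTest      = 10;
    evaluateIncrementalLearning ( settings );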

+ 1 - 0
init.m

@@ -0,0 +1 @@
+addpath('misc');

+ 35 - 0
learn_multiclass_gp.m

@@ -0,0 +1,35 @@
+function model = learn_multiclass_gp(K, labels, noise)
+% Learn a multi-class GP model
+% 
+% function model = learn_multiclass_gp(K, labels, noise)
+%
+% INPUT: K -- kernel matrix of all data points
+%        labels -- multi-class labels, e.g., 1, 3, 4, 5, ...
+%        noise -- variance of Gaussian noise model within GP framework
+% Copyright (c) by Alexander Freytag, 2013-11-13.
+
+  % obtain unique class labels (length of this vector equals the number of classes)
+  unique_labels = unique(labels);
+
+  % cholesky factorization of regularized kernel matrix
+  L = chol( K + noise*eye(size(K)) , 'lower');
+
+  % store relevant data
+  model.L = L;
+  model.noise = noise;
+  model.unique_labels = unique_labels;
+
+  % loop over classes and thus over binary one-vs-all tasks
+  for k=1:length(unique_labels)
+
+    % binary label vector for each binary task
+    current_labels = 2*(labels==unique_labels(k))-1;
+
+    % precompute and store alpha vector for each binary task
+    model.alpha{k} = L'\(L\current_labels);
+
+  end
+  
+  model.y = labels;
+
+end
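
A small consistency check (a sketch, not part of the package) that each stored alpha vector solves the regularized linear system of its one-vs-all task; the min kernel from misc/ serves as an arbitrary positive semi-definite kernel here:

    X      = rand(6, 4);                     % 6 samples with 4 dimensions each
    labels = [1; 1; 2; 2; 3; 3];
    noise  = 0.1;

    K     = min_kernel(X', X');              % samples are columns for min_kernel
    model = learn_multiclass_gp(K, labels, noise);

    % for task k, (K + noise*I) * alpha_k should reproduce the +/-1 label vector
    k  = 1;
    yk = 2*(labels == model.unique_labels(k)) - 1;
    residual = norm( (K + noise*eye(size(K))) * model.alpha{k} - yk );   % ~1e-14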

+ 51 - 0
misc/covSEiso.m

@@ -0,0 +1,51 @@
+function K = covSEiso(hyp, x, z, i)
+% borrowed from the gpml toolbox of Rasmussen and Nickisch
+% see http://www.gaussianprocess.org/gpml/code/matlab/doc/
+
+
+% Squared Exponential covariance function with isotropic distance measure. The 
+% covariance function is parameterized as:
+%
+% k(x^p,x^q) = sf^2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2) 
+%
+% where the P matrix is ell^2 times the unit matrix and sf^2 is the signal
+% variance. The hyperparameters are:
+%
+% hyp = [ log(ell)
+%         log(sf)  ]
+%
+% For more help on design of covariance functions, try "help covFunctions".
+%
+% Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2010-09-10.
+%
+% See also COVFUNCTIONS.M.
+
+if nargin<2, K = '2'; return; end                  % report number of parameters
+if nargin<3, z = []; end                                   % make sure, z exists
+xeqz = numel(z)==0; dg = strcmp(z,'diag') && numel(z)>0;        % determine mode
+
+ell = exp(hyp(1));                                 % characteristic length scale
+sf2 = exp(2*hyp(2));                                           % signal variance
+
+% precompute squared distances
+if dg                                                               % vector kxx
+  K = zeros(size(x,1),1);
+else
+  if xeqz                                                 % symmetric matrix Kxx
+    K = sq_dist(x'/ell);
+  else                                                   % cross covariances Kxz
+    K = sq_dist(x'/ell,z'/ell);
+  end
+end
+
+if nargin<4                                                        % covariances
+  K = sf2*exp(-K/2);
+else                                                               % derivatives
+  if i==1
+    K = sf2*exp(-K/2).*K;
+  elseif i==2
+    K = 2*sf2*exp(-K/2);
+  else
+    error('Unknown hyperparameter')
+  end
+end
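
The three calling modes of covSEiso used throughout this package, sketched briefly (hyperparameters are log length-scale and log signal standard deviation, as documented above):

    loghyper = [0, 0];                        % ell = 1, sf = 1
    x = (1:5)';                               % 5 training points in 1D
    z = (0:0.5:2)';                           % 5 test points

    K   = covSEiso(loghyper, x);              % 5 x 5 symmetric training kernel
    Ks  = covSEiso(loghyper, x, z);           % 5 x 5 train-test cross kernel
    kss = covSEiso(loghyper, x, 'diag');      % 5 x 1 self-similarities (all sf^2)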

+ 31 - 0
misc/covmin.m

@@ -0,0 +1,31 @@
+function K = covmin(hyp, x, z)
+% See also COVFUNCTIONS.M.
+% borrowed from the gpml toolbox of Rasmussen and Nickisch
+% see http://www.gaussianprocess.org/gpml/code/matlab/doc/
+
+if nargin<2, K = '0'; return; end                  % report number of parameters
+if nargin<3, z = []; end                                   % make sure, z exists
+xeqz = numel(z)==0; dg = strcmp(z,'diag') && numel(x)>0;        % determine mode
+
+ell = exp(0);
+% ell = exp(hyp(1));                              % exponent for histogram entries
+sf2 = exp(0);
+% sf2 = exp(hyp(2));                              % factor for histogram entries
+
+% precompute min kernel
+if dg                                                               % vector kxx
+  K = sum(x,2);
+%    K = ones(size(x,1),1);
+else
+  if xeqz                                                 % symmetric matrix Kxx
+    K = sf2*min_kernel(x'.^ell);
+  else                                                   % cross covariances Kxz
+    K = sf2*min_kernel(x'.^ell,z'.^ell);
+  end
+end
+
+if nargin<4                                                        % covariances
+
+else                                                               % derivatives
+  error('not yet implemented')
+end
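
A brief usage sketch of covmin on L1-normalized features, i.e., the histogram-like inputs produced by sampleData in evaluateIncrementalLearning (samples are rows here, matching how the covariance functions are called in this package):

    X = rand(4, 6);                           % 4 samples with 6 non-negative dimensions
    X = bsxfun(@times, X, 1./sum(X, 2));      % L1-normalize each row (histograms)

    K   = covmin([], X);                      % 4 x 4 training kernel
    kss = covmin([], X, 'diag');              % 4 x 1 self-similarities (all 1 here)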

+ 33 - 0
misc/min_kernel.m

@@ -0,0 +1,33 @@
+function C = min_kernel(a, b)
+% borrowed from the gpml toolbox of Rasmussen and Nickisch
+% see http://www.gaussianprocess.org/gpml/code/matlab/doc/
+
+if nargin<1  || nargin>2 || nargout>1, error('Wrong number of arguments.'); end
+bsx = exist('bsxfun','builtin');      % since Matlab R2007a 7.4.0 and Octave 3.0
+if ~bsx, bsx = exist('bsxfun'); end      % bsxfun is not yet "builtin" in Octave
+[D, n] = size(a);
+
+if nargin==1                                   % only one input: kernel of a with itself
+  b = a; m = n;
+else
+  [d, m] = size(b);
+  if d ~= D, error('Error: column lengths must agree.'); end
+end
+
+if bsx                                               % compute min kernel values
+	C = zeros(n,m);
+	% This code is working but still inefficient :(
+	if ( n > m )
+	  	for i=1:m
+				C(:,i)=sum(bsxfun(@min,a,b(:,i)),1)';
+		end
+	else
+		for j=1:n
+				C(j,:)=sum(bsxfun(@min,a(:,j),b),1);
+		end
+	end
+
+else
+	error('not yet implemented...');
+end
+C = max(C,0);          % numerical noise can make C slightly negative, i.e. C > -1e-14
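
A quick sanity check of min_kernel against its textbook definition k(a,b) = sum_d min(a_d, b_d), using random non-negative vectors (samples are columns, as in sq_dist):

    A = rand(3, 5);                           % 3 dimensions, 5 samples
    B = rand(3, 2);                           % 2 further samples

    C = min_kernel(A, B);                     % 5 x 2 kernel matrix

    % reference: explicit double loop over all sample pairs
    Cref = zeros(5, 2);
    for i = 1:5
      for j = 1:2
        Cref(i,j) = sum( min(A(:,i), B(:,j)) );
      end
    end
    maxDiff = max(abs(C(:) - Cref(:)));       % ~0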

+ 53 - 0
misc/sq_dist.m

@@ -0,0 +1,53 @@
+% sq_dist - a function to compute a matrix of all pairwise squared distances
+% between two sets of vectors, stored in the columns of the two matrices, a
+% (of size D by n) and b (of size D by m). If only a single argument is given
+% or the second matrix is empty, the missing matrix is taken to be identical
+% to the first.
+%
+% Usage: C = sq_dist(a, b)
+%    or: C = sq_dist(a)  or equiv.: C = sq_dist(a, [])
+%
+% Where a is of size Dxn, b is of size Dxm (or empty), C is of size nxm.
+%
+% Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2010-12-13.
+
+function C = sq_dist(a, b)
+% borrowed from the gpml toolbox of Rasmussen and Nickisch
+% see http://www.gaussianprocess.org/gpml/code/matlab/doc/
+
+
+if nargin<1  || nargin>3 || nargout>1, error('Wrong number of arguments.'); end
+bsx = exist('bsxfun','builtin');      % since Matlab R2007a 7.4.0 and Octave 3.0
+if ~bsx, bsx = exist('bsxfun'); end      % bsxfun is not yet "builtin" in Octave
+[D, n] = size(a);
+
+% Computation of a^2 - 2*a*b + b^2 is less stable than (a-b)^2 because numerical
+% precision can be lost when both a and b have very large absolute value and the
+% same sign. For that reason, we subtract the mean from the data beforehand to
+% stabilise the computations. This is OK because the squared error is
+% independent of the mean.
+if nargin==1                                                     % subtract mean
+  mu = mean(a,2);
+  if bsx
+    a = bsxfun(@minus,a,mu);
+  else
+    a = a - repmat(mu,1,size(a,2));  
+  end
+  b = a; m = n;
+else
+  [d, m] = size(b);
+  if d ~= D, error('Error: column lengths must agree.'); end
+  mu = (m/(n+m))*mean(b,2) + (n/(n+m))*mean(a,2);
+  if bsx
+    a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
+  else
+    a = a - repmat(mu,1,n);  b = b - repmat(mu,1,m);
+  end
+end
+
+if bsx                                               % compute squared distances
+  C = bsxfun(@plus,sum(a.*a,1)',bsxfun(@minus,sum(b.*b,1),2*a'*b));
+else
+  C = repmat(sum(a.*a,1)',1,m) + repmat(sum(b.*b,1),n,1) - 2*a'*b;
+end
+C = max(C,0);          % numerical noise can make C slightly negative, i.e. C > -1e-14
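
The same kind of sanity check for sq_dist (samples are stored as columns, as documented above):

    a = randn(3, 4);                          % 4 samples in 3 dimensions
    b = randn(3, 2);

    C = sq_dist(a, b);                        % 4 x 2 matrix of squared distances

    % reference: explicit double loop over all sample pairs
    Cref = zeros(4, 2);
    for i = 1:4
      for j = 1:2
        Cref(i,j) = sum( (a(:,i) - b(:,j)).^2 );
      end
    end
    maxDiff = max(abs(C(:) - Cref(:)));       % ~1e-15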

+ 61 - 0
multiclass_gp_1D_example.m

@@ -0,0 +1,61 @@
+function multiclass_gp_1D_example()
+% plot GP model outputs for 1D artificial data
+% play around with param settings to become familiar with GP models
+%
+% Copyright (c) by Alexander Freytag, 2013-11-13.
+
+  % get some artificial 1D data
+  train_data = [1;2.5;4.5];
+  train_labels = [1; 2; 4];
+  test_data = (0.1:0.01:5)';
+
+  % some default settings of hyperparameters
+  loghyper = [-1 0];
+  gpnoise = 0.01;
+
+  
+  % learn multi-class GP model
+  K = feval('covSEiso', loghyper, train_data);
+  model = learn_multiclass_gp(K, train_labels, gpnoise);
+
+  % evaluate model on test data
+  Ks = feval('covSEiso', loghyper, train_data, test_data);
+  Kss = feval('covSEiso', loghyper, test_data, 'diag');  
+  [mu, pred_labels, variance] = test_multiclass_gp(model, Ks, Kss');
+
+  % visualize everything nicely
+  f1 = figure(1);
+  plot(train_data(1), 1, 'bx');
+  title('Mean curve of each binary task');
+
+  hold on
+  plot(train_data(2), 1, 'gx');
+  plot(train_data(3), 1, 'rx');
+
+  plot(test_data, mu(:,1), 'b');
+  plot(test_data, mu(:,2), 'g');
+  plot(test_data, mu(:,3), 'r');
+  hold off
+
+  f2 = figure(2);
+  plot(test_data, pred_labels, 'kx');
+  title('Predicted multi-class labels');
+
+  f3 = figure(3);
+  title('Multi-class posterior mean and variance');
+  colors = {'b', 'g', 'r'};
+  hold on
+  max_mean = max(mu,[],2);
+  lower = max_mean-sqrt(variance);
+  upper = max_mean+sqrt(variance);
+  p = [test_data, lower; flipdim(test_data,1),flipdim(upper,1)];
+  fill(p(:,1), p(:,2), 'y');
+  for k=1:length(model.unique_labels)
+
+    tmp_ID = pred_labels == model.unique_labels(k);
+    plot(test_data(tmp_ID),mu(tmp_ID,k),colors{k}, 'LineWidth', 2);
+
+  end
+  hold off
+
+end

+ 110 - 0
simulateIncrementalLearning.m

@@ -0,0 +1,110 @@
+function results = simulateIncrementalLearning ( labeledData, unlabeledData, testData, b_efficientUpdates, idxToAdd )
+% Copyright (c) by Alexander Freytag, 2013-11-13.
+
+    if ( (nargin < 4) || isempty (b_efficientUpdates) )
+        b_efficientUpdates = true;
+    end
+    
+    if ( (nargin < 5) || isempty (idxToAdd) )
+        idxToAdd = 1:length(unlabeledData.y);
+    end    
+    
+    labeled_X = labeledData.X;
+    labeled_y = labeledData.y;
+    
+    unlabeled_X = unlabeledData.X;
+    unlabeled_y = unlabeledData.y;
+    
+    test_X = testData.X;
+    test_y = testData.y;     
+
+    % output
+    ARRscore = zeros ( 1+length(unlabeled_y ) ,1 );
+    times = zeros ( 1+length(unlabeled_y ) ,1 );
+    
+    %% (2.1) initially train the model
+    
+    % settings
+    gpnoise = 0.1;
+    cov = {'covmin'};
+    loghyper = [];
+ 
+    K = feval(cov{:},loghyper, labeled_X);
+  
+    timeStamp_trainStart = tic;
+    model = learn_multiclass_gp(K, labeled_y, gpnoise);
+    times(1) = toc(timeStamp_trainStart);
+  
+    
+    %% (2.2) test initial model
+    % compute similarities to test samples
+    Ks = feval(cov{:},loghyper, labeled_X, test_X);    
+ 
+    [~, pred_labels] = test_multiclass_gp(model, Ks);
+    
+    % overall accuracy; with the class-balanced test set used here this equals the average per-class recognition rate (ARR)
+    numCorrect = norm(double(pred_labels == test_y), 1);
+    testSize = double(size(test_y,1)); 
+    ARRscore(1) = double(numCorrect)/double(testSize);
+    
+    
+    %% (3) perform incremental updates
+    
+
+    
+    for i_idxAdd = 1:length(unlabeled_y)
+        
+        %%%%%%%%%%%%%%
+        %% update
+        %%%%%%%%%%%%%%
+        
+        % get new sample to add
+        xNew = unlabeled_X( idxToAdd(i_idxAdd), : );
+        % simulate asking of user
+        yNew = unlabeled_y ( idxToAdd(i_idxAdd) );
+        
+        if ( b_efficientUpdates )
+        
+            % compute kernel values
+            kss = feval(cov{:},loghyper, xNew);        
+            ksNewSample = feval(cov{:},loghyper, labeled_X, xNew);
+            
+
+            timeStamp_updateStart = tic;
+            model = update_multiclass_gp(model, kss, ksNewSample, yNew);
+            times(i_idxAdd+1) = toc(timeStamp_updateStart);
+
+            labeled_X = [ labeled_X; xNew];
+        else
+            kss = feval(cov{:},loghyper, xNew);        
+            ksNewSample = feval(cov{:},loghyper, labeled_X, xNew); 
+            K = [K, ksNewSample;ksNewSample',kss];
+            
+            labeled_X = [ labeled_X; xNew];
+            labeled_y = [ labeled_y; yNew];         
+
+            timeStamp_updateStart = tic;
+            model = learn_multiclass_gp(K, labeled_y, gpnoise);
+            times(i_idxAdd+1) = toc(timeStamp_updateStart);
+         
+        end
+        
+        
+        %%%%%%%%%%%%%
+        %% test
+        %%%%%%%%%%%%%
+        
+        Ks = [Ks; feval(cov{:},loghyper, xNew, test_X)]; 
+
+        [~, pred_labels] = test_multiclass_gp(model, Ks);
+
+        % overall accuracy; with the class-balanced test set used here this equals the average per-class recognition rate (ARR)
+        numCorrect = norm(double(pred_labels == test_y), 1);
+        testSize = double(size(test_y,1)); 
+        ARRscore(i_idxAdd+1) = double(numCorrect)/double(testSize);        
+    end
+    
+    results.ARR = ARRscore;
+    results.times = times;
+end
+
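
A hedged sketch of calling simulateIncrementalLearning directly on hand-crafted data structures (the field names X and y are the ones read above; the data is tiny and purely illustrative):

    % three classes with non-negative features; samples are rows of X
    labeledData.X   = abs(randn( 9, 10));   labeledData.y   = repmat((1:3)',  3, 1);
    unlabeledData.X = abs(randn(30, 10));   unlabeledData.y = repmat((1:3)', 10, 1);
    testData.X      = abs(randn(15, 10));   testData.y      = repmat((1:3)',  5, 1);

    % efficient rank-one updates, adding the 'unlabeled' samples in random order
    results = simulateIncrementalLearning ( labeledData, unlabeledData, testData, ...
                                            true, randperm(30) );

    % results.times(1) is the initial training time, results.times(2:end) the update
    % times, and results.ARR the accuracy after each added sample
    plot ( results.ARR );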

+ 48 - 0
test_multiclass_gp.m

@@ -0,0 +1,48 @@
+function [pred_mean, pred_labels, pred_var] = test_multiclass_gp(model, Ks, Kss)
+% Prediction with multi-class GP models, outputs predictive mean and variance
+% 
+% function [pred_mean, pred_labels, pred_var] = test_multiclass_gp(model, Ks, Kss)
+%
+% INPUT: model -- model structure obtained from function "learn_multiclass_gp"
+%        Ks -- n x m matrix of kernel values between n training and m test samples
+%        Kss -- m x 1 vector of self-similarities of test samples
+%
+% OUTPUT: pred_mean -- m x c matrix of predictive mean values for each of the m test samples and each of the c binary tasks
+%         pred_labels -- m x 1 vector containing predicted multi-class label for each test sample
+%         pred_var -- m x 1 vector containing predicted variance for each test sample
+%
+% NOTE: to get the maximum mean value for each test sample out of the c binary tasks, use: max_mean = max(pred_mean,[],2);
+% Copyright (c) by Alexander Freytag, 2013-11-13.
+
+    if ( nargin < 3 )
+        Kss = [];
+    end
+
+  pred_mean = zeros( size(Ks,2),length(model.unique_labels) );
+
+  % loop over classes and thus over binary one-vs-all tasks
+  for k=1:length(model.unique_labels)
+
+    pred_mean(:,k) = Ks'*model.alpha{k};
+  
+  end
+
+  % obtain predicted labels using max-pooling
+  [~, maxID] = max(pred_mean,[],2);
+  pred_labels = model.unique_labels(maxID);
+
+  % compute predictive variances if necessary
+  if nargout > 2
+    
+    v = model.L\Ks;
+    
+    if ( isempty(Kss) ) 
+        disp('No Kss given - break!');
+        return;
+    end
+    
+    pred_var = Kss' - sum(v .* v)' + model.noise;
+
+  end
+
+end
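
A brief sketch of the predictive-variance path (third output), reusing the kernels from the 1D example; note that Kss is expected as a 1 x m row of self-similarities, matching the call in multiclass_gp_1D_example:

    loghyper = [-1 0];   noise = 0.01;
    Xtrain   = [1; 2.5; 4.5];   ytrain = [1; 2; 4];
    Xtest    = (0:0.25:5)';

    K     = covSEiso(loghyper, Xtrain);
    model = learn_multiclass_gp(K, ytrain, noise);

    Ks  = covSEiso(loghyper, Xtrain, Xtest);      % n x m cross kernel
    kss = covSEiso(loghyper, Xtest, 'diag');      % m x 1 self-similarities

    [mu, pred_labels, pred_var] = test_multiclass_gp(model, Ks, kss');
    % mu: m x 3 per-task means, pred_labels: m x 1, pred_var: m x 1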

+ 45 - 0
update_multiclass_gp.m

@@ -0,0 +1,45 @@
+function model = update_multiclass_gp(modelOld, kss, ksNewSample, newLabel)
+% Update a multi-class GP model
+% 
+% function model = update_multiclass_gp(modelOld, kss, ksNewSample, newLabel)
+%
+% INPUT: modelOld    -- the old gp-multiclass model (relevant: noise, L, y)
+%        kss         -- self similarity of new example
+%        ksNewSample -- kernel values between new and old training examples
+%        newLabel    -- label of new example
+% Copyright (c) by Alexander Freytag, 2013-11-13.
+%%
+
+  % Cholesky update to include the selected sample
+  l = modelOld.L\ksNewSample;
+  lss = sqrt(kss+modelOld.noise-l'*l);
+  L = [modelOld.L, zeros(length(l),1); l', lss ];  
+  
+  %obtain new labels in a single vector
+  newLabels = [modelOld.y; newLabel];
+  
+  % copy all relevant data
+  model.noise = modelOld.noise;
+  
+  % store relevant new data
+  model.L = L;
+  
+  % obtain unique class labels (length of this vector equals the number of classes)  
+  % Note: we cannot simply reuse the previously known labels, since newLabel
+  % might belong to a completely new class
+  model.unique_labels = unique(newLabels);
+
+  % loop over classes and thus over binary one-vs-all tasks
+  for k=1:length(model.unique_labels)
+
+    % binary label vector for each binary task
+    current_labels = 2*(newLabels==model.unique_labels(k))-1;
+
+    % precompute and store alpha vector for each binary task
+    model.alpha{k} = L'\(L\current_labels);
+
+  end
+  
+  model.y = newLabels;
+
+end
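
A small consistency check (a sketch) that the rank-one Cholesky update above reproduces a full factorization of the grown kernel matrix:

    X    = rand(5, 3);   y = [1; 1; 2; 2; 3];   noise = 0.1;
    xNew = rand(1, 3);   yNew = 3;

    K     = min_kernel(X', X');
    model = learn_multiclass_gp(K, y, noise);

    kss = min_kernel(xNew', xNew');               % self-similarity of the new sample
    ks  = min_kernel(X', xNew');                  % 5 x 1 similarities to old samples

    modelUp = update_multiclass_gp(model, kss, ks, yNew);

    % reference: factorize the augmented kernel matrix from scratch
    Kaug = [K, ks; ks', kss];
    Lref = chol(Kaug + noise*eye(6), 'lower');
    maxDiff = max(max(abs(modelUp.L - Lref)));    % ~1e-15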