Browse source code

Merge 'regrdf' and 'master'

Sven Sickert 12 years ago
parent commit 89cbf71371
29 changed files with 3054 additions and 8 deletions
  1. +4 -4  regression/gpregression/RegGaussianProcess.cpp
  2. +1 -1  regression/gpregression/RegGaussianProcess.h
  3. +313 -0  regression/progs/testRegressionRDFGP.cpp
  4. +8 -0  regression/randomforest/Makefile
  5. +103 -0  regression/randomforest/Makefile.inc
  6. +167 -0  regression/randomforest/RTBClusterRandom.cpp
  7. +59 -0  regression/randomforest/RTBClusterRandom.h
  8. +190 -0  regression/randomforest/RTBGrid.cpp
  9. +59 -0  regression/randomforest/RTBGrid.h
  10. +289 -0  regression/randomforest/RTBMeanPostImprovement.cpp
  11. +72 -0  regression/randomforest/RTBMeanPostImprovement.h
  12. +228 -0  regression/randomforest/RTBRandom.cpp
  13. +72 -0  regression/randomforest/RTBRandom.h
  14. +345 -0  regression/randomforest/RegRandomForests.cpp
  15. +128 -0  regression/randomforest/RegRandomForests.h
  16. +146 -0  regression/randomforest/RegressionNode.cpp
  17. +92 -0  regression/randomforest/RegressionNode.h
  18. +257 -0  regression/randomforest/RegressionTree.cpp
  19. +78 -0  regression/randomforest/RegressionTree.h
  20. +53 -0  regression/randomforest/RegressionTreeBuilder.cpp
  21. +56 -0  regression/randomforest/RegressionTreeBuilder.h
  22. +1 -0  regression/randomforest/libdepend.inc
  23. +8 -0  regression/regcombination/Makefile
  24. +103 -0  regression/regcombination/Makefile.inc
  25. +153 -0  regression/regcombination/RegPreRandomForests.cpp
  26. +62 -0  regression/regcombination/RegPreRandomForests.h
  27. +3 -0  regression/regcombination/libdepend.inc
  28. +3 -2  regression/regressionbase/RegressionAlgorithmKernel.cpp
  29. +1 -1  regression/regressionbase/RegressionAlgorithmKernel.h

+ 4 - 4
regression/gpregression/RegGaussianProcess.cpp

@@ -68,6 +68,7 @@ RegGaussianProcess::RegGaussianProcess ( const RegGaussianProcess & src ) :
 {
 	kInvY = src.kInvY;
 	verbose = src.verbose;
+	maxIterations = src.maxIterations;
 	optimizeParameters = src.optimizeParameters;
 	optimizationMethod = src.optimizationMethod;
 	traceApproximation = src.traceApproximation;
@@ -83,7 +84,7 @@ RegGaussianProcess::~RegGaussianProcess()
 
 }
 
-void RegGaussianProcess::teach ( KernelData *kernelData, const NICE::Vector & y )
+void RegGaussianProcess::teachKernel ( KernelData *kernelData, const NICE::Vector & y )
 {
 	if ( optimizeParameters ) 
 	{
@@ -111,7 +112,7 @@ void RegGaussianProcess::teach ( KernelData *kernelData, const NICE::Vector & y
 				if ( verbose ) 
 					cerr << "RegGaussianProcess: using conjugate gradient optimizer" << endl;
 
-				FirstOrderRasmussen *optimizer = new FirstOrderRasmussen();
+				FirstOrderRasmussen *optimizer = new FirstOrderRasmussen( verbose );
 				optimizer->setEpsilonG ( 0.01 );
 				optimizer->setMaxIterations ( -maxIterations );
 				optimizer->optimizeFirst ( gpopt );
@@ -137,8 +138,7 @@ void RegGaussianProcess::teach ( KernelData *kernelData, const NICE::Vector & y
 			fthrow(Exception, "KCGPRegression: you have to specify a kernel function !" );
 		}
 	} else {
-
-		if ( !kernelData->hasCholeskyFactorization() )
+		if ( !kernelData->hasCholeskyFactorization() ) 
 			kernelData->updateCholeskyFactorization();
 	}
 

+ 1 - 1
regression/gpregression/RegGaussianProcess.h

@@ -61,7 +61,7 @@ class RegGaussianProcess : public RegressionAlgorithmKernel
 		/** learn parameters/models/whatever with a kernel matrix of a set
 		 *  of vectors and the corresponding function values \c y 
 		 */
-		void teach ( KernelData *kernelData, const NICE::Vector & y );
+		void teachKernel ( KernelData *kernelData, const NICE::Vector & y );
 
 		/** predict the function value for a vector by using its kernel values with
 		 * the used training set, be careful with the order in \c kernelVector
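
Note on the rename: teach() becomes teachKernel(), so existing call sites must be updated. A minimal sketch of an adapted caller, using only constructors visible in this commit ('conf', 'kernelData' and 'y' are assumed to exist as before):

    // Sketch only, not part of the commit.
    Kernel *kernel = new KernelExp ( conf.gD ( "Kernel", "log_rbf_gamma", -2.5 ), 0.0 );
    RegGaussianProcess gp ( &conf, kernel, "GPRegression" );
    gp.teachKernel ( kernelData, y );   // formerly: gp.teach ( kernelData, y );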

+ 313 - 0
regression/progs/testRegressionRDFGP.cpp

@@ -0,0 +1,313 @@
+/**
+* @file testRegressionRDFGP.cpp
+* @brief test of RDF with GP
+* @author Sven Sickert
+* @date 07/02/2013
+
+*/
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/regcombination/RegPreRandomForests.h"
+#include "vislearning/regression/gpregression/RegGaussianProcess.h"
+
+#include "vislearning/math/kernels/KernelExp.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  bool inquotes=false;
+  char c;
+  int linemax=line.length();
+  string curstring;
+  record.clear();
+
+  while(linepos < linemax && line[linepos]!=0)
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame ( Config confRDF,
+                 NICE::VVector &xdata,
+                 NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  
+  int trainingSize = (int)(.2*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = confRDF.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveForest = confRDF.gB( "debug", "save_forest", false );
+  string leafReg = confRDF.gS( "PreRandomForest", "leaf_regression", "GaussProcess" );
+  
+  KernelExp *kernel_template = new KernelExp ( confRDF.gD("Kernel", "log_rbf_gamma", -2.5), 0.0 );
+  
+  /*------------Store Configuration------------*/
+  string filename = confRDF.gS( "debug", "filename" );
+  
+  if ( saveForest )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+    storeEvalData << "random_split_tests=" << confRDF.gI ( "RTBRandom", "random_split_tests" ) << endl;
+    storeEvalData << "random_features=" << confRDF.gI ( "RTBRandom", "random_features" ) << endl;
+    storeEvalData << "max_depth=" << confRDF.gI ( "RTBRandom", "max_depth" ) << endl;
+    storeEvalData << "random_split_mode=" << confRDF.gS ( "RTBRandom", "random_split_mode" ) << endl;
+    storeEvalData << "min_examples=" << confRDF.gI ( "RTBRandom", "min_examples" ) << endl;
+    storeEvalData << "number_of_trees=" << confRDF.gI ( "RandomForest", "number_of_trees" ) << endl;
+    storeEvalData << "features_per_tree=" << confRDF.gD ( "RandomForest", "features_per_tree" ) << endl;
+    storeEvalData << "samples_per_tree=" << confRDF.gD ( "RandomForest", "samples_per_tree" ) << endl;
+    storeEvalData << "builder=" << confRDF.gS ( "RandomForest", "builder" ) << endl;
+    storeEvalData << "minimum_error_reduction=" << confRDF.gD ( "RandomForest", "minimum_error_reduction" ) << endl;
+    storeEvalData << "log_rbf_gamma=" << confRDF.gD ( "Kernel", "log_rbf_gamma" ) << endl;
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*------------Setting up PreRDF--------------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+    cerr << "Initializing leaf regression method " << leafReg << "...";
+    RegressionAlgorithm *leafRegression = NULL;
+    if ( leafReg == "GaussProcess" )
+    {
+      Kernel *kernel_function = NULL;
+      kernel_function = new KernelExp ( *(kernel_template) );
+      leafRegression = new RegGaussianProcess( &confRDF, kernel_function, "GPRegression" );
+    }
+    else if ( leafReg == "none" ) {
+      cerr << "\ntestRegressionRDFGP::testFrame: No leaf regression method set! Using RandomForest prediction..." << endl;
+    } else {
+      cerr << "\ntestRegressionRDFGP::testFrame: No valid leaf regression method set! Aborting..." << endl;
+      exit(-1);
+    }
+    cerr << "Finished." << endl;
+
+    cerr << "Initializing PreRDF for regression...";
+    RegPreRandomForests *prf = new RegPreRandomForests ( &confRDF, "PreRandomForest", leafRegression );
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the PreRDF for regression...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    prf->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*-------------Testing RDF-GP--------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = prf->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //Config for RFGP
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
+
+
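
The evaluation block in testFrame() prints four fold statistics. Below is a self-contained sketch of the central formulas on plain std::vector (illustrative names, not part of the commit); the correlation is computed analogously from the centered products of residuals and targets:

    // Self-contained sketch of the per-fold statistics printed above:
    // modelling efficiency, mean square error and standardized MSE.
    #include <cstddef>
    #include <vector>

    static double mean ( const std::vector<double> & v )
    {
      double s = 0.0;
      for ( std::size_t i = 0; i < v.size(); i++ ) s += v[i];
      return s / v.size();
    }

    static double variance ( const std::vector<double> & v )
    {
      double m = mean ( v ), s = 0.0;
      for ( std::size_t i = 0; i < v.size(); i++ ) s += ( v[i] - m ) * ( v[i] - m );
      return s / v.size();
    }

    // t: true values, p: predictions of one fold
    void foldStats ( const std::vector<double> & t, const std::vector<double> & p,
                     double & mef, double & mse, double & smse )
    {
      std::vector<double> diff ( t.size() );
      for ( std::size_t i = 0; i < t.size(); i++ ) diff[i] = t[i] - p[i];
      mse = 0.0;
      for ( std::size_t i = 0; i < diff.size(); i++ ) mse += diff[i] * diff[i];
      mse /= diff.size();
      mef  = 1.0 - variance ( diff ) / variance ( t );   // "Modelling Efficiency"
      smse = mse / variance ( t );                       // "Standardized MSE"
    }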

+ 8 - 0
regression/randomforest/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/randomforest/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 167 - 0
regression/randomforest/RTBClusterRandom.cpp

@@ -0,0 +1,167 @@
+/**
+* @file RTBClusterRandom.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/19/2013
+
+*/
+#include <iostream>
+
+#include "RTBClusterRandom.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBClusterRandom::RTBClusterRandom( const Config *conf, std::string section )
+{
+  max_depth = conf->gI(section, "max_depth", 20 );
+  min_examples = conf->gI(section, "min_examples", 10);
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBClusterRandom::~RTBClusterRandom()
+{
+}
+
+bool RTBClusterRandom::balancingLeftRight(const vector< pair< double, int > > values,
+          double threshold,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+    }
+    else
+    {
+      count_right++;
+    }
+  }
+  
+#ifdef DETAILTREE
+  fprintf (stderr, "left vs. right: %d : %d\n", count_left, count_right );
+#endif
+  
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+  
+  return true;
+}
+
+RegressionNode *RTBClusterRandom::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  node->nodePrediction( y, selection );
+  double lsError = node->lsError;
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBClusterRandom: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBClusterRandom: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  vector<pair<double, int> > values;
+  
+  int f = rand() % x[0].size();
+    
+  values.clear();
+  collectFeatureValues ( x, selection, f, values );
+    
+  double median   = (values.begin() + values.size() / 2)->first;
+    
+#ifdef DETAILTREE
+  double minValue = (min_element ( values.begin(), values.end() ))->first;
+  double maxValue = (max_element ( values.begin(), values.end() ))->first;
+  fprintf (stderr, "max %f min %f med %f\n", maxValue, minValue, median );
+#endif
+    
+  int count_left, count_right;
+  if ( ! balancingLeftRight( values, median, count_left, count_right) )
+  {
+    fprintf ( stderr, "RTBClusterRandom: no split possible (empty leaf)\n" );
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+      
+#ifdef DETAILTREE
+  fprintf (stderr, "t %f for feature %d\n", median, f );
+#endif
+  
+  node->f = f;
+  node->threshold = median;
+  
+  // recalculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < median )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBClusterRandom::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+}
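
Note: the builder above thresholds at the middle element of the collected values, which is the median only if collectFeatureValues() returns the pairs sorted by feature value. A self-contained sketch of the intended balanced split, sorting explicitly (not part of the commit):

    // Median-threshold split over (feature value, example index) pairs.
    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    void medianSplit ( std::vector< std::pair<double, int> > values,
                       std::vector<int> & left, std::vector<int> & right )
    {
      std::sort ( values.begin(), values.end() );   // sort by feature value
      double median = ( values.begin() + values.size() / 2 )->first;
      for ( std::size_t i = 0; i < values.size(); i++ )
      {
        if ( values[i].first < median ) left.push_back ( values[i].second );
        else                            right.push_back ( values[i].second );
      }
    }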

+ 59 - 0
regression/randomforest/RTBClusterRandom.h

@@ -0,0 +1,59 @@
+/**
+* @file RTBClusterRandom.h
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/19/2013
+
+*/
+#ifndef RTBCLUSTERRANDOMINCLUDE
+#define RTBCLUSTERRANDOMINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree */
+class RTBClusterRandom : public RegressionTreeBuilder
+{
+  
+  protected:
+    int max_depth;
+    int min_examples;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+
+    bool balancingLeftRight ( const std::vector< std::pair< double, int > > values,
+          double threshold,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBClusterRandom( const NICE::Config *conf, std::string section = "RTBClusterRandom" );
+    
+    /** simple destructor */
+    virtual ~RTBClusterRandom();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+}
+
+#endif

+ 190 - 0
regression/randomforest/RTBGrid.cpp

@@ -0,0 +1,190 @@
+/**
+* @file RTBGrid.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/15/2013
+
+*/
+#include <iostream>
+
+#include "RTBGrid.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBGrid::RTBGrid( const Config *conf, std::string section )
+{
+  max_depth = conf->gI(section, "max_depth", 20 );
+  min_examples = conf->gI(section, "min_examples", 10);
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBGrid::~RTBGrid()
+{
+}
+
+bool RTBGrid::balancingLeftRight(const vector< pair< double, int > > values,
+          double threshold,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+    }
+    else
+    {
+      count_right++;
+    }
+  }
+  
+#ifdef DETAILTREE
+  fprintf (stderr, "left vs. right: %d : %d\n", count_left, count_right );
+#endif
+  
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+  
+  return true;
+}
+
+RegressionNode *RTBGrid::buildRecursive ( const NICE::VVector & x,
+          const std::vector<std::vector<double> > & limits,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBGrid: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBGrid: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  vector<pair<double, int> > values;
+  
+  int f = depth % x[0].size();
+    
+  values.clear();
+  collectFeatureValues ( x, selection, f, values );
+    
+#ifdef DETAILTREE
+  double minValue = (min_element ( values.begin(), values.end() ))->first;
+  double maxValue = (max_element ( values.begin(), values.end() ))->first;
+  fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    
+  double threshold = 0.5 * (limits[f][0]+limits[f][1]);
+  int tmp = depth;
+  while( tmp > (int)x[0].size() )
+  {
+    threshold *= 0.5;
+    tmp -= x[0].size();
+  }
+      
+  int count_left, count_right;
+  if ( ! balancingLeftRight( values, threshold, count_left, count_right) )
+  {
+    fprintf ( stderr, "RTBGrid: no split possible (empty leaf)\n" );
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+      
+#ifdef DETAILTREE
+  fprintf (stderr, "t %f for feature %d\n", threshold, f );
+#endif
+  
+  node->f = f;
+  node->threshold = threshold;
+  
+  // recalculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, limits, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, limits, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBGrid::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  // get min/max values for all features
+  int fcount = x[0].size();
+  vector< vector<double> > limits;
+  for ( int j = 0; j < fcount; j++ )
+  {
+    double min = numeric_limits<double>::max();
+    double max = -numeric_limits<double>::max(); // ::min() is the smallest positive double, not the lowest value
+    for ( int i = 0; i < (int)x.size(); i++ )
+    {
+      double value = x[i][j];
+      if (value > max ) max = value;
+      if (value < min ) min = value;
+    }
+    vector<double> flimit;
+    flimit.push_back(min);
+    flimit.push_back(max);
+    limits.push_back(flimit);
+  }
+  
+  return buildRecursive( x, limits, all, 0);
+}
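
The grid builder chooses its threshold deterministically. A self-contained sketch of the schedule (illustrative, not part of the commit):

    // The split feature cycles with depth; the midpoint of its global
    // [min,max] range is halved once per full pass over all features.
    #include <vector>

    double gridThreshold ( const std::vector< std::vector<double> > & limits,
                           int depth )
    {
      int dim = (int)limits.size();
      int f = depth % dim;                              // feature at this level
      double t = 0.5 * ( limits[f][0] + limits[f][1] ); // midpoint of range
      for ( int d = depth; d > dim; d -= dim )          // one halving per pass
        t *= 0.5;
      return t;
    }

Note that the halving acts on the threshold value itself rather than on the interval width, so for feature ranges far from zero later passes can push the threshold outside the feature's range; the balancingLeftRight() check then rejects such degenerate splits.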

+ 59 - 0
regression/randomforest/RTBGrid.h

@@ -0,0 +1,59 @@
+/**
+* @file RTBGrid.h
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/15/2013
+
+*/
+#ifndef RTBGRIDINCLUDE
+#define RTBGRIDINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree */
+class RTBGrid : public RegressionTreeBuilder
+{
+  
+  protected:
+    int max_depth;
+    int min_examples;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const std::vector< std::vector< double > > & limits,
+          std::vector<int> & selection,
+          int depth);
+
+    bool balancingLeftRight ( const std::vector< std::pair< double, int > > values,
+          double threshold,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBGrid( const NICE::Config *conf, std::string section = "RTBGrid" );
+    
+    /** simple destructor */
+    virtual ~RTBGrid();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+}
+
+#endif

+ 289 - 0
regression/randomforest/RTBMeanPostImprovement.cpp

@@ -0,0 +1,289 @@
+/**
+* @file RTBMeanPostImprovement.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/23/2013
+
+*/
+#define _USE_MATH_DEFINES
+
+#include <iostream>
+#include <math.h>
+#include "RTBMeanPostImprovement.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBMeanPostImprovement::RTBMeanPostImprovement( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  minimum_improvement = conf->gD("RandomForest", "minimum_improvement", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+  auto_bandwith = conf->gB(section, "auto_bandwith", true);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBMeanPostImprovement::~RTBMeanPostImprovement()
+{
+}
+
+bool RTBMeanPostImprovement::improvementLeftRight(const vector< pair< double, int > > values,
+          const Vector & y,
+          double threshold,
+          vector<double> & empDist_left,
+          vector<double> & empDist_right,
+          int& count_left,
+          int& count_right,
+          double& h,
+          double& p )
+{
+  count_left = 0;
+  count_right = 0;
+  vector<double> selection_left;
+  vector<double> selection_right;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    if ( (it->first) < threshold )
+    {
+      count_left++;
+      selection_left.push_back( y[ it->second ] );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( y[ it->second ] );
+    }
+  }
+  
+  if ( (count_left < min_examples) || (count_right < min_examples) )
+    return false; // no split
+  
+  Vector vleft ( selection_left );
+  Vector vright ( selection_right );
+  
+  // empirical distribution [Taylor & Jones, 1996]
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double yval = y[ it->second ];
+    int smaller_left = 0;
+    int smaller_right = 0;
+    for ( int l = 0; l < count_left; l++ )
+    {
+      if ( selection_left[l] <= yval ) smaller_left++;
+    }
+    for ( int r = 0; r < count_right; r++ )
+    {
+      if ( selection_right[r] <= yval ) smaller_right++;
+    }
+    if ( (it->first) < threshold )
+    {
+      double emp = (double)(smaller_left)/(double)values.size();
+      empDist_left.push_back( emp );
+    } else {
+      double emp = (double)(smaller_right)/(double)values.size();
+      empDist_right.push_back( emp );
+    }
+  }
+  
+  // bandwidth parameter [Taylor & Jones, 1996]
+  if (auto_bandwith)
+  {
+    double sigma_hat = sqrt( vleft.StdDev()*vleft.StdDev() + vright.StdDev()*vright.StdDev() );
+    double z_hat = (double)( vleft.Mean() - vright.Mean() ) / sigma_hat;
+    p = (double)count_left / (double)values.size();
+    double tmp = (z_hat*z_hat - 1);
+    h = sigma_hat / (double)( 2 * sqrt(M_PI) * p * (1-p) * tmp*tmp * gaussianVal(z_hat, 1.0) );
+  }
+  else
+    h = 1.0;
+  
+  return true;
+}
+
+double RTBMeanPostImprovement::gaussianVal ( const double input,
+          const double bandwidth )
+{
+  return ( 1 / ( sqrt( 2 * M_PI ) * sqrt(2) * bandwidth ) * exp ( -0.25 * input * input ) );
+}
+
+RegressionNode *RTBMeanPostImprovement::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  node->nodePrediction( y, selection );
+  double lsError = node->lsError;
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBMeanPostImprovement: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBMeanPostImprovement: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_improvement = -1.0;
+  vector<pair<double, int> > values;
+  
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+    
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+    
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+    ofstream datafile;
+    char buffer [20];
+    int n = sprintf(buffer, "detailtree%d.dat", k);
+    datafile.open( buffer );
+    datafile << "# This file is called detailtree.dat" << endl;
+    datafile << "# Data of the Mean Posterior Improvement Criterium" << endl;
+    datafile << "# threshold \tI \t\tMPI" << endl;
+#endif
+    if ( maxValue - minValue < 1e-7 ) continue;
+    
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      double threshold;
+      threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+      //double step = (maxValue - minValue) / random_split_tests;
+      //threshold = minValue + i*step;
+      
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s (t) %d/%d (%f)\n", k, i, threshold );
+#endif
+      
+      vector<double> empDist_left, empDist_right;
+      int count_left, count_right;
+      double h, p;
+      if ( ! improvementLeftRight( values, y, threshold, empDist_left,
+          empDist_right, count_left, count_right, h, p) )
+        continue;
+      
+      // mean posterior improvement
+      double I_hat = 0.0;
+      for ( int l = 0; l < count_left; l++ )
+      {
+        for ( int r = 0; r < count_right; r++ ) 
+        {
+          I_hat += gaussianVal( (empDist_left[l] - empDist_right[r]), h );
+          //I_hat += (empDist_left[l] - empDist_right[r]);
+        }
+      }
+      I_hat /= ((double)count_left*(double)count_right);
+      double mpi_hat = p * (1-p) * (1-I_hat);
+
+#ifdef DETAILTREE
+      fprintf (stderr, "pL=%f, pR=%f, I=%f --> M=%f\n", p, (1-p), I_hat, mpi_hat);
+      datafile << threshold << " " << I_hat << " " << mpi_hat << endl;
+#endif      
+      
+      if ( mpi_hat > best_improvement )
+      {
+        best_improvement = mpi_hat;
+        best_threshold =  threshold;
+        best_feature = f;
+      }
+    }
+#ifdef DETAILTREE
+    datafile.close();
+#endif    
+  }
+
+#ifdef DETAILTREE
+  fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+  
+  if ( best_improvement < minimum_improvement )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBMeanPostImprovement: error reduction to small !\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+  
+  node->f = best_feature;
+  node->threshold = best_threshold;
+  
+  // recalculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBMeanPostImprovement::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+}
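
For reference, the score assembled in buildRecursive() is MPI = p(1-p)(1-I), with I the kernel-smoothed overlap of the two empirical distributions [Taylor & Jones, 1996]. A self-contained sketch (function names mirror the class methods; the block itself is not part of the commit):

    #define _USE_MATH_DEFINES
    #include <cmath>
    #include <cstddef>
    #include <vector>

    double gaussianVal ( double input, double bandwidth )
    {
      return 1.0 / ( std::sqrt ( 2.0 * M_PI ) * std::sqrt ( 2.0 ) * bandwidth )
             * std::exp ( -0.25 * input * input );
    }

    // empLeft/empRight: empirical distribution values of y on each split side
    // h: bandwidth, p: fraction of examples that went to the left side
    double mpiScore ( const std::vector<double> & empLeft,
                      const std::vector<double> & empRight,
                      double h, double p )
    {
      double I = 0.0;
      for ( std::size_t l = 0; l < empLeft.size(); l++ )
        for ( std::size_t r = 0; r < empRight.size(); r++ )
          I += gaussianVal ( empLeft[l] - empRight[r], h );
      I /= (double)( empLeft.size() * empRight.size() );
      return p * ( 1.0 - p ) * ( 1.0 - I );
    }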

+ 72 - 0
regression/randomforest/RTBMeanPostImprovement.h

@@ -0,0 +1,72 @@
+/**
+* @file RTBMeanPostImprovement.h
+* @brief regression tree splitting criterion by Taylor and Jones, 1996
+* @author Sven Sickert
+* @date 07/23/2013
+
+*/
+#ifndef RTBMEANPOSTIMPROVEMENTINCLUDE
+#define RTBMEANPOSTIMPROVEMENTINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** regression tree splitting criterion by Taylor and Jones, 1996 */
+class RTBMeanPostImprovement : public RegressionTreeBuilder
+{
+  
+  protected:
+    int random_split_tests;
+    int random_features;
+    int max_depth;
+    int min_examples;
+    double minimum_improvement;
+    
+    bool auto_bandwith;
+    
+    /** save indices in leaves */
+    bool save_indices;
+    
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+    
+    double gaussianVal( const double input, 
+          const double bandwidth );
+
+    bool improvementLeftRight ( const std::vector< std::pair< double, int > > values,
+          const NICE::Vector & y,
+          double threshold,
+          std::vector<double> & empDist_left,
+          std::vector<double> & empDist_right,
+          int & count_left,
+          int & count_right,
+          double& h,
+          double& p );
+
+  public:
+    
+    /** simple constructor */
+    RTBMeanPostImprovement( const NICE::Config *conf, std::string section = "RTBMeanPostImprovement" );
+    
+    /** simple destructor */
+    virtual ~RTBMeanPostImprovement();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+} // namespace
+
+#endif

+ 228 - 0
regression/randomforest/RTBRandom.cpp

@@ -0,0 +1,228 @@
+/**
+* @file RTBRandom.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+
+#include "RTBRandom.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBRandom::RTBRandom( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBRandom::~RTBRandom()
+{
+}
+
+bool RTBRandom::errorReductionLeftRight(const vector< pair< double, int > > values,
+          const Vector & y,
+          double threshold,
+          double& error_left,
+          double& error_right,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  vector<int> selection_left;
+  vector<int> selection_right;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+      selection_left.push_back( it->second );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( it->second );
+    }
+  }
+  
+//   if ( (count_left == 0) || (count_right == 0) )
+//     return false; // no split
+  
+  if ( (count_left < min_examples)  || (count_right < min_examples) )
+    return false; // no split
+  
+  RegressionNode *left = new RegressionNode ();
+  left->nodePrediction( y, selection_left );
+  error_left = left->lsError;
+  delete left;
+  
+  RegressionNode *right = new RegressionNode ();
+  right->nodePrediction( y, selection_right );
+  error_right = right->lsError;
+  delete right;
+  
+  return true;
+}
+
+RegressionNode *RTBRandom::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  node->nodePrediction( y, selection );
+  double lsError = node->lsError;
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBRandom: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBRandom: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_reduct = -1.0;
+  vector<pair<double, int> > best_values;
+  vector<pair<double, int> > values;
+  double lsError_left = 0.0;
+  double lsError_right = 0.0;
+  
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+    
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+    
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    if ( maxValue - minValue < 1e-7 ) continue;
+    
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      double threshold;
+      threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+      
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
+#endif
+      lsError_left = 0.0;
+      lsError_right = 0.0;
+      
+      int count_left, count_right;
+      if ( ! errorReductionLeftRight( values, y, threshold, lsError_left,
+          lsError_right, count_left, count_right) )
+        continue;
+      
+      //double pl = (count_left) / (count_left +count_right);
+      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
+      double errorReduction = lsError - lsError_left - lsError_right;
+      
+      if ( errorReduction > best_reduct )
+      {
+        best_reduct = errorReduction;
+        best_threshold =  threshold;
+        best_feature = f;
+#ifdef DETAILTREE
+        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+      }
+    }
+  }
+  
+  if ( best_reduct < minimum_error_reduction )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBRandom: error reduction to small !\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+  
+  node->f = best_feature;
+  node->threshold = best_threshold;
+  
+  // recalculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBRandom::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+}
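
The split criterion above is a plain sum-of-squares reduction. Assuming RegressionNode::nodePrediction() sets lsError to the sum of squared deviations from the node mean (RegressionNode.cpp is not shown in this excerpt), the score reduces to the following self-contained sketch:

    #include <cstddef>
    #include <vector>

    // Sum of squared deviations from the mean prediction of a node.
    double lsError ( const std::vector<double> & y )
    {
      double mean = 0.0;
      for ( std::size_t i = 0; i < y.size(); i++ ) mean += y[i];
      mean /= y.size();
      double err = 0.0;
      for ( std::size_t i = 0; i < y.size(); i++ )
        err += ( y[i] - mean ) * ( y[i] - mean );
      return err;
    }

    // errorReduction = lsError(parent) - lsError(left) - lsError(right);
    // the split maximizing this wins, subject to minimum_error_reduction.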

+ 72 - 0
regression/randomforest/RTBRandom.h

@@ -0,0 +1,72 @@
+/**
+* @file RTBRandom.h
+* @brief random regression tree
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#ifndef RTBRANDOMINCLUDE
+#define RTBRANDOMINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree */
+class RTBRandom : public RegressionTreeBuilder
+{
+  
+  protected:
+    int random_split_tests;
+    int random_features;
+    int max_depth;
+    int min_examples;
+    double minimum_error_reduction;
+    
+    int random_split_mode;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    enum {
+      RANDOM_SPLIT_INDEX = 0,
+      RANDOM_SPLIT_UNIFORM
+    };
+    
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+
+    bool errorReductionLeftRight ( const std::vector< std::pair< double, int > > values,
+          const NICE::Vector & y,
+          double threshold,
+          double & error_left,
+          double & error_right,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBRandom( const NICE::Config *conf, std::string section = "RTBRandom" );
+    
+    /** simple destructor */
+    virtual ~RTBRandom();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+} // namespace
+
+#endif

+ 345 - 0
regression/randomforest/RegRandomForests.cpp

@@ -0,0 +1,345 @@
+/**
+* @file RegRandomForests.cpp
+* @brief implementation of random set forests for regression
+* @author Sven Sickert
+* @date 06/28/2013
+
+*/
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+#include <assert.h>
+#include <cmath>
+
+#include "vislearning/regression/randomforest/RegRandomForests.h"
+#include "vislearning/regression/randomforest/RTBRandom.h"
+#include "vislearning/regression/randomforest/RTBGrid.h"
+#include "vislearning/regression/randomforest/RTBClusterRandom.h"
+#include "vislearning/regression/randomforest/RTBMeanPostImprovement.h"
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+RegRandomForests::RegRandomForests()
+{
+  builder = NULL;
+  minimum_error_reduction = 0.0;
+  enableOutOfBagEstimates = false;
+}
+
+RegRandomForests::RegRandomForests( const Config *_conf,
+          std::string section ) : conf(_conf)
+{
+  std::string builder_method = conf->gS(section, "builder", "random");
+  minimum_error_reduction = conf->gD(section, "minimum_error_reduction", 10e-3);
+  enableOutOfBagEstimates = conf->gB(section, "enable_out_of_bag_estimates", false);
+  
+  confsection = section;
+  
+  if ( builder_method == "none" ) {
+    // do not initialize
+    builder = NULL;
+  }
+  else {
+    number_of_trees = conf->gI(section, "number_of_trees", 20 );
+    features_per_tree = conf->gD(section, "features_per_tree", 1.0 );
+    samples_per_tree  = conf->gD(section, "samples_per_tree", 0.2 );
+    
+    if ( builder_method == "random" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBRandom");
+      builder = new RTBRandom ( conf, builder_section );
+    }
+    else if ( builder_method == "grid" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBGrid");
+      builder = new RTBGrid ( conf, builder_section );
+    }
+    else if ( builder_method == "cluster_random" ) 
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBClusterRandom");
+      builder = new RTBClusterRandom ( conf, builder_section );
+    }
+    else if ( builder_method == "mean_post_improvement" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBMeanPostImprovement");
+      builder = new RTBMeanPostImprovement ( conf, builder_section );
+    } else {
+      fprintf (stderr, "RegressionTreeBuilder %s not yet implemented !\n", builder_method.c_str() );
+      exit(-1);
+    }
+  } 
+}
+
+RegRandomForests::~RegRandomForests()
+{
+  for ( vector<RegressionTree *>::iterator it = forest.begin();
+             it != forest.end(); it++ )
+    delete (*it);
+  
+  if ( builder != NULL )
+    delete builder;
+}
+
+void RegRandomForests::calcOutOfBagEstimates (
+          std::vector< std::vector<int> > & outofbagtrees,
+          NICE::VVector x,
+          NICE::Vector y )
+{
+  oobResults.clear();
+  
+  // calculate out-of-bag regression results as suggested by Breiman:
+  // training data not used to build a given tree
+  // serves as test data for that tree
+  long index = 0;
+  for ( int i = 0; i < (int)x.size(); i++, index++ )
+  {
+    double trueValue = y[i];
+    const vector<int> & trees = outofbagtrees[index];
+    
+    if ( trees.size() <= 0 ) continue;
+    
+    double predValue = predict ( x[i], trees );
+    
+    double predError = fabs( trueValue - predValue );
+    oobResults.push_back ( pair<double, double> ( predError, trueValue ) );
+  }
+}
+
+void RegRandomForests::getLeafNodes ( NICE::Vector x,
+          std::vector<RegressionNode *> & leafNodes,
+          int depth )
+{
+  leafNodes.reserve ( forest.size() );
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+        it != forest.end(); it++ )
+  {
+    RegressionTree & rt = *(*it);
+    RegressionNode *leaf = rt.getLeafNode ( x, depth );
+    leafNodes.push_back ( leaf );
+  }
+}
+
+void RegRandomForests::getAllLeafNodes ( vector<RegressionNode *> & leafNodes)
+{
+  int z = 0;
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++, z++ )
+  {
+    RegressionTree & rt = *(*it);
+    vector<RegressionNode *> leaves = rt.getAllLeafNodes();
+    for ( int j = 0; j < (int)leaves.size(); j++ )
+    {
+      for ( int k = 0; k < (int)leaves[j]->trainExamplesIndices.size(); k++ )
+      {
+        leaves[j]->trainExamplesIndices[k] = exselection[z][leaves[j]->trainExamplesIndices[k]];
+      }
+      leafNodes.push_back(leaves[j]);
+    }
+  }
+}
+
+void RegRandomForests::teach ( const NICE::VVector & x, const NICE::Vector & y )
+{
+  cerr << "RegRandomForests::teach()" << endl;
+  assert( builder != NULL );
+  
+  int featuresCount = (int) (x[0].size() * features_per_tree );
+  fprintf(stderr, "RegRandomForests: number of features %d\n", (int)x[0].size() );
+  
+  vector< vector<int> > outofbagtrees;
+  outofbagtrees.resize( x.size() );
+  
+  for ( int k = 0; k < number_of_trees; k++ )
+  {
+    vector<int> tmp;
+    exselection.push_back(tmp);
+  }
+  
+  #pragma omp parallel for
+  for ( int k = 0; k < number_of_trees; k++ )
+  {
+    fprintf( stderr, "[ -- building tree %d/%d -- ]\n", k + 1, number_of_trees);
+    
+    vector<int> examples_index;
+    for ( int i = 0; i < (int)x.size(); i++ )
+    {
+      examples_index.push_back( i );
+    }
+    
+    int trainingExamples = (int)(examples_index.size() * samples_per_tree);
+    fprintf (stderr, "RegRandomForests: selection of %d examples for each tree\n", trainingExamples );
+    
+    if ( (trainingExamples < 3) && ((int)examples_index.size() > trainingExamples) )
+    {
+      fprintf(stderr, "RegRandomForests: number of examples < 3 !! minExamples=%d, trainingExamples=%d\n",
+                      (int)x.size(), trainingExamples);
+      trainingExamples = examples_index.size();
+      fprintf(stderr, "RegRandomForests: I will use all %d examples. !!\n", trainingExamples);
+    }
+    
+    if ( samples_per_tree < 1.0 )
+      random_shuffle( examples_index.begin(), examples_index.end() );
+    
+    VVector subset;
+    Vector subval ( trainingExamples );
+    for ( int e = 0; e < trainingExamples; e++ )
+    {
+      exselection[k].push_back( examples_index[e] );
+      subset.push_back( x[ examples_index[e] ] );
+      subval.set( e, y[ examples_index[e] ] );
+    }
+        
+    // set out of bag trees
+    for ( uint e = trainingExamples; e < examples_index.size(); e++ )
+    {
+      int index = examples_index[e];
+      #pragma omp critical
+      outofbagtrees[index].push_back(k);
+    }
+    
+    /******* select a random feature set *******/
+    vector<int> features_subset;
+    for ( int j = 0; j < (int)x[0].size(); j++ )
+      features_subset.push_back( j );
+    
+    random_shuffle( features_subset.begin(), features_subset.end() );
+    while ((int)features_subset.size() > featuresCount)
+      features_subset.pop_back();
+    
+    /******* training of an individual tree ****/
+    RegressionTree *tree = new RegressionTree( conf );
+    
+    builder->build( *tree, subset, subval );
+    
+    /******* prune tree using least squares criterion *****/
+    //if ( minimum_error_reduction > 0.0 )
+    //  tree->pruneTreeLeastSquares( minimum_error_reduction );
+    
+    /******* add individual tree to ensemble *****/
+    #pragma omp critical
+    forest.push_back(tree);
+  }
+  
+  if (enableOutOfBagEstimates)
+    calcOutOfBagEstimates(outofbagtrees, x, y);
+}
+
+double RegRandomForests::predict ( const NICE::Vector & x, 
+          const vector< int > & outofbagtrees )
+{
+  // predict using only a selection of all trees
+  // contained in outofbagtrees
+  
+  double overall_prediction = 0.0;
+  int treecount = 0;
+  
+  for ( vector<int>::const_iterator it = outofbagtrees.begin();
+        it != outofbagtrees.end();
+        it++ )
+  {
+    assert ( *it < (int)forest.size() );
+    RegressionTree & rt = *(forest[(*it)]);
+    double predVal;
+    rt.traverse( x, predVal );
+    
+    overall_prediction += predVal;
+    treecount++;
+  }
+  
+  overall_prediction /= treecount;
+  
+  return overall_prediction;
+}
+
+
+double RegRandomForests::predict ( const NICE::Vector & x )
+{
+  double overall_prediction = 0.0;
+  int treecount = 0;
+  
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+        it != forest.end();
+        it++ )
+  {
+    RegressionTree & rt = *(*it);
+    double predVal;
+    rt.traverse( x, predVal );
+    
+    overall_prediction += predVal;
+    treecount++;
+  }
+  
+  overall_prediction /= treecount;
+  
+  return overall_prediction;
+}
+
+void RegRandomForests::restore(istream & is, int format)
+{
+  std::string tag;
+  int index;
+
+  while ( (is >> tag) && (tag == "TREE") )
+  {
+    is >> index;
+    RegressionTree *rt = new RegressionTree ( conf );
+    rt->restore ( is );
+    if ( minimum_error_reduction > 0.0 )
+      rt->pruneTreeLeastSquares ( minimum_error_reduction );
+
+    forest.push_back(rt);
+  }
+}
+
+void RegRandomForests::store(ostream & os, int format) const
+{
+  int index = 0;
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++, index++ )
+  {
+    const RegressionTree & rt = *(*it);
+    os << "TREE " << index << endl;
+    rt.store ( os, format );
+    os << "ENDTREE ";
+  }
+}
+
+void RegRandomForests::clear()
+{
+  for ( vector<RegressionTree *>::iterator it = forest.begin();
+          it != forest.end(); it++ )
+    delete (*it);
+
+  forest.clear();
+}
+
+void RegRandomForests::indexDescendants(
+          map<RegressionNode *, pair<long, int> > & index) const
+{
+  long maxindex = 0;
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++ )
+    (*it)->indexDescendants ( index, maxindex );
+}
+
+void RegRandomForests::resetCounters()
+{
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++ )
+    (*it)->resetCounters ();
+}
+
+void RegRandomForests::setComplexity(int size)
+{
+  fprintf (stderr, "RegRandomForests: set complexity to %d, overwriting current value %d\n",
+           size, number_of_trees );
+  number_of_trees = size;
+}
+

+ 128 - 0
regression/randomforest/RegRandomForests.h

@@ -0,0 +1,128 @@
+/**
+ * @file RegRandomForests.h
+ * @brief implementation of random set forest for regression
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRANDOMFORESTSINCLUDE
+#define REGRANDOMFORESTSINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+#include "vislearning/regression/randomforest/RegressionTree.h"
+#include "vislearning/regression/randomforest/RegressionTreeBuilder.h"
+
+
+namespace OBJREC
+{
+  
+/** implementation of random set forests for regression */
+class RegRandomForests : public RegressionAlgorithm
+{
+  protected:
+    /** vector containing all regression trees of the forest */
+    std::vector<RegressionTree *> forest;
+
+    /** number of trees which will be generated during training */
+    int number_of_trees;
+
+    /** fraction of features used for each tree */
+    double features_per_tree;
+
+    /** fraction of training examples used for each tree */
+    double samples_per_tree;
+
+    /** if >0 then prune the trees using pruneTreeLeastSquares */
+    double minimum_error_reduction;
+
+    /** stored config to initialize a tree */
+    const NICE::Config *conf;
+
+    /** config section containing important config values */
+    std::string confsection;
+
+    /** pointer to the tree builder method */
+    RegressionTreeBuilder *builder;
+
+    /** calculate out-of-bag statistics or not */
+    bool enableOutOfBagEstimates;
+    
+    /** out-of-bag statistics */
+    std::vector<std::pair<double, double> > oobResults;
+
+    /** predict using only a subset of all trees */
+    double predict ( const NICE::Vector & x,
+          const std::vector<int> & outofbagtrees );
+
+    /** calculate out-of-bag statistics */
+    void calcOutOfBagEstimates ( std::vector< std::vector<int> > & outofbagtrees,
+          NICE::VVector x,
+          NICE::Vector y );
+
+    /** save example selection per tree */
+    std::vector<std::vector<int> > exselection;
+    
+  public:
+    
+    /** initialize the regression method */
+    RegRandomForests ( const NICE::Config *conf,
+          std::string section );
+    
+    /** do nothing */
+    RegRandomForests ();
+    
+    /** simple destructor */
+    virtual ~RegRandomForests();
+    
+    /** learn parameters/models/whatever using a set of vectors and
+     *  their corresponding function values
+     */
+    void teach ( const NICE::VVector & x, const NICE::Vector & y );
+    
+    /** main prediction function */
+    double predict ( const NICE::Vector & x );
+
+    /** get the leaf node of each tree for a given feature vector (or inner nodes if depth is set to that level) */
+    void getLeafNodes ( NICE::Vector x,
+          std::vector<RegressionNode *> & leafNodes,
+          int depth = 100000 );
+    
+    /** get all leaf nodes (or inner nodes if depth is set to the level) */
+    void getAllLeafNodes ( std::vector<RegressionNode *> & leafNodes );
+
+    /** enumerate all nodes within the trees */
+    void indexDescendants ( std::map<RegressionNode *, std::pair<long, int> > & index ) const;
+
+    /** reset all counters in all nodes contained in the forest */
+    void resetCounters ();
+    
+    /** clone function */
+    virtual RegRandomForests *clone ( void ) const
+    {
+      fthrow ( NICE::Exception, "clone() not yet implemented!\n" );
+    }
+    
+    /** get out of bag estimates */
+    std::vector<std::pair<double, double> > & getOutOfBagResults ()
+    {
+      return oobResults;
+    };
+    
+    /** set the number of trees */
+    void setComplexity ( int size );
+    
+    /** IO functions */
+    void restore ( std::istream & is, int format = 0 );
+    void store ( std::ostream & os, int format = 0 ) const;
+    void clear ();
+};
+  
+} // namespace
+
+#endif
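
For reference, a minimal usage sketch of the class above (not part of the commit). The config keys read by the constructor are outside this excerpt, so the section name "RandomForest" and the assumption that an empty Config with defaults suffices are illustrative only:

    #include "core/basics/Config.h"
    #include "core/vector/VVector.h"
    #include "vislearning/regression/randomforest/RegRandomForests.h"

    using namespace NICE;
    using namespace OBJREC;

    int main ()
    {
      Config conf;                                   // assumed: defaults are usable
      RegRandomForests rf ( &conf, "RandomForest" ); // section name is hypothetical

      // toy training set: y = x_0 on four one-dimensional examples
      VVector X;
      Vector y ( 4 );
      for ( int i = 0; i < 4; i++ )
      {
        Vector x ( 1 );
        x[0] = (double)i;
        X.push_back ( x );
        y[i] = (double)i;
      }

      rf.teach ( X, y );

      Vector xstar ( 1 );
      xstar[0] = 2.5;
      double pred = rf.predict ( xstar );            // mean over all tree predictions
      fprintf ( stderr, "prediction: %f\n", pred );
      return 0;
    }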

+ 146 - 0
regression/randomforest/RegressionNode.cpp

@@ -0,0 +1,146 @@
+/**
+* @file RegressionNode.cpp
+* @brief regression node
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+
+#include "vislearning/regression/randomforest/RegressionNode.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+RegressionNode::~RegressionNode()
+{
+}
+
+RegressionNode::RegressionNode ()
+{
+  left = NULL;
+  right = NULL;
+  f = 0;
+  counter = 0;
+  threshold = 0.0;
+  predVal = 0.0;
+  lsError = 0.0;
+}
+
+RegressionNode *RegressionNode::getLeafNode (
+          const NICE::Vector & x,
+          int depth )
+{
+  if ( (!depth) || ((left == NULL) && (right == NULL)) )
+    return this;
+  
+  double val = x[f];
+  if ( val < threshold )
+  {
+    if ( left != NULL )
+      return left->getLeafNode ( x, depth - 1 );
+    else
+      return this;
+  }
+  else
+  {
+    if ( right != NULL )
+      return right->getLeafNode( x, depth - 1 );
+    else
+      return this;
+  }
+}
+
+void RegressionNode::traverse (
+          const NICE::Vector & x,
+          double & _predVal
+                              )
+{
+  RegressionNode *leaf = getLeafNode ( x );
+  _predVal = leaf->predVal;
+}
+
+void RegressionNode::statistics ( int & depth, int & count ) const
+{
+  int dl, cl;
+  if ( left != NULL )
+  {
+    left->statistics ( dl, cl );
+    dl++;
+  } else {
+    dl = 0;
+    cl = 0;
+  }
+  
+  if ( right != NULL )
+  {
+    right->statistics( depth, count );
+    depth++;
+  } else {
+    depth = 0;
+    count = 0;
+  }
+  
+  depth = (depth > dl) ? depth : dl;
+  count += cl + 1;
+}
+
+void RegressionNode::indexDescendants (
+          map<RegressionNode *, pair<long, int> > & index,
+          long & maxindex,
+          int depth ) const
+{
+  if ( left != NULL )
+  {
+    maxindex++;
+    index.insert ( pair<RegressionNode *, pair<long, int> > ( left, pair<long, int>(maxindex, depth + 1) ) );
+    left->indexDescendants ( index, maxindex, depth+1 );
+  }
+  
+  if ( right != NULL )
+  {
+    maxindex++;
+    index.insert ( pair<RegressionNode *, pair<long, int> > ( right, pair<long, int>(maxindex, depth + 1) ) );
+    right->indexDescendants ( index, maxindex, depth+1 );
+  }
+}
+
+void RegressionNode::nodePrediction( 
+          const Vector & y,
+          const vector<int> & selection )
+{
+  double mean = 0.0;
+  for (int i = 0; i < (int)selection.size(); i++)
+  {
+    mean += y[ selection[i] ];
+  }
+  mean = mean/selection.size();
+  
+  double sum_squares = 0.0;
+  for (int i = 0; i < (int)selection.size(); i++)
+  {
+    double diff = y[ selection[i] ] - mean;
+    sum_squares += diff*diff;
+  }
+  
+  lsError = sum_squares;
+  predVal = mean;
+}
+
+void RegressionNode::resetCounters ()
+{
+  counter = 0;
+  if ( left != NULL ) left->resetCounters();
+  if ( right != NULL ) right->resetCounters();
+}
+
+void RegressionNode::copy ( RegressionNode *node )
+{
+    left = node->left;
+    right = node->right;
+    threshold = node->threshold; 
+    f = node->f;
+    predVal = node->predVal;
+    lsError = node->lsError;
+    trainExamplesIndices = node->trainExamplesIndices;
+}
+
+bool RegressionNode::isLeaf () const
+{
+    return ( (right == NULL) && (left == NULL) );
+}
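
As a quick sanity check of nodePrediction above: for a hypothetical selection {0, 2} over targets (1, 5, 3), the node stores the mean of the selected values as prediction and their sum of squared deviations as least squares error:

    NICE::Vector y ( 3 );
    y[0] = 1.0; y[1] = 5.0; y[2] = 3.0;

    std::vector<int> selection;
    selection.push_back ( 0 );
    selection.push_back ( 2 );

    OBJREC::RegressionNode node;
    node.nodePrediction ( y, selection );
    // node.predVal == 2.0   (mean of 1.0 and 3.0)
    // node.lsError == 2.0   ((1-2)^2 + (3-2)^2)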

+ 92 - 0
regression/randomforest/RegressionNode.h

@@ -0,0 +1,92 @@
+/**
+ * @file RegressionNode.h
+ * @brief regression node
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRESSIONNODEINCLUDE
+#define REGRESSIONNODEINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include <map>
+#include <vector>
+#include <limits>
+
+namespace OBJREC {
+  
+/** regression node: f(x) < threshold ? */
+class RegressionNode
+{
+  protected:
+  
+  public:
+    
+    /** threshold of the regression node */
+    double threshold;
+    
+    /** counter which can be used to
+        count the number of examples which reached the node */
+    double counter;
+    
+    /** the feature used for the regression node split */
+    int f;
+    
+    /** the least squares error of the node */
+    double lsError;
+    
+    /** the prediction value of the node */
+    double predVal;
+
+    /** the left branch of the tree */
+    RegressionNode *left;
+
+    /** the right branch of the tree */
+    RegressionNode *right;
+    
+    /** Indices of examples which were used to estimate the
+     * prediction value during training */
+    std::vector<int> trainExamplesIndices;
+
+    /** constructor */
+    RegressionNode ();
+    
+    /** simple destructor */
+    virtual ~RegressionNode();
+    
+    /** traverse the tree and get the resulting leaf node */
+    RegressionNode *getLeafNode ( const NICE::Vector & x,
+          int depth = std::numeric_limits<int>::max() );
+
+    /** traverse this node with an example */
+    void traverse ( const NICE::Vector & x,
+          double & predVal );
+    
+    /** calculate the overall statistic of the current branch */
+    void statistics ( int & depth, int & count ) const;
+     
+    /** only index descendants (with increasing depth), do not index the node itself */
+    void indexDescendants ( std::map<RegressionNode *,
+          std::pair<long, int> > & index, 
+          long & maxindex,
+          int depth ) const;
+
+    /** calculate the prediction value for this node */
+    void nodePrediction( const NICE::Vector & y,
+          const std::vector<int> & selection);
+    
+    /** reset the counters variable of the current branch */
+    void resetCounters ();
+
+    /** copy the node information to another node */
+    void copy ( RegressionNode *node );
+
+    /** is this node a leaf */
+    bool isLeaf () const;
+};
+  
+  
+} // namespace
+
+#endif
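
To illustrate how the node API above composes into a tree, here is a hand-built decision stump; the split values are made up, and in practice nodes are created by a RegressionTreeBuilder:

    using namespace OBJREC;

    RegressionNode *root   = new RegressionNode ();
    RegressionNode *leftN  = new RegressionNode ();
    RegressionNode *rightN = new RegressionNode ();

    root->f = 0;             // split on feature 0
    root->threshold = 0.5;   // x[0] < 0.5 ? left : right
    root->left  = leftN;
    root->right = rightN;
    leftN->predVal  = -1.0;
    rightN->predVal =  1.0;

    NICE::Vector x ( 1 );
    x[0] = 0.7;
    double pred;
    root->traverse ( x, pred ); // pred == 1.0, the right leaf's value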

+ 257 - 0
regression/randomforest/RegressionTree.cpp

@@ -0,0 +1,257 @@
+/** 
+* @file RegressionTree.cpp
+* @brief regression tree implementation
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "vislearning/regression/randomforest/RegressionTree.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+RegressionTree::RegressionTree( const Config *_conf ) : conf(_conf)
+{
+  root = NULL;
+}
+
+RegressionTree::~RegressionTree()
+{
+  deleteNodes ( root );
+}
+
+void RegressionTree::statistics ( int & depth, int & count ) const
+{
+  if ( root == NULL )
+  {
+    depth = 0;
+    count = 0;
+  } else {
+    root->statistics ( depth, count );
+  }
+}
+
+void RegressionTree::traverse (
+          const Vector & x,
+          double & predVal )
+{
+  assert( root != NULL );
+  root->traverse ( x, predVal );
+}
+
+void RegressionTree::deleteNodes ( RegressionNode *tree )
+{
+  if ( tree != NULL )
+  {
+    deleteNodes ( tree->left );
+    deleteNodes ( tree->right );
+    delete tree;
+  }
+}
+
+void RegressionTree::clear ()
+{
+  deleteNodes ( root );
+  root = NULL;
+}
+
+void RegressionTree::resetCounters ()
+{
+  if ( root != NULL )
+    root->resetCounters ();
+}
+
+void RegressionTree::indexDescendants ( 
+          map<RegressionNode *, pair<long, int> > & index,
+          long & maxindex ) const
+{
+    if ( root != NULL )
+      root->indexDescendants ( index, maxindex, 0 );
+}
+
+RegressionNode *RegressionTree::getLeafNode ( 
+          Vector & x,
+          int maxdepth )
+{
+    return root->getLeafNode ( x, maxdepth );
+}
+
+void RegressionTree::getLeaves(
+          RegressionNode *node,
+          vector<RegressionNode*> &leaves)
+{
+  if ( node == NULL )
+    return;
+  if ( node->left == NULL && node->right == NULL )
+  {
+    leaves.push_back(node);
+    return;
+  }
+  getLeaves(node->right, leaves);
+  getLeaves(node->left, leaves);
+}
+
+vector<RegressionNode *> RegressionTree::getAllLeafNodes()
+{
+  vector<RegressionNode*> leaves;
+  getLeaves(root, leaves);
+  return leaves;
+}
+
+void RegressionTree::setRoot ( RegressionNode *newroot )
+{
+    root = newroot;
+}
+
+RegressionNode *RegressionTree::pruneTreeLeastSquares (
+          RegressionNode *node,
+          double minErrorReduction,
+          double & lsError )
+{
+  if ( node == NULL )  return NULL;
+  
+  lsError = node->lsError;
+  double leftError, rightError;
+  node->left = pruneTreeLeastSquares ( node->left, minErrorReduction, leftError );
+  node->right = pruneTreeLeastSquares ( node->right, minErrorReduction, rightError );
+
+  if (node->left != NULL && node->right != NULL)
+  {
+    if (lsError-leftError-rightError < minErrorReduction)
+    {
+      deleteNodes( node->left );
+      deleteNodes( node->right );
+      node->left = NULL;
+      node->right = NULL;
+    }
+  }
+  
+  return node;
+}
+
+void RegressionTree::pruneTreeLeastSquares ( double minErrorReduction )
+{
+  int depth, count;
+  statistics ( depth, count );
+  fprintf (stderr, "RegressionTree::pruneTreeLeastSquares: depth %d count %d\n", depth, count );
+  double tmp;
+  root = pruneTreeLeastSquares ( root, minErrorReduction, tmp );
+  statistics ( depth, count );
+  fprintf (stderr, "RegressionTree::pruneTreeLeastSquares: depth %d count %d (modified)\n", depth, count );
+}
+
+void RegressionTree::store (ostream & os, int format) const
+{
+  if ( root == NULL ) return;
+  
+  // indexing
+  map<RegressionNode *, pair<long, int> > index;
+
+  index.insert ( pair<RegressionNode *, pair<long, int> > ( NULL, pair<long, int> ( 0, 0 ) ) );
+  index.insert ( pair<RegressionNode *, pair<long, int> > ( root, pair<long, int> ( 1, 0 ) ) );
+  long maxindex = 1;
+  root->indexDescendants ( index, maxindex, 0 );
+
+  for ( map<RegressionNode *, pair<long, int> >::iterator i  = index.begin();
+        i != index.end();
+        i++ )
+  {
+    RegressionNode *node = i->first;
+
+    if ( node == NULL ) continue;
+
+    long ind = i->second.first;
+    long ind_l = index[ node->left ].first;
+    long ind_r = index[ node->right ].first;
+
+    os << "NODE " << ind << " " << ind_l << " " << ind_r << endl;
+
+    if ( !node->isLeaf() ) {
+      os << node->f;
+      os << endl;
+      os << node->threshold;
+      os << endl;
+    } else {
+      os << "LEAF";
+      os << endl;
+    }
+
+    os << node->lsError << " " << -1 << endl;
+  }
+  
+}
+
+void RegressionTree::restore (istream & is, int format)
+{
+  // indexing
+  map<long, RegressionNode *> index;
+  map<long, pair<long, long> > descendants;
+
+  index.insert ( pair<long, RegressionNode *> ( 0, NULL ) );
+
+  std::string tag;
+
+  while ( (! is.eof()) && ( (is >> tag) && (tag == "NODE") ) )
+  {
+    long ind;
+    long ind_l;
+    long ind_r;
+    if (! (is >> ind)) break;
+    if (! (is >> ind_l)) break;
+    if (! (is >> ind_r)) break;
+  
+    descendants.insert ( pair<long, pair<long, long> > ( ind, pair<long, long> ( ind_l, ind_r ) ) );
+    RegressionNode *node = new RegressionNode();
+    index.insert ( pair<long, RegressionNode *> ( ind, node ) );
+  
+    std::string feature_tag;
+  
+    is >> feature_tag;
+    if ( feature_tag != "LEAF" )
+    {
+      // the token just read already holds the split feature
+      node->f = atoi ( feature_tag.c_str() );
+      is >> node->threshold;
+    }
+  
+    is >> node->lsError;
+    int dummy;
+    is >> dummy; // consume the placeholder value written by store()
+  }
+
+  // connecting the tree
+  for ( map<long, RegressionNode *>::const_iterator it = index.begin();
+       it != index.end(); it++ )
+  {
+    RegressionNode *node = it->second;
+
+    if ( node == NULL ) continue;
+
+    long ind_l = descendants[it->first].first;
+    long ind_r = descendants[it->first].second;
+
+    map<long, RegressionNode *>::const_iterator il = index.find ( ind_l );
+    map<long, RegressionNode *>::const_iterator ir = index.find ( ind_r );
+
+    if ( ( il == index.end() ) || ( ir == index.end() ) )
+    {
+      fprintf (stderr, "File inconsistent: unable to build tree\n");
+      exit(-1);
+    }
+
+    RegressionNode *left = il->second;
+    RegressionNode *right = ir->second;
+
+    node->left = left;
+    node->right = right;
+  }
+  
+  map<long, RegressionNode *>::const_iterator iroot = index.find ( 1 );
+
+  if ( iroot == index.end() ) 
+  {
+    fprintf (stderr, "File inconsistent: unable to build tree (root node not found)\n");
+    exit(-1);
+  }
+
+  root = iroot->second;
+}
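
The store()/restore() pair above uses a simple token-based format: one NODE record per node carrying its own index and the indices of its children (0 means no child), followed either by the split feature and threshold or by the LEAF marker, and closed by the node's least squares error plus a placeholder value. For the single split with two leaves from the earlier stump sketch, the output looks roughly like:

    NODE 1 2 3
    0
    0.5
    0 -1
    NODE 2 0 0
    LEAF
    0 -1
    NODE 3 0 0
    LEAF
    0 -1

Since the node index map is ordered by pointer value, the order of the NODE records is not deterministic across runs; only the indices matter for reconstruction.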

+ 78 - 0
regression/randomforest/RegressionTree.h

@@ -0,0 +1,78 @@
+/**
+ * @file RegressionTree.h
+ * @brief regression tree implementation
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRESSIONTREEINCLUDE
+#define REGRESSIONTREEINCLUDE
+
+#include <map>
+#include <set>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include "core/basics/triplet.h"
+#include "core/basics/Config.h"
+#include "core/basics/Persistent.h"
+#include "vislearning/regression/randomforest/RegressionNode.h"
+
+namespace OBJREC {
+
+/** decision tree implementation for regression */
+class RegressionTree : public NICE::Persistent
+{
+  protected:
+    RegressionNode *root;
+    const NICE::Config *conf; // for restore operation
+    
+  public:
+    static void deleteNodes ( RegressionNode *tree );
+    
+    static RegressionNode *pruneTreeLeastSquares (
+          RegressionNode *node,
+          double minErrorReduction,
+          double & lsError );
+    
+    /** simple constructor */
+    RegressionTree( const NICE::Config *conf );
+    
+    /** simple destructor */
+    virtual ~RegressionTree();
+    
+    void traverse ( const NICE::Vector & x,
+          double & predVal );
+    
+    void resetCounters ();
+    
+    void statistics( int & depth, int & count ) const;
+    
+    void indexDescendants ( std::map<RegressionNode *, std::pair<long, int> > & index,
+          long & maxindex ) const;
+
+    RegressionNode *getLeafNode ( NICE::Vector & x,
+          int maxdepth = 100000 );
+    
+    void getLeaves ( RegressionNode *node, std::vector<RegressionNode*> &leaves);
+    
+    std::vector<RegressionNode *> getAllLeafNodes ();
+    
+    RegressionNode *getRoot( ) const { return root; };
+    
+    void pruneTreeLeastSquares ( double minErrorReduction );
+    
+    void setRoot( RegressionNode *newroot );
+    
+    void restore (std::istream & is, int format = 0);
+    void store (std::ostream & os, int format = 0) const;
+    void clear ();
+    
+
+};
+
+
+} // namespace
+
+#endif

+ 53 - 0
regression/randomforest/RegressionTreeBuilder.cpp

@@ -0,0 +1,53 @@
+/**
+* @file RegressionTreeBuilder.cpp
+* @brief build regression trees
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+
+#include <vislearning/regression/randomforest/RegressionTreeBuilder.h>
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+RegressionTreeBuilder::RegressionTreeBuilder ()
+{
+}
+
+RegressionTreeBuilder::~RegressionTreeBuilder ()
+{
+}
+
+void RegressionTreeBuilder::build (
+          RegressionTree& tree, 
+          const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  RegressionNode *root = build ( x, y );
+  tree.setRoot( root );
+  
+  int depth, count;
+  tree.statistics( depth, count );
+  fprintf (stderr, "RegressionTree: maximum depth = %d, number of nodes = %d\n", depth, count );
+}
+
+void RegressionTreeBuilder::collectFeatureValues (
+          const NICE::VVector & x,
+          const std::vector< int > & selection,
+          const int f,
+          vector< pair< double, int > >& values )
+{
+  for (int i = 0; i < (int)selection.size(); i++)
+  {
+    pair< double, int > curr;
+    double value = x[ selection[i] ][f];
+    curr.first = value;
+    curr.second = selection[i];
+    values.push_back( curr );
+  }
+}

+ 56 - 0
regression/randomforest/RegressionTreeBuilder.h

@@ -0,0 +1,56 @@
+/** 
+ * @file RegressionTreeBuilder.h
+ * @brief build regression trees
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRESSIONTREEBUILDERINCLUDE
+#define REGRESSIONTREEBUILDERINCLUDE
+
+#include <map>
+#include <set>
+
+#include "core/basics/triplet.h"
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/regression/randomforest/RegressionNode.h"
+#include "vislearning/regression/randomforest/RegressionTree.h"
+
+
+namespace OBJREC {
+
+/** build regression trees */
+class RegressionTreeBuilder
+{
+
+  protected:
+
+  public:
+  
+    /** simple constructor */
+    RegressionTreeBuilder();
+          
+    /** simple destructor */
+    virtual ~RegressionTreeBuilder();
+        
+    virtual RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y ) = 0;
+
+    void collectFeatureValues(const NICE::VVector & x,
+          const std::vector<int> & selection,
+          const int f,
+          std::vector< std::pair< double, int > > & values );
+
+    void build ( RegressionTree & tree, 
+          const NICE::VVector & x,
+          const NICE::Vector & y );
+
+};
+
+
+} // namespace
+
+#endif
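
Since build(x, y) is pure virtual, every concrete builder in this commit (RTBRandom, RTBGrid, RTBClusterRandom, RTBMeanPostImprovement) supplies its own split search. A deliberately trivial builder, shown here only to illustrate the contract (hypothetical, not part of the commit), would grow a single leaf predicting the mean of all targets:

    #include "vislearning/regression/randomforest/RegressionTreeBuilder.h"

    namespace OBJREC {

    /** toy builder: one leaf predicting the global mean */
    class RTBConstant : public RegressionTreeBuilder
    {
      public:
        RegressionNode *build ( const NICE::VVector & x,
                                const NICE::Vector & y )
        {
          RegressionNode *leaf = new RegressionNode ();

          // use every training example for the leaf prediction
          std::vector<int> selection;
          for ( int i = 0; i < (int)y.size(); i++ )
            selection.push_back ( i );

          leaf->nodePrediction ( y, selection );
          leaf->trainExamplesIndices = selection;
          return leaf;
        }
    };

    } // namespace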

+ 1 - 0
regression/randomforest/libdepend.inc

@@ -0,0 +1 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)

+ 8 - 0
regression/regcombination/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/regcombination/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The various
+# SUBDIR_before and SUBDIR_add variables are only required so that we can
+# recover the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying, that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 153 - 0
regression/regcombination/RegPreRandomForests.cpp

@@ -0,0 +1,153 @@
+/**
+* @file RegPreRandomForests.cpp
+* @brief Combination of a regression method with a pre-clustering using a random forest
+* @author Sven Sickert
+* @date 07/12/2013
+
+*/
+
+#include "vislearning/regression/regcombination/RegPreRandomForests.h"
+
+#include <iostream>
+#include <assert.h>
+
+using namespace OBJREC;
+using namespace std;
+using namespace NICE;
+
+RegPreRandomForests::RegPreRandomForests(const Config * conf,
+          const string & section,
+          RegressionAlgorithm *_leafRegressionPrototype )
+        : leafRegressionPrototype(_leafRegressionPrototype)
+{
+  string cluster_section = conf->gS ( section, "cluster_section", "RandomForest" );
+  mEx = conf->gI ( "RTBRandom", "min_examples", 500 );
+  randomforest = new RegRandomForests( conf, cluster_section );
+}
+
+RegPreRandomForests::~RegPreRandomForests()
+{
+  // delete the random forest
+  if ( randomforest != NULL )
+    delete randomforest;
+  
+  // delete all regression methods in the leaves
+  for ( map<RegressionNode *, RegressionAlgorithm * >::const_iterator it = leafRegressions.begin();
+        it != leafRegressions.end(); it++ )
+  {
+    RegressionAlgorithm * lr = it->second;
+    delete lr;
+  }
+}
+
+void RegPreRandomForests::teach ( const VVector & X, const Vector & y )
+{
+  randomforest->teach ( X, y );
+
+  if ( leafRegressionPrototype != NULL )
+  {
+    vector<RegressionNode *> leafNodes;
+    randomforest->getAllLeafNodes ( leafNodes );
+        
+    int lsize = leafNodes.size();
+    cerr << "number of leaf nodes: " << lsize << endl;
+        
+    #pragma omp parallel for
+    for ( int l = 0; l < lsize; l++ )
+    {
+      RegressionNode *node = leafNodes[l];
+
+      if ( !node->isLeaf() ){
+        fprintf( stderr, "RegPreRandomForests::teach: node #%d is not a leaf node!", l );
+        continue;
+      }
+
+      vector<int> leafTrainInds = node->trainExamplesIndices;
+      cerr << "Teaching regression method for leaf " << l << "..." << endl;
+      cerr << "examples in leaf: " << leafTrainInds.size() << endl;
+      assert ( leafTrainInds.size() > 0 );
+          
+      sort ( leafTrainInds.begin(), leafTrainInds.end() );
+
+      NICE::VVector leafTrainData;
+      vector<double> tmpVals;
+
+      for ( int i = 0; i < (int)leafTrainInds.size(); i++ )
+      {
+        if ( leafTrainInds[i] >= 0 && leafTrainInds[i] < (int)y.size() )
+        {
+          leafTrainData.push_back( X[ leafTrainInds[i] ] );
+          tmpVals.push_back( y[ leafTrainInds[i] ] );
+        }
+      }
+
+      if ( leafTrainData.size() == 0 ) continue;
+
+      NICE::Vector leafTrainVals( tmpVals );
+
+      RegressionAlgorithm *lr = leafRegressionPrototype->clone();
+
+      lr->teach( leafTrainData, leafTrainVals );
+
+      #pragma omp critical
+      leafRegressions.insert ( pair< RegressionNode *, RegressionAlgorithm *> ( node, lr ) );
+    }
+  }
+}
+
+
+double RegPreRandomForests::predict ( const Vector & x )
+{
+  double pred = 0.0;
+  
+  vector<RegressionNode *> leafNodes;
+  
+  // traverse the forest and obtain all involved leaf nodes
+  randomforest->getLeafNodes ( x, leafNodes );
+  
+  for ( vector<RegressionNode *>::const_iterator it = leafNodes.begin();
+        it != leafNodes.end(); it++ )
+  {
+    RegressionNode *node = *it;
+    map<RegressionNode *, RegressionAlgorithm *>::const_iterator leafRegressionIt =
+      leafRegressions.find( node );
+    
+    if ( leafRegressionIt == leafRegressions.end() )
+    {
+      // this leaf has no associated regression method
+      // -> we will use the random forest result
+      pred += node->predVal;
+      continue;
+    }
+    
+    RegressionAlgorithm *leafRegression = leafRegressionIt->second;
+    pred += leafRegression->predict( x );
+  }
+  
+  pred /= leafNodes.size();
+  
+  return pred;
+}
+
+void RegPreRandomForests::clear ()
+{
+  map<RegressionNode *, RegressionAlgorithm *>::iterator iter;
+  for ( iter = leafRegressions.begin(); iter != leafRegressions.end(); iter++ )
+  {
+    iter->second->clear();
+  }
+  randomforest->clear();
+}
+
+void RegPreRandomForests::store ( ostream & os, int format ) const
+{
+  cerr << "RegPreRandomForest::store: not yet implemented" << endl;
+}
+
+void RegPreRandomForests::restore ( istream& is, int format )
+{
+  cerr << "RegPreRandomForest::restore: not yet implemented" << endl;
+}
+

+ 62 - 0
regression/regcombination/RegPreRandomForests.h

@@ -0,0 +1,62 @@
+/**
+* @file RegPreRandomForests.h
+* @brief Combination of a regression method with a pre-clustering using a random forest
+* @author Sven Sickert
+* @date 07/12/2013
+*/
+#ifndef REGPRERANDOMFORESTSINCLUDE
+#define REGPRERANDOMFORESTSINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include <map>
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+#include "vislearning/regression/randomforest/RegRandomForests.h"
+
+namespace OBJREC{
+
+/** Combination of a regression method with a pre-clustering using a random forest */
+class RegPreRandomForests : public RegressionAlgorithm
+{
+  protected:
+    /** the regression prototype used to process
+        all examples in a leaf */
+    RegressionAlgorithm *leafRegressionPrototype;
+    
+    /** regression of each leaf */
+    std::map<RegressionNode *, RegressionAlgorithm *> leafRegressions;
+    
+    /** the random forest used to pre-cluster the features */
+    RegRandomForests *randomforest;
+    
+    /** maximum number of examples in a leaf */
+    int mEx;
+    
+  public:
+    /** simple constructor */
+    RegPreRandomForests( const NICE::Config *conf,
+          const std::string & section,
+          RegressionAlgorithm * _leafRegressionPrototype );
+   
+   /** simple destructor */
+   virtual ~RegPreRandomForests();
+   
+   /** learn parameters/models/whatever using a set of vectors and
+    *  their corresponding function values
+    */
+   void teach ( const NICE::VVector & X, const NICE::Vector & y );
+   
+   /** predict the function value for \c x */
+   double predict ( const NICE::Vector & x );
+   
+   void clear();
+   void store ( std::ostream & os, int format = 0 ) const;
+   void restore ( std::istream & is, int format = 0 );
+
+};
+
+} // namespace
+
+#endif
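
A usage sketch for the combination class above (illustrative only; the section name "PreRandomForest" and the availability of a leaf regressor with a working clone() are assumptions):

    #include <cstdio>
    #include "vislearning/regression/regcombination/RegPreRandomForests.h"

    using namespace NICE;
    using namespace OBJREC;

    void trainAndPredict ( const Config *conf,
                           const VVector & X, const Vector & y,
                           RegressionAlgorithm *leafPrototype )
    {
      // leafPrototype may be any RegressionAlgorithm, e.g. a GP regression
      // (see the gpregression dependency in libdepend.inc below); if NULL,
      // predictions fall back to the plain random forest leaf values
      RegPreRandomForests reg ( conf, "PreRandomForest", leafPrototype );

      reg.teach ( X, y );

      double pred = reg.predict ( X[0] );
      fprintf ( stderr, "prediction: %f\n", pred );
    }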

+ 3 - 0
regression/regcombination/libdepend.inc

@@ -0,0 +1,3 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)
+$(call PKG_DEPEND_INT,vislearning/regression/gpregression)
+$(call PKG_DEPEND_INT,vislearning/regression/randomforest)

+ 3 - 2
regression/regressionbase/RegressionAlgorithmKernel.cpp

@@ -29,6 +29,7 @@ RegressionAlgorithmKernel::RegressionAlgorithmKernel( const RegressionAlgorithmK
 	
 	this->X = src.X;
 	this->y = src.y;
+  this->conf = src.conf;
 }
 
 RegressionAlgorithmKernel::~RegressionAlgorithmKernel()
@@ -39,7 +40,7 @@ void RegressionAlgorithmKernel::teach ( const VVector & X, const NICE::Vector &
 {
 	if ( kernelFunction == NULL )
 		fthrow( Exception, "RegressionAlgorithmKernel::teach: To use this function, you have to specify a kernel function using the constructor" );
-
+  
 	this->y = y;
 	this->X = X;
 
@@ -48,7 +49,7 @@ void RegressionAlgorithmKernel::teach ( const VVector & X, const NICE::Vector &
 	kernelFunction->calcKernelData ( this->X, kernelData );
 	kernelData->updateCholeskyFactorization();
 
-	teach ( kernelData, this->y );
+	teachKernel ( kernelData, this->y );
 }
 
 double RegressionAlgorithmKernel::predict ( const NICE::Vector & x )

+ 1 - 1
regression/regressionbase/RegressionAlgorithmKernel.h

@@ -41,7 +41,7 @@ class RegressionAlgorithmKernel : public RegressionAlgorithm
      *  of a set
      *  of vectors and the corresponding function values \c y
      */
-    virtual void teach ( KernelData *kernelData, const NICE::Vector & y ) = 0;
+    virtual void teachKernel ( KernelData *kernelData, const NICE::Vector & y ) = 0;
 
     /** predict the function value for a vector by using its kernel values with
      * the used training set, be careful with the order in \c kernelVector