Johannes Ruehle 11 years ago
parent
commit
54d5633076
62 changed files with 6703 additions and 222 deletions
  1. cbaselib/CachedExample.cpp (+5 -5)
  2. cbaselib/ClassNames.cpp (+3 -3)
  3. cbaselib/ClassNames.h (+4 -3)
  4. cbaselib/ImageInfo.cpp (+260 -195)
  5. cbaselib/ImageInfo.h (+25 -6)
  6. classifier/kernelclassifier/KCGPApproxOneClass.cpp (+6 -6)
  7. corefiles.cmake (+31 -0)
  8. progfiles.cmake (+6 -0)
  9. regression/gpregression/RegGaussianProcess.cpp (+4 -3)
  10. regression/gpregression/RegGaussianProcess.h (+3 -0)
  11. regression/linregression/LinRegression.cpp (+137 -0)
  12. regression/linregression/LinRegression.h (+57 -0)
  13. regression/linregression/Makefile (+8 -0)
  14. regression/linregression/Makefile.inc (+103 -0)
  15. regression/linregression/RANSACReg.cpp (+117 -0)
  16. regression/linregression/RANSACReg.h (+66 -0)
  17. regression/linregression/libdepend.inc (+1 -0)
  18. regression/npregression/Makefile (+8 -0)
  19. regression/npregression/Makefile.inc (+103 -0)
  20. regression/npregression/RegKNN.cpp (+135 -0)
  21. regression/npregression/RegKNN.h (+62 -0)
  22. regression/npregression/libdepend.inc (+1 -0)
  23. regression/progs/testLinRegression.cpp (+285 -0)
  24. regression/progs/testNPRegression.cpp (+290 -0)
  25. regression/progs/testRANSACRegression.cpp (+286 -0)
  26. regression/progs/testRegressionGP.cpp (+272 -0)
  27. regression/progs/testRegressionRDF.cpp (+327 -0)
  28. regression/progs/testSplineRegression.cpp (+291 -0)
  29. regression/randomforest/Makefile (+8 -0)
  30. regression/randomforest/Makefile.inc (+103 -0)
  31. regression/randomforest/RTBClusterRandom.cpp (+167 -0)
  32. regression/randomforest/RTBClusterRandom.h (+59 -0)
  33. regression/randomforest/RTBGrid.cpp (+190 -0)
  34. regression/randomforest/RTBGrid.h (+59 -0)
  35. regression/randomforest/RTBLinear.cpp (+258 -0)
  36. regression/randomforest/RTBLinear.h (+77 -0)
  37. regression/randomforest/RTBMeanPostImprovement.cpp (+289 -0)
  38. regression/randomforest/RTBMeanPostImprovement.h (+72 -0)
  39. regression/randomforest/RTBMinDist.cpp (+250 -0)
  40. regression/randomforest/RTBMinDist.h (+66 -0)
  41. regression/randomforest/RTBRandom.cpp (+228 -0)
  42. regression/randomforest/RTBRandom.h (+72 -0)
  43. regression/randomforest/RegRandomForests.cpp (+357 -0)
  44. regression/randomforest/RegRandomForests.h (+128 -0)
  45. regression/randomforest/RegressionNode.cpp (+146 -0)
  46. regression/randomforest/RegressionNode.h (+92 -0)
  47. regression/randomforest/RegressionTree.cpp (+257 -0)
  48. regression/randomforest/RegressionTree.h (+78 -0)
  49. regression/randomforest/RegressionTreeBuilder.cpp (+53 -0)
  50. regression/randomforest/RegressionTreeBuilder.h (+56 -0)
  51. regression/randomforest/libdepend.inc (+1 -0)
  52. regression/regcombination/Makefile (+8 -0)
  53. regression/regcombination/Makefile.inc (+103 -0)
  54. regression/regcombination/RegPreRandomForests.cpp (+158 -0)
  55. regression/regcombination/RegPreRandomForests.h (+62 -0)
  56. regression/regcombination/libdepend.inc (+3 -0)
  57. regression/regressionbase/RegressionAlgorithmKernel.cpp (+4 -1)
  58. regression/splineregression/CRSplineReg.cpp (+223 -0)
  59. regression/splineregression/CRSplineReg.h (+68 -0)
  60. regression/splineregression/Makefile (+8 -0)
  61. regression/splineregression/Makefile.inc (+103 -0)
  62. regression/splineregression/libdepend.inc (+1 -0)

+ 5 - 5
cbaselib/CachedExample.cpp

@@ -269,14 +269,14 @@ bool CachedExample::colorInformationAvailable() const
   else {
     if ( imgfn.size() == 0 ) return false;
 
-    int tmp_xsize, tmp_ysize, tmp_maxval, tmp_nr;
+//     int tmp_xsize, tmp_ysize, tmp_maxval, tmp_nr;
     // refactor: InfImgFile ( imgfn, tmp_xsize, tmp_ysize, tmp_maxval, tmp_nr );
     ImageFile imgf ( imgfn );
     const ImageFile::Header & imgfheader = imgf.getHeader();
-    tmp_xsize = imgfheader.width;
-    tmp_ysize = imgfheader.height;
-    tmp_maxval = 255;
-    tmp_nr = imgfheader.channel;
+//     tmp_xsize = imgfheader.width;
+//     tmp_ysize = imgfheader.height;
+//     tmp_maxval = 255;
+    int tmp_nr = imgfheader.channel;
 
     if ( tmp_nr > 1 ) return true;
     else return false;

+ 3 - 3
cbaselib/ClassNames.cpp

@@ -71,7 +71,7 @@ ClassNames::ClassNames ( const ClassNames & cn,
 ClassNames::ClassNames ( const ClassNames & cn )
     : tbl_code_text ( cn.tbl_code_text ), tbl_text_code ( cn.tbl_text_code ),
     tbl_classno_code ( cn.tbl_classno_code ), tbl_code_classno ( cn.tbl_code_classno ),
-    tbl_color_classno ( tbl_color_classno ), tbl_classno_color ( cn.tbl_classno_color ), maxClassNo ( cn.maxClassNo )
+    tbl_color_classno ( cn.tbl_color_classno ), tbl_classno_color ( cn.tbl_classno_color ), maxClassNo ( cn.maxClassNo )
 {
 }
 
@@ -394,7 +394,8 @@ void ClassNames::getClassnoFromColor ( int & classno, int r, int g, int b ) cons
 #if defined WIN32 && defined NICE_USELIB_BOOST  
   boost::unordered_map<long, int>::const_iterator i = tbl_color_classno.find ( color );
 #else
-  __gnu_cxx::hash_map<long, int>::const_iterator i = tbl_color_classno.find ( color );
+//  __gnu_cxx::hash_map<long, int>::const_iterator i = tbl_color_classno.find ( color );
+  std::tr1::unordered_map<long, int>::const_iterator i = tbl_color_classno.find ( color );  
 #endif
 
   if ( i == tbl_color_classno.end() )
@@ -483,4 +484,3 @@ void ClassNames::clear ()
   tbl_color_classno.clear();
   tbl_classno_color.clear();
 }
-
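
The one-character fix in the copy constructor above repairs a classic member-initializer bug: tbl_color_classno was initialized from itself instead of from the source object cn, so a not-yet-constructed member was read (undefined behavior). A minimal sketch of the pitfall, using a hypothetical type rather than the library's:

    #include <map>

    struct Names {
        std::map<long, int> table;

        // BUG: initializes 'table' from itself -- the source object is
        // silently ignored and an object is read before its constructor
        // has run (undefined behavior; most compilers can warn about it,
        // e.g. via -Wuninitialized).
        Names ( const Names & other )
            : table ( table ) {}   // correct: table ( other.table )
    };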

+ 4 - 3
cbaselib/ClassNames.h

@@ -22,7 +22,8 @@
 #include <boost/unordered_map.hpp>
 #endif
 #else
-#include <ext/hash_map>
+//#include <ext/hash_map>
+#include <tr1/unordered_map>
 #endif
 
 #include "core/basics/Config.h"
@@ -44,9 +45,9 @@ class ClassNames : public NICE::Persistent
 #if defined WIN32 && defined NICE_USELIB_BOOST
     boost::unordered_map<long, int> tbl_color_classno;
 #else
-	__gnu_cxx::hash_map<long, int> tbl_color_classno;
+//	__gnu_cxx::hash_map<long, int> tbl_color_classno;
+    std::tr1::unordered_map<long, int> tbl_color_classno;
 #endif
-
     std::map<int, long> tbl_classno_color;
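
The switch from the long-deprecated GCC extension __gnu_cxx::hash_map to std::tr1::unordered_map keeps the existing preprocessor split: Boost on WIN32, the TR1 standard container everywhere else. A sketch of the resulting selection pattern (ColorClassMap is a hypothetical typedef for illustration; availability of the <tr1/unordered_map> header is a toolchain assumption):

    #if defined WIN32 && defined NICE_USELIB_BOOST
    #include <boost/unordered_map.hpp>
    typedef boost::unordered_map<long, int> ColorClassMap;
    #else
    #include <tr1/unordered_map>
    typedef std::tr1::unordered_map<long, int> ColorClassMap;
    #endif

    // lookups read identically with either backend:
    // ColorClassMap::const_iterator i = tbl_color_classno.find ( color );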
 
 

+ 260 - 195
cbaselib/ImageInfo.cpp

@@ -53,221 +53,285 @@ ImageInfo::~ImageInfo()
 bool
 ImageInfo::loadImageInfo(const string &aFilename)
 {
-	#ifdef NICE_USELIB_QT4_XML
+    #ifdef NICE_USELIB_QT4_XML
 
-	QString filename(aFilename.data());
-	QDomDocument doc("Image Labeler");
-	QFile file(filename);
-	if (!file.open(QIODevice::ReadOnly)) {
-		cout << "loadImageInfo:Can not open such file\n";
-		return false;
-		/* NOTREACHED */
-	}
+    QString filename(aFilename.data());
+    QDomDocument doc("Image Labeler");
+    QFile file(filename);
+    if (!file.open(QIODevice::ReadOnly)) {
+        cout << "loadImageInfo:Can not open such file\n";
+        return false;
+        /* NOTREACHED */
+    }
 
-	QString errMsg;
-	if (!doc.setContent(&file, &errMsg)) {
-		QByteArray array = errMsg.toAscii();
-		cout << array.data();
-		//showWarning(errMsg);
-		file.close();
-		return false;
-		/* NOTREACHED */
-	}
+    QString errMsg;
+    if (!doc.setContent(&file, &errMsg)) {
+        QByteArray array = errMsg.toAscii();
+        cout << array.data();
+        //showWarning(errMsg);
+        file.close();
+        return false;
+        /* NOTREACHED */
+    }
 
-	file.close();
+    file.close();
 
-	/* getting all info */
-	QDomElement elements = doc.documentElement();
-	QDomNode rootNode = elements.firstChild();
-	QString string_buffer;
-	int width = -1;
-	int height = -1;
+    /* getting all info */
+    QDomElement elements = doc.documentElement();
+    QDomNode rootNode = elements.firstChild();
+
+    m_iImageWidth = -1;
+    m_iImageHeight = -1;
 
 //	cout << "\nlet the parsing begin!\n";
-	while(!rootNode.isNull()) {
-		QDomElement element = rootNode.toElement();
-		if(!element.isNull()) {
-			/* path to the image */
-			if (element.tagName() == "image") {
-
-                string_buffer = element.text();
-
-                if (string_buffer.isEmpty())
-                {
-					cout << "loadImageInfo:The file with data"
-							" doesn't contain path to the image\n";
-					return false;
-				}
-                if( QFileInfo(string_buffer).isRelative() )
-                {
-                    QString asd = QFileInfo( QString(aFilename.c_str()) ).absoluteDir().absolutePath();
-
-                    QString qwe = QFileInfo( asd + "/" + string_buffer ).absoluteFilePath();
-
-                    string_buffer = qwe;
-                }
-
-                image_path_ = string_buffer.toStdString();
-			}
-			/* path to the segmented image */
-			if (element.tagName() == "segmented") {
-				string_buffer = element.text();
-				if ( !string_buffer.isEmpty() ) {
-					QByteArray array = string_buffer.toAscii();
-					segmented_image_path_ = string(array.data());
-				}
-			}
-			/* image description */
-			else if (element.tagName() == "description") {
-				string_buffer = element.text();
-				if ( !string_buffer.isEmpty()) {
-					QByteArray array = string_buffer.toAscii();
-					image_description_ = string(array.data());
-				}
-			}
-			/* tags */
-			else if (element.tagName() == "tags") {
-				string_buffer = element.text();
-                if ( !string_buffer.isEmpty()) {
-                    QByteArray array = string_buffer.toAscii();
-                    //TODO: make parsing into the string list
-                    tags_ = string(array.data());
-                }
-			}
-			/* legend */
-			else if (element.tagName() == "legend") {
-				loadLegendFromElement(&element);
-			}
-			/* objects */
-			else if (element.tagName() == "objects") {
-				QDomNode subNode = element.firstChild();
-				QDomElement subElement;
-
-				while(!subNode.isNull()) {
-					subElement = subNode.toElement();
-
-					if (subElement.isNull() || subElement.text().isEmpty()) {
-						subNode = subNode.nextSibling();
-						continue;
-					}
-
-					string_buffer = subElement.attribute("id");
-					bool ok = 1;
-					int id = string_buffer.toInt(&ok, 10);
-
-					if (!ok) {
-						cout << "loadImageInfo: "
-								"poly id format is corrupted\n";
-						subNode = subNode.nextSibling();
-						continue;
-					}
-
-                    // try reading a unique object/bounding box id, which identifies
-                    // this object against all others (not a label)
-                    string_buffer = subElement.attribute("uniqueObjectId");
-                    ok = 1;
-                    int uniqueObjectId = string_buffer.toInt(&ok, 10);
-                    if(!ok)
-                        uniqueObjectId = -1;
-
-
-                    string_buffer = subElement.text();
-					if (subElement.tagName() == "bbox") {
-                        BoundingBox bbox;
-                        bool bValid = BBoxFromData(&string_buffer, id, bbox);
-                        if( bValid )
-                        {
-                            bbox.unique_id_ = uniqueObjectId;
-
-                            bboxes_.push_back(bbox);
-                        }
-					}
-					if (subElement.tagName() == "poly") {
-                        Polygon poly;
-                        bool bValid = polyFromData(&string_buffer, poly);
-                        if(bValid)
-                        {
-                            poly.setID(id);
-                            poly.unique_id_ = uniqueObjectId;
-
-                            polys_.push_back(poly);
-                        }
-
-					}
-
-
-
-					subNode = subNode.nextSibling();
-				}
-			}
-			/* image size */
-			else if (element.tagName() == "image_size") {
-				string_buffer = element.text();
-				if (string_buffer.isEmpty()) {
-					cout << "loadImageInfo: "
-							"image size format is corrupted\n";
-					return false;
-					/* NOTREACHED */
-				}
-
-				QString buffer;
-				int size = string_buffer.size();
-				bool ok = 0;
-				for (int i = 0; i < size; i++) {
-					/* ";" is a separator */
-					if (';' != string_buffer.at(i))
-						continue;
-
-					buffer = string_buffer.mid(0, i);
-
-					width = buffer.toInt(&ok, 10);
-					if (!ok) {
-						cout <<
-							"loadImageInfo: "
-							"image size format is corrupted\n";
-						return false;
-						/* NOTREACHED */
-					}
-
-					buffer = string_buffer.mid(i + 1, size - (i + 1));
-
-					height = buffer.toInt(&ok, 10);
-
-					if (!ok) {
-						cout <<
-							"loadImageInfo: "
-							"image size format is corrupted";
-						return false;
-						/* NOTREACHED */
-					}
-					break;
-				}
-			}
-			else if (element.tagName() == "pure_data") {
-				string_buffer = element.text();
-				labeled_image_ = imageTFromData(width, height, &string_buffer);
-			}
-		}
-		rootNode = rootNode.nextSibling();
-	}
+    while(!rootNode.isNull()) {
+        QDomElement element = rootNode.toElement();
+        if(!element.isNull()) {
+            // path to the image
+            if (element.tagName() == "image") {
+                if( !this->extractSectionImage( &element, aFilename) )
+                    return false;
+            }
+            // path to the segmented image
+            if (element.tagName() == "segmented") {
+                if( !this->extractSectionSegmented(&element) )
+                    return false;
+            }
+            // image description
+            else if (element.tagName() == "description") {
+                if( !this->extractSectionDescription(&element) )
+                    return false;
+            }
+            // tags
+            else if (element.tagName() == "tags") {
+                if( !this->extractSectionTags(&element) )
+                    return false;
+            }
+            // legend
+            else if (element.tagName() == "legend") {
+                extractSectionLegend(&element);
+            }
+            // objects
+            else if (element.tagName() == "objects") {
+                if( !this->extractSectionObjects(&element) )
+                    return false;
+            }
+            // image size
+            else if (element.tagName() == "image_size") {
+                if( !this->extractImageSize(&element) )
+                    return false;
+            }
+            else if (element.tagName() == "pure_data") {
+                if( !this->extractSectionPureData(&element) )
+                    return false;
+            }
+        }
+        rootNode = rootNode.nextSibling();
+    }
 
-	#endif //NICE_USELIB_QT4_XML
+    #endif //NICE_USELIB_QT4_XML
 
-	return true;
+    return true;
 }
 
 #ifdef NICE_USELIB_QT4_XML
 
+bool ImageInfo::extractSectionImage(QDomElement *element , const std::string &p_sImageInfoFilename)
+{
+    QString string_buffer = element->text();
+
+    if (string_buffer.isEmpty())
+    {
+        cout << "loadImageInfo:The file with data"
+                " doesn't contain path to the image\n";
+        return false;
+    }
+    if( QFileInfo(string_buffer).isRelative() )
+    {
+        QString asd = QFileInfo( QString(p_sImageInfoFilename.c_str()) ).absoluteDir().absolutePath();
+
+        QString qwe = QFileInfo( asd + "/" + string_buffer ).absoluteFilePath();
+
+        string_buffer = qwe;
+    }
+
+    image_path_ = string_buffer.toStdString();
+    return true;
+}
+bool ImageInfo::extractSectionSegmented(QDomElement *element )
+{
+    QString string_buffer = element->text();
+    if ( !string_buffer.isEmpty() ) {
+        QByteArray array = string_buffer.toAscii();
+        segmented_image_path_ = string(array.data());
+    }
+    return true;
+}
+bool ImageInfo::extractSectionDescription(QDomElement *element )
+{
+    QString string_buffer = element->text();
+    if ( !string_buffer.isEmpty()) {
+        QByteArray array = string_buffer.toAscii();
+        image_description_ = string(array.data());
+    }
+    return true;
+}
+bool ImageInfo::extractSectionTags(QDomElement *element )
+{
+    QString string_buffer = element->text();
+    if ( !string_buffer.isEmpty())
+    {
+        QByteArray array = string_buffer.toAscii();
+        //TODO: make parsing into the string list
+        tags_ = string(array.data());
+    }
+    return true;
+}
+
+bool ImageInfo::extractSectionObjects(QDomElement *element )
+{
+    QDomNode subNode = element->firstChild();
+    QDomElement subElement;
+
+    while(!subNode.isNull()) {
+        subElement = subNode.toElement();
+
+        if (subElement.isNull() || subElement.text().isEmpty()) {
+            subNode = subNode.nextSibling();
+            continue;
+        }
+
+        if (subElement.tagName() == "bbox") {
+            if ( !this->extractObjectRectangle(&subElement) )
+                return false;
+        }
+        if (subElement.tagName() == "poly") {
+           if ( !this->extractObjectPolygon(&subElement) )
+               return false;
+        }
+
+        subNode = subNode.nextSibling();
+    }
+    return true;
+}
+bool ImageInfo::extractObjectPolygon(QDomElement *element )
+{
+    QString string_buffer = element->attribute("id");
+    bool ok = 1;
+    int id = string_buffer.toInt(&ok, 10);
+
+    if (!ok) {
+        cout << "loadImageInfo: "
+                "poly id format is corrupted\n";
+        return false;
+    }
+
+    // try reading a unique object/bounding box id, which identifies
+    // this object against all others (not a label)
+    string_buffer = element->attribute("uniqueObjectId");
+    ok = 1;
+    int uniqueObjectId = string_buffer.toInt(&ok, 10);
+    if(!ok)
+        uniqueObjectId = -1;
+
+    string_buffer = element->text();
+    Polygon poly;
+    bool bValid = polyFromData(&string_buffer, poly);
+    if( !bValid )
+        return false;
+
+    poly.setID(id);
+    poly.unique_id_ = uniqueObjectId;
+
+    polys_.push_back(poly);
+
+    return true;
+}
+bool ImageInfo::extractObjectRectangle(QDomElement *element )
+{
+    QString string_buffer = element->attribute("id");
+    bool ok = 1;
+    int id = string_buffer.toInt(&ok, 10);
+
+    if (!ok)
+    {
+        cout << "loadImageInfo: "
+                "poly id format is corrupted\n";
+        return false;
+    }
+
+    // try reading a unique object/bounding box id, which identifies
+    // this object against all others (not a label)
+    string_buffer = element->attribute("uniqueObjectId");
+    ok = 1;
+    int uniqueObjectId = string_buffer.toInt(&ok, 10);
+    if(!ok)
+        uniqueObjectId = -1;
+
+    string_buffer = element->text();
+    BoundingBox bbox;
+    bool bValid = BBoxFromData(&string_buffer, id, bbox);
+    if( !bValid )
+        return false;
+
+    bbox.unique_id_ = uniqueObjectId;
+    bboxes_.push_back(bbox);
+
+    return true;
+}
+
+bool ImageInfo::extractImageSize(QDomElement *element )
+{
+    QString string_buffer = element->text();
+    if (string_buffer.isEmpty()) {
+        cout << "loadImageInfo: "
+                "image size format is corrupted\n";
+        return false;
+    }
+
+    QStringList coordsList = string_buffer.split(";", QString::SkipEmptyParts);
+
+    try
+    {
+        if( coordsList.size() == 2)
+        {
+            bool ok = false;
+            this->m_iImageWidth = QVariant(coordsList[0]).toInt( &ok );
+            if( !ok ) return false;
+            this->m_iImageHeight = QVariant(coordsList[1]).toInt( &ok );
+            if( !ok ) return false;
+
+            return true;
+        }
+    } catch(std::exception &e)
+    {
+        std::cout << "exception: image size format is corrupted" << e.what() << std::endl;
+        return false;
+    }
+
+    return false;
+}
+bool ImageInfo::extractSectionPureData(QDomElement *element )
+{
+    if( m_iImageHeight < 0 || m_iImageWidth < 0)
+        return false;
+
+    QString string_buffer = element->text();
+    labeled_image_ = imageTFromData(m_iImageWidth, m_iImageHeight, &string_buffer);
+
+    return true;
+}
+
+
 //! A member loading legend from xml node
 /*!
  * \param[in] anElement a pointer to the object containing all the legend
  */
 void
-ImageInfo::loadLegendFromElement(QDomElement *anElement)
+ImageInfo::extractSectionLegend(QDomElement *anElement)
 {
 	if (!anElement) {
 		return;
-		/* NOTREACHED */
 	}
 	QDomNode subNode = anElement->firstChild();
 	QDomElement subElement;
@@ -280,6 +344,7 @@ ImageInfo::loadLegendFromElement(QDomElement *anElement)
 
 		subNode = subNode.nextSibling();
 	}
+
 }
 
 //! Loads one category info(label) from xml QDomElement
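
The rewrite above splits the monolithic parsing loop of loadImageInfo into one extract* helper per XML tag, each returning false on malformed input, and promotes the local width/height variables to the members m_iImageWidth and m_iImageHeight. This introduces an ordering dependency: extractSectionPureData bails out while the stored size is still negative, so a <pure_data> element can only be parsed after <image_size>. The size parsing itself now uses QString::split instead of the former hand-rolled character scan; a minimal sketch of that step (Qt4 API, standalone values for illustration):

    // parsing "<image_size>640;480</image_size>" content, as in
    // extractImageSize above
    QStringList parts = QString ( "640;480" ).split ( ";", QString::SkipEmptyParts );
    bool ok = false;
    int w = parts.value ( 0 ).toInt ( &ok );   // 640
    int h = parts.value ( 1 ).toInt ( &ok );   // 480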

+ 25 - 6
cbaselib/ImageInfo.h

@@ -54,17 +54,30 @@ class ImageInfo
 
 #ifdef NICE_USELIB_QT4_XML
 
-    bool polyFromData( QString *aPolyData, Polygon &p_Poly);
-    bool BBoxFromData(QString *aBBoxData, int &id , BoundingBox &p_bbox);
+    virtual bool polyFromData( QString *aPolyData, Polygon &p_Poly);
+    virtual bool BBoxFromData(QString *aBBoxData, int &id , BoundingBox &p_bbox);
+
+    virtual void extractSectionLegend ( QDomElement *anElement );
+    virtual bool extractSectionImage(QDomElement *element, const std::string &p_sImageInfoFilename );
+    virtual bool extractSectionSegmented(QDomElement *element );
+    virtual bool extractSectionDescription(QDomElement *element );
+    virtual bool extractSectionTags(QDomElement *element );
+    virtual bool extractSectionObjects(QDomElement *element );
+    virtual bool extractObjectPolygon(QDomElement *element );
+    virtual bool extractObjectRectangle(QDomElement *element );
+    virtual bool extractImageSize(QDomElement *element );
+    virtual bool extractSectionPureData(QDomElement *element );
+    virtual bool loadCategoryInfo ( QDomElement *anElement );
+
 
-    void loadLegendFromElement ( QDomElement *anElement );
-    bool loadCategoryInfo ( QDomElement *anElement );
     NICE::ImageT< unsigned int > imageTFromData (
       const int &aWidth,
       const int &aHeight,
       QString *aPureData
     );
 
+
+
 #endif //NICE_USELIB_QT4_XML
 
   public:
@@ -107,7 +120,7 @@ class ImageInfo
       return localization_info;
     };
 
-    bool loadImageInfo ( const std::string &aFilename );
+    virtual bool loadImageInfo ( const std::string &aFilename );
 
     const std::list< CategoryInfo > * labels() const;
     const std::list< BoundingBox > * bboxes() const;
@@ -118,7 +131,11 @@ class ImageInfo
     std::string imageDescription() const;
     std::string segmentedImagePath() const;
 
-  private:
+    void setListOfPolygons( std::list< Polygon > &p_polys )
+    {
+        polys_ = p_polys;
+    }
+  protected:
     std::list< CategoryInfo > labels_;
     std::list< BoundingBox > bboxes_;
     std::list< Polygon > polys_;
@@ -128,6 +145,8 @@ class ImageInfo
     std::string image_path_;
     std::string image_description_;
     std::string segmented_image_path_;
+    int m_iImageWidth;
+    int m_iImageHeight;
 
 };
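
Making the parsing hooks virtual and the data members protected turns ImageInfo into a base class that can be specialized per XML section. A hypothetical example of what this enables (MyImageInfo is not part of the commit; overriding works even where the base declares a hook private, since C++ permits overriding private virtuals):

    class MyImageInfo : public OBJREC::ImageInfo
    {
        // the dispatch loop in the base class's loadImageInfo() now calls
        // this override for every <tags> element
        virtual bool extractSectionTags ( QDomElement *element )
        {
            tags_ = element->text().toStdString();   // tags_ is protected now
            return true;
        }
    };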
 

+ 6 - 6
classifier/kernelclassifier/KCGPApproxOneClass.cpp

@@ -82,9 +82,9 @@ void KCGPApproxOneClass::teach (const LabeledSetVector &teachSet)
   
   //now sum up all entries of each row in the original kernel matrix
   double kernelScore(0.0);
-  for (int i = 0; i < this->vecSetLabels.size(); i++)
+  for (int i = 0; i < (int)this->vecSetLabels.size(); i++)
   {
-    for (int j = i; j < this->vecSetLabels.size(); j++)
+    for (int j = i; j < (int)this->vecSetLabels.size(); j++)
     {
       kernelScore = this->kernelFunction->K(vecSet[i],vecSet[j]);
       this->matrixDInv[i] += kernelScore;
@@ -94,7 +94,7 @@ void KCGPApproxOneClass::teach (const LabeledSetVector &teachSet)
   }  
   
   //compute its inverse
-  for (int i = 0; i < this->vecSetLabels.size(); i++)
+  for (int i = 0; i < (int)this->vecSetLabels.size(); i++)
   {
     this->matrixDInv[i] = 1.0 / this->matrixDInv[i];
   }
@@ -103,7 +103,7 @@ void KCGPApproxOneClass::teach (const LabeledSetVector &teachSet)
   if(this->mode==MEAN_DETECTION_MODE)
   {
     this->InvDY.resize ( this->vecSetLabels.size() );
-    for (int i = 0; i < this->vecSetLabels.size(); i++)
+    for (int i = 0; i < (int)this->vecSetLabels.size(); i++)
     {
       this->InvDY[i] = this->vecSetLabels[i] * this->matrixDInv[i];
     }
@@ -133,7 +133,7 @@ ClassificationResult KCGPApproxOneClass::classifyKernel ( const NICE::Vector & k
         kernelVector.size() << " does not match number of training points " << this->vecSetLabels.size() );
       
     NICE::Vector rightPart (this->vecSetLabels.size());
-    for (int i = 0; i < this->vecSetLabels.size(); i++)
+    for (int i = 0; i < (int)this->vecSetLabels.size(); i++)
     {
       rightPart[i] = kernelVector[i] * this->matrixDInv[i];
     }
@@ -170,4 +170,4 @@ void KCGPApproxOneClass::restore(std::istream& ifs, int type)
 
 void KCGPApproxOneClass::clear()
 {
-}
+}
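
The casts above silence -Wsign-compare warnings by converting the unsigned result of vecSetLabels.size() to int before comparing it with the signed loop counters (the last hunk only adds the missing newline at end of file). A cast-free alternative, shown purely as a style option, gives the counter the container's unsigned type instead:

    for ( size_t i = 0; i < this->vecSetLabels.size(); i++ )
    {
        // size_t matches the type of size(), so no cast is needed
    }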

+ 31 - 0
corefiles.cmake

@@ -210,6 +210,18 @@ SET(nice_vislearning_SRC
 ./classifier/kernelclassifier/KCGPRegression.cpp
 ./classifier/kernelclassifier/KCMinimumEnclosingBall.cpp
 ./matlabAccessHighLevel/ImageNetData.cpp
+./regression/linregression/LinRegression.cpp
+./regression/linregression/RANSACReg.cpp
+./regression/npregression/RegKNN.cpp
+./regression/randomforest/RTBRandom.cpp
+./regression/randomforest/RTBMinDist.cpp
+./regression/randomforest/RegRandomForests.cpp
+./regression/randomforest/RegressionNode.cpp
+./regression/randomforest/RegressionTree.cpp
+./regression/randomforest/RegressionTreeBuilder.cpp
+./regression/regcombination/RegPreRandomForests.cpp
+./regression/regressionbase/RegressionAlgorithmKernel.cpp
+./regression/splineregression/CRSplineReg.cpp
 )
 SET(nice_vislearning_HDR
 ./image/ImagePyramid.h
@@ -249,6 +261,25 @@ SET(nice_vislearning_HDR
 ./regression/regressionbase/RegressionAlgorithmKernel.h
 ./regression/regressionbase/RegressionAlgorithm.h
 ./regression/regressionbase/TeachWithInverseKernelMatrix.h
+./regression/linregression/LinRegression.h
+./regression/linregression/RANSACReg.h
+./regression/npregression/RegKNN.h
+./regression/randomforest/RTBClusterRandom.cpp
+./regression/randomforest/RTBGrid.cpp
+./regression/randomforest/RTBLinear.cpp
+./regression/randomforest/RTBMeanPostImprovement.cpp
+./regression/randomforest/RTBClusterRandom.h
+./regression/randomforest/RTBGrid.h
+./regression/randomforest/RTBLinear.h
+./regression/randomforest/RTBMeanPostImprovement.h
+./regression/randomforest/RTBMinDist.h
+./regression/randomforest/RTBRandom.h
+./regression/randomforest/RegRandomForests.h
+./regression/randomforest/RegressionNode.h
+./regression/randomforest/RegressionTree.h
+./regression/randomforest/RegressionTreeBuilder.h
+./regression/regcombination/RegPreRandomForests.h
+./regression/splineregression/CRSplineReg.h
 ./optimization/quadprog/Array.h
 ./optimization/quadprog/QuadProg++.h
 ./optimization/mapestimation/MAPMultinomialDirichlet.h

+ 6 - 0
progfiles.cmake

@@ -25,6 +25,12 @@ set(nice_vislearning_PROGFILES_SRC
 ./classifier/progs/toyExampleUnsupervisedGP.cpp
 ./classifier/kernelclassifier/progs/testNullSpace.cpp
 ./classifier/kernelclassifier/progs/laplaceTests.cpp
+./regression/progs/testLinRegression.cpp
+./regression/progs/testNPRegression.cpp
+./regression/progs/testRANSACRegression.cpp
+./regression/progs/testRegressionRDF.cpp
+./regression/progs/testRegressionGP.cpp
+./regression/progs/testSplineRegression.cpp
 )
 
 set(nice_vislearning_PROGFILES_HDR

+ 4 - 3
regression/gpregression/RegGaussianProcess.cpp

@@ -68,6 +68,8 @@ RegGaussianProcess::RegGaussianProcess ( const RegGaussianProcess & src ) :
 {
 	kInvY = src.kInvY;
 	verbose = src.verbose;
+  useLooParameters = src.useLooParameters;
+  maxIterations = src.maxIterations;
 	optimizeParameters = src.optimizeParameters;
 	optimizationMethod = src.optimizationMethod;
 	traceApproximation = src.traceApproximation;
@@ -111,7 +113,7 @@ void RegGaussianProcess::teach ( KernelData *kernelData, const NICE::Vector & y
 				if ( verbose ) 
 					cerr << "RegGaussianProcess: using conjugate gradient optimizer" << endl;
 
-				FirstOrderRasmussen *optimizer = new FirstOrderRasmussen();
+				FirstOrderRasmussen *optimizer = new FirstOrderRasmussen( verbose );
 				optimizer->setEpsilonG ( 0.01 );
 				optimizer->setMaxIterations ( -maxIterations );
 				optimizer->optimizeFirst ( gpopt );
@@ -137,8 +139,7 @@ void RegGaussianProcess::teach ( KernelData *kernelData, const NICE::Vector & y
 			fthrow(Exception, "KCGPRegression: you have to specify a kernel function !" );
 		}
 	} else {
-
-		if ( !kernelData->hasCholeskyFactorization() )
+		if ( !kernelData->hasCholeskyFactorization() ) 
 			kernelData->updateCholeskyFactorization();
 	}
 

+ 3 - 0
regression/gpregression/RegGaussianProcess.h

@@ -16,6 +16,7 @@
 #include "vislearning/regression/regressionbase/TeachWithInverseKernelMatrix.h"
 
 #include "vislearning/regression/gpregression/modelselcrit/genericGPModelSelection.h"
+#include "vislearning/regression/regressionbase/RegressionAlgorithmKernel.h"
 
 namespace OBJREC {
   
@@ -58,6 +59,8 @@ class RegGaussianProcess : public RegressionAlgorithmKernel
 		/** simple destructor */
 		virtual ~RegGaussianProcess();
 		 
+    using RegressionAlgorithmKernel::teach;   // <-- un-hides teach function
+    
 		/** learn parameters/models/whatever with a kernel matrix of a set
 		 *  of vectors and the corresponding function values \c y 
 		 */
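
The using-declaration is needed because declaring any function named teach in a derived class hides all inherited teach overloads; RegGaussianProcess declares teach ( KernelData*, const NICE::Vector& ), which would otherwise hide the overloads inherited from RegressionAlgorithmKernel. A minimal illustration of the rule, with hypothetical types:

    struct Base {
        void teach ( int ) {}
    };

    struct Derived : Base {
        using Base::teach;        // re-exposes Base::teach(int)
        void teach ( double ) {}  // any teach() here hides ALL Base overloads
    };

    int main () {
        Derived d;
        d.teach ( 1 );    // Base::teach(int); without the using-declaration
                          // the argument would silently convert to double
        d.teach ( 1.0 );  // Derived::teach(double)
        return 0;
    }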

+ 137 - 0
regression/linregression/LinRegression.cpp

@@ -0,0 +1,137 @@
+/**
+* @file LinRegression.cpp
+* @brief Algorithm for linear regression
+* @author Frank Prüfer
+* @date 08/13/2013
+
+*/  
+
+#include "vislearning/regression/linregression/LinRegression.h"
+#include "core/vector/Algorithms.h"
+#include "LinRegression.h"
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+LinRegression::LinRegression()
+{
+  dim = 0;
+}
+
+LinRegression::LinRegression(uint dimension)
+{
+  dim = dimension;
+}
+
+LinRegression::LinRegression ( const LinRegression & src ) : 
+RegressionAlgorithm ( src )
+{
+dim = src.dim;
+modelParams = src.modelParams;
+}
+
+LinRegression::~LinRegression()
+{
+}
+
+LinRegression* LinRegression::clone ( void ) const
+{
+  return new LinRegression(*this);
+}
+
+
+void LinRegression::teach ( const NICE::VVector & x, const NICE::Vector & y )
+{  
+  if (dim == 0){	//dimension not specified via constructor
+    dim = x[0].size()+1;  //use full dimension of data
+  }
+  
+  for ( uint i = 0;i < dim;i++ ){  //initialize vector of model parameters
+    modelParams.push_back(0.0);
+  }
+  
+  if ( dim == 2 )         //two-dimensional least squares
+  {  
+    double meanX;
+    double meanY = y.Mean();
+    double sumX = 0.0;
+    
+    for ( uint i = 0;i < x.size();i++ )
+      sumX += x[i][0];
+
+    meanX = sumX / (double)x.size();
+    
+    for ( uint i = 0; i < x.size(); i++ )
+      modelParams[1] += x[i][0] * y[i];
+
+    modelParams[1] -= x.size() * meanX * meanY;
+    
+    double tmp = 0.0;
+    for ( uint i = 0; i < x.size(); i++ )
+      tmp += x[i][0] * x[i][0];
+
+    tmp -= x.size() * meanX * meanX;
+    
+    modelParams[1] /= tmp;
+    
+    modelParams[0] = meanY - modelParams[1] * meanX;
+  }
+  else {  //N-dimensional least squares
+    NICE::Matrix X, tmp, G;
+    NICE::Vector params;
+    
+    x.toMatrix(X);
+    
+    NICE::Matrix Xtmp(X.rows(),X.cols()+1,1.0);
+    
+    // attach front column with ones
+
+    for(uint row = 0;row<X.rows();row++)
+    {
+      for(uint col = 0;col<X.cols();col++)
+      {
+        Xtmp(row,col+1) = X(row,col);
+      }
+    }
+
+    // modelParams =(X'X)^-1 * X'y
+    NICE::Matrix tmpInv;
+    NICE::Vector rhs;
+      
+    rhs.multiply(Xtmp,y,true);
+    tmp.multiply(Xtmp,Xtmp,true);
+      
+    choleskyDecomp(tmp,G);
+    choleskyInvert(G,tmpInv);
+
+    params.multiply(tmpInv,rhs);
+
+    modelParams = params.std_vector();
+  }
+}
+
+std::vector<double> LinRegression::getModelParams()
+{
+  return modelParams;
+}
+
+double LinRegression::predict ( const NICE::Vector & x )
+{
+  double y;
+  if ( dim == 2 )     //two-dimensional least squares
+  {  
+    y = modelParams[0] + modelParams[1] * x[0];
+  }
+  else {
+    // y = x * modelParams
+    NICE::Vector nModel(modelParams);
+    NICE:: Vector xTmp(1,1.0);
+    xTmp.append(x);
+    y = xTmp.scalarProduct(nModel);
+  }
+  
+  return y;
+}
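
For dim == 2 the new class fits slope and intercept with the closed-form simple-regression formulas; for higher dimensions teach() prepends a column of ones to the data matrix and solves the ordinary least-squares normal equations (X'X) w = X'y via Cholesky decomposition and inversion, as the comment in the code states. A hypothetical usage sketch (sample values invented for illustration):

    NICE::Vector x1 ( 2 ); x1[0] = 1.0; x1[1] = 2.0;
    NICE::Vector x2 ( 2 ); x2[0] = 2.0; x2[1] = 1.0;
    NICE::Vector x3 ( 2 ); x3[0] = 3.0; x3[1] = 4.0;
    NICE::VVector X;
    X.push_back ( x1 ); X.push_back ( x2 ); X.push_back ( x3 );
    NICE::Vector y ( 3 );
    y[0] = 4.0; y[1] = 3.0; y[2] = 8.0;

    OBJREC::LinRegression reg;        // dim deduced as x[0].size()+1 in teach()
    reg.teach ( X, y );               // solves (X'X) w = X'y via Cholesky
    double yhat = reg.predict ( x1 ); // leading 1 for the bias added internally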

+ 57 - 0
regression/linregression/LinRegression.h

@@ -0,0 +1,57 @@
+/**
+* @file LinRegression.h
+* @brief Algorithm for linear regression
+* @author Frank Prüfer
+* @date 08/13/2013
+
+*/
+#ifndef LINREGRESSIONINCLUDE
+#define LINREGRESSIONINCLUDE
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+namespace OBJREC
+{
+class LinRegression : public RegressionAlgorithm
+{
+  protected:
+    /** vector containing all model parameters */
+    std::vector<double> modelParams;
+    
+    /** dimensionality of the model (i.e. number of model parameters) */
+    uint dim;
+  
+  public:
+    /** simple constructor */
+    LinRegression();
+    
+    /** constructor, specifying the dimensionality of the model*/
+    LinRegression(uint dimension);
+    
+    /** copy constructor */
+    LinRegression ( const LinRegression & src ); 
+    
+    /** simple destructor */
+    virtual ~LinRegression();
+    
+    /** clone function */
+    LinRegression* clone (void) const;
+    
+    /** method to learn model parameters */
+    void teach ( const NICE::VVector & x, const NICE::Vector & y );
+    
+    /** returns model parameters as a vector */
+    std::vector<double> getModelParams();
+    
+    /** method to predict function value */
+    double predict ( const NICE::Vector & x );
+};
+
+}	//namespace
+
+#endif

+ 8 - 0
regression/linregression/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/linregression/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying, that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 117 - 0
regression/linregression/RANSACReg.cpp

@@ -0,0 +1,117 @@
+/**
+* @file RANSACReg.cpp
+* @brief Implementation of RANSAC (RANdom SAmple Consensus) for regression purposes
+* @author Frank Prüfer
+* @date 09/10/2013
+
+*/  
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+#include <ctime>
+
+#include "vislearning/regression/linregression/LinRegression.h"
+#include "vislearning/regression/linregression/RANSACReg.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+RANSACReg::RANSACReg ( const Config *_conf )
+{
+  if ( _conf->gB("RANSACReg","start_random_generator" ) )
+    std::srand ( unsigned ( std::time(0) ) );
+  threshold = _conf->gD("RANSACReg","threshold",0.5);
+  iter = _conf->gI("RANSACReg","iterations",10);
+}
+
+RANSACReg::RANSACReg ( const RANSACReg & src ) : RegressionAlgorithm ( src )
+{
+  threshold = src.threshold;
+  n = src.n;
+  iter = src.iter;
+  dataSet = src.dataSet;
+  labelSet = src.labelSet;
+  modelParams = src.modelParams;
+}
+
+RANSACReg::~RANSACReg()
+{
+}
+
+RANSACReg* RANSACReg::clone ( void ) const
+{
+  return new RANSACReg(*this);
+}
+
+void RANSACReg::teach ( const NICE::VVector & dataSet, const NICE::Vector & labelSet )
+{ 
+  NICE::VVector best_CS(0,0);
+  std::vector<double> best_labelCS;
+  
+  vector<int> indices;
+  for ( uint i = 0; i < dataSet.size(); i++ )
+    indices.push_back(i);
+  
+  n = dataSet[0].size()+1;
+
+  for ( uint i = 0; i < iter; i++ ){
+    random_shuffle( indices.begin(), indices.end() );
+    NICE::VVector randDataSubset;
+    std::vector<double> randLabelSubset;
+    
+    for ( uint j = 0; j < n; j++ ){	//choose random subset of n points
+      randDataSubset.push_back( dataSet[indices[j]] );
+      randLabelSubset.push_back( labelSet[indices[j]] );
+    }
+    
+    LinRegression *linReg = new LinRegression ();
+    linReg->teach ( randDataSubset, (NICE::Vector)randLabelSubset );	//do LinRegression on subset
+    std::vector<double> tmp_modelParams = linReg->getModelParams();
+    
+    NICE::VVector current_CS;
+    std::vector<double> current_labelCS;
+    
+#pragma omp parallel for    
+    for ( uint j = n; j < indices.size(); j++ ){	//compute distance between each datapoint and current model
+      double lengthNormalVector = 0; 
+      double sum = 0;
+      for ( uint k = 0; k < tmp_modelParams.size(); k++ ){
+	sum += tmp_modelParams[k] * dataSet[indices[j]][k];
+	lengthNormalVector += tmp_modelParams[k] * tmp_modelParams[k];
+      }
+      lengthNormalVector = sqrt(lengthNormalVector);
+      
+      double distance = ( sum - labelSet[indices[j]] ) / lengthNormalVector;
+
+#pragma omp critical
+      if ( abs(distance) < threshold ){	//if point is close to model, it belongs to consensus set
+	current_CS.push_back ( dataSet[indices[j]] );
+	current_labelCS.push_back ( labelSet[indices[j]] );
+      }
+    }
+    
+    if ( current_CS.size() > best_CS.size() ){	//if consensus set contains more points than any previous one, take this model as best_model
+      best_CS = current_CS;
+      best_labelCS = current_labelCS;
+    }
+  }
+  
+  LinRegression *best_linReg = new LinRegression ();	//compute best_model again with all points of best_consensusSet
+  best_linReg->teach ( best_CS, (NICE::Vector)best_labelCS );
+  modelParams = best_linReg->getModelParams();    
+}
+  
+double RANSACReg::predict ( const NICE::Vector & x )
+{
+  NICE::Vector nModel(modelParams);
+  NICE:: Vector xTmp(1,1.0);
+  xTmp.append(x);
+  double y = xTmp.scalarProduct(nModel);
+
+  return y;
+  
+}
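
teach() above runs the classic RANSAC loop: draw a random minimal subset of n = dim+1 points, fit a LinRegression model to it, collect every remaining point whose normalized residual stays below the threshold into a consensus set, keep the largest consensus set across all iterations, and refit the final model on it. A hypothetical usage sketch; the three config keys are the ones read in the constructor above:

    // [RANSACReg]
    // start_random_generator = true
    // threshold  = 0.5   ; max |residual| for a point to count as an inlier
    // iterations = 10    ; number of random minimal subsets to try

    NICE::Config conf ( argc, argv );   // keys e.g. supplied via a config file
    OBJREC::RANSACReg ransac ( &conf );
    ransac.teach ( X, y );              // X: NICE::VVector, y: NICE::Vector
    double yhat = ransac.predict ( X[0] );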

+ 66 - 0
regression/linregression/RANSACReg.h

@@ -0,0 +1,66 @@
+/**
+* @file RANSACReg.h
+* @brief Implementation of RANSAC (RANdom SAmple Consensus) for regression purposes
+* @author Frank Prüfer
+* @date 09/10/2013
+
+*/   
+#ifndef RANSACREGINCLUDE
+#define RANSACREGINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include "core/basics/Config.h"
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+namespace OBJREC
+{
+class RANSACReg : public RegressionAlgorithm
+{
+  protected:
+    /** threshold value for determining when a datum fits a model */
+    double threshold;
+    
+    /** minimum number of data points required to fit the model */
+    uint n;
+    
+    /** number of iterations performed by the algorithm */
+    uint iter;
+    
+    /** vector of model parameters */
+    std::vector<double> modelParams;
+    
+    /** set of data points */
+    NICE::VVector dataSet;
+    
+    /** set of responses according to dataset */
+    std::vector<double> labelSet;
+    
+
+  public:
+    /** simple constructor */
+    RANSACReg ( const NICE::Config *conf );
+    
+    /** copy constructor */
+    RANSACReg ( const RANSACReg & src );
+    
+    /** simple destructor */
+    virtual ~RANSACReg();
+    
+    /** clone function */
+    RANSACReg* clone (void) const;
+    
+    /** predict response using simple vector */
+    double predict ( const NICE::Vector & x );
+    
+    /** teach whole set at once */
+    void teach ( const NICE::VVector & dataSet, const NICE::Vector & labelSet );
+
+};
+}	//namespace
+
+
+
+#endif

+ 1 - 0
regression/linregression/libdepend.inc

@@ -0,0 +1 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)

+ 8 - 0
regression/npregression/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/npregression/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying, that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 135 - 0
regression/npregression/RegKNN.cpp

@@ -0,0 +1,135 @@
+/**
+* @file RegKNN.cpp
+* @brief Implementation of k-Nearest-Neighbor algorithm for regression purposes
+* @author Frank Prüfer
+* @date 08/29/2013
+
+*/
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+
+#include "vislearning/regression/npregression/RegKNN.h"
+
+#include "vislearning/math/mathbase/FullVector.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+
+RegKNN::RegKNN ( const Config *_conf, NICE::VectorDistance<double> *_distancefunc ) : distancefunc (_distancefunc)
+{
+  K = _conf->gI("RegKNN", "K", 1 );
+  if ( _distancefunc == NULL )
+  distancefunc = new EuclidianDistance<double>();
+}
+
+RegKNN::RegKNN ( const RegKNN & src ) : RegressionAlgorithm ( src )
+{
+  dataSet = src.dataSet;
+  labelSet = src.labelSet;
+  distancefunc = src.distancefunc;
+  K = src.K;
+}
+
+RegKNN::~RegKNN ()
+{
+}
+
+RegKNN* RegKNN::clone ( void ) const
+{
+  return new RegKNN(*this);
+}
+
+
+void RegKNN::teach ( const NICE::VVector & _dataSet, const NICE::Vector & _labelSet)
+{
+  fprintf (stderr, "teach using all !\n");
+  //NOTE this is crucial if we clear _teachSet afterwards!
+  //therefore, take care NOT to call _teachSet.clear() somewhere out of this method
+  this->dataSet = _dataSet;
+  this->labelSet = _labelSet.std_vector();
+  
+  std::cerr << "number of known training samples: " << this->dataSet.size() << std::endl;   
+    
+}
+
+void RegKNN::teach ( const NICE::Vector & x, const double & y )
+{
+  std::cerr << "RegKNN::teach one new example" << std::endl;
+
+  for ( size_t i = 0 ; i < x.size() ; i++ )
+    if ( isnan(x[i]) ) 
+    {
+        fprintf (stderr, "There is a NAN value within this vector: x[%d] = %f\n", (int)i, x[i]);
+        cerr << x << endl;
+        exit(-1);
+    }
+
+  dataSet.push_back ( x );
+
+  labelSet.push_back ( y );
+
+  std::cerr << "number of known training samples: " << dataSet.size()<< std::endl;
+}
+
+double RegKNN::predict ( const NICE::Vector & x )
+{
+  FullVector distances(dataSet.size());
+
+  if ( dataSet.size() <= 0 )
+  {
+    fprintf (stderr, "RegKNN: please use the teach method first\n");
+    exit(-1);
+  }
+
+#pragma omp parallel for
+  for(uint i = 0; i < dataSet.size(); i++)
+  {
+    double distance = distancefunc->calculate (x,dataSet[i]);
+
+    if ( isnan(distance) )
+    {
+      fprintf (stderr, "RegKNN::predict: NAN value found !!\n");
+      cerr << x << endl;
+      cerr << dataSet[i] << endl;
+    }
+// #pragma omp critical      
+    distances[i] = distance;     
+  }
+    
+  std::vector<int> ind;
+  distances.getSortedIndices(ind);
+
+  double response = 0.0;  
+    
+  if ( dataSet.size() < K )
+  {
+    cerr << K << endl;
+    K = dataSet.size();
+    cerr<<"RegKNN: Not enough datapoints! Setting K to: "<< K <<endl;
+  }
+
+  if ( distances[ind[0]] == 0.0 ) {
+    cerr<<"RegKNN: Warning: datapoint was already seen during training... using its label as prediction."<<endl;
+    return labelSet[ind[0]];  
+  }
+
+  double maxElement = distances.max();	//normalize distances
+  distances.multiply(1.0/maxElement);
+
+  double weightSum = 0.0;
+
+  for(uint i = 0; i < K; i++)
+  {
+    response += 1.0/distances[ind[i]] * labelSet[ind[i]];
+    weightSum += 1.0/distances[ind[i]];
+  }
+
+  return ( response / weightSum );
+}
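
predict() above measures the distance from the query to every stored sample, sorts the indices, normalizes by the largest distance, and returns the inverse-distance-weighted mean of the K nearest responses (or the stored label directly when the query coincides with a training point). A hypothetical usage sketch; K is read from the [RegKNN] config section, and xNew, yNew, xQuery are placeholder names for a single sample, its response, and a query vector:

    NICE::Config conf ( argc, argv );      // e.g. with RegKNN::K = 3
    OBJREC::RegKNN knn ( &conf );          // NULL -> EuclidianDistance is used
    knn.teach ( X, y );                    // whole training set at once
    knn.teach ( xNew, yNew );              // or one sample at a time
    double yhat = knn.predict ( xQuery );  // weighted mean of the K neighbors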

+ 62 - 0
regression/npregression/RegKNN.h

@@ -0,0 +1,62 @@
+/**
+* @file RegKNN.h
+* @brief Implementation of k-Nearest-Neighbor algorithm for regression purposes
+* @author Frank Prüfer
+* @date 08/29/2013
+
+*/ 
+#ifndef REGKNNINCLUDE
+#define REGKNNINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+#include "core/vector/MatrixT.h"
+
+#include "core/basics/Config.h"
+
+#include <core/vector/Distance.h>
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+namespace OBJREC
+{ 
+class RegKNN : public RegressionAlgorithm
+{
+  protected:
+    int K;
+    
+    /** set of data points */
+    NICE::VVector dataSet;
+    
+    /** set of responses according to dataset */
+    std::vector<double> labelSet;
+    
+    /** used distance function */
+    NICE::VectorDistance<double> *distancefunc;
+  
+  public:
+    /** simple constructor */
+    RegKNN ( const NICE::Config *conf, NICE::VectorDistance<double> *distancefunc = NULL );
+    
+    /** copy constructor */
+    RegKNN ( const RegKNN & src );
+    
+    /** simple destructor */
+    virtual ~RegKNN();
+    
+    /** clone function */
+    RegKNN* clone (void) const;
+    
+    /** predict response using simple vector */
+    double predict ( const NICE::Vector & x );
+    
+    /** teach whole set at once */
+    void teach ( const NICE::VVector & dataSet, const NICE::Vector & labelSet );
+
+    /** teach one data point at a time */
+    void teach ( const NICE::Vector & x, const double & y );
+};
+
+}	//namespace
+
+#endif

+ 1 - 0
regression/npregression/libdepend.inc

@@ -0,0 +1 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)

+ 285 - 0
regression/progs/testLinRegression.cpp

@@ -0,0 +1,285 @@
+/**
+* @file testLinRegression.cpp
+* @brief test of linear regression
+* @author Frank Prüfer
+* @date 08/13/2013
+
+*/
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/linregression/LinRegression.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  int inquotes=false;
+  char c;
+  int linemax=line.length();
+  string curstring;
+  record.clear();
+
+  while(line[linepos]!=0 && linepos < linemax)
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame (  Config conf,
+		  NICE::VVector &xdata,
+		  NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  double trainRatio = conf.gD( "debug", "training_ratio", .9 );
+  
+  int trainingSize = (int)(trainRatio*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = conf.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveConfig = conf.gB( "debug", "save_config", false );
+  
+  /*------------Store Configuration------------*/
+  string filename = conf.gS( "debug", "filename" );
+  
+  if ( saveConfig )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*--------Setting up LinRegression-----------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+
+    cerr << "Initializing LinRegression...";
+    LinRegression *linReg = new LinRegression ();
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the LinRegression algorithm...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    linReg->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*--------Testing LinRegression------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = linReg->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
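+    // Statistics computed below for fold k:
+    //   modelling efficiency: 1 - Var(residuals)/Var(targets), analogous to R^2
+    //   correlation: sample correlation between the residuals and the targets
+    //   mean square error: mean of the squared residuals
+    //   standardized MSE: MSE divided by the target variance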
+    NICE::Vector diff = testVals - predictionValues;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //get config from user input
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
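+
+// Expected input layout (see loadData above): <dataset>_x.csv contains one
+// comma-separated feature vector per line, <dataset>_y.csv one target value
+// per line; if the label file is missing, all targets default to 0.0.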
+
+
+ 

+ 290 - 0
regression/progs/testNPRegression.cpp

@@ -0,0 +1,290 @@
+/**
+* @file testNPRegression.cpp
+* @brief test of non-parametric regression
+* @author Frank Prüfer
+* @date 08/29/2013
+
+*/
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/npregression/RegKNN.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  bool inquotes=false;
+  char c;
+  int linemax=(int)line.length();
+  string curstring;
+  record.clear();
+
+  while( linepos < linemax && line[linepos]!=0 )
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame (  Config conf,
+		  NICE::VVector &xdata,
+		  NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  double trainRatio = conf.gD( "debug", "training_ratio", .9 );
+  
+  int trainingSize = (int)(trainRatio*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = conf.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveConfig = conf.gB( "debug", "save_config", false );
+  
+  /*------------Store Configuration------------*/
+  string filename = conf.gS( "debug", "filename" );
+  
+  if ( saveConfig )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*------------Setting up NPRegression--------------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+
+    cerr << "Initializing NPRegression...";
+    RegKNN *knn = new RegKNN (&conf, NULL);
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the NPRegression algorithm...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    knn->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*---------Testing NPRegression------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
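+    // each iteration writes a distinct element of predictionValues, so the
+    // loop is safe to parallelize; the pragma is ignored without OpenMP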
+#pragma omp parallel for    
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = knn->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //get config from user input
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
+
+
+ 

+ 286 - 0
regression/progs/testRANSACRegression.cpp

@@ -0,0 +1,286 @@
+/**
+* @file testRANSACRegression.cpp
+* @brief test of RANSAC regression
+* @author Frank Prüfer
+* @date 09/11/2013
+
+*/
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/linregression/RANSACReg.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  bool inquotes=false;
+  char c;
+  int linemax=(int)line.length();
+  string curstring;
+  record.clear();
+
+  while( linepos < linemax && line[linepos]!=0 )
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame (  Config conf,
+		  NICE::VVector &xdata,
+		  NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  double trainRatio = conf.gD( "debug", "training_ratio", .9 );
+  
+  int trainingSize = (int)(trainRatio*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = conf.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveConfig = conf.gB( "debug", "save_config", false );
+  
+  /*------------Store Configuration------------*/
+  string filename = conf.gS( "debug", "filename" );
+  
+  if ( saveConfig )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*----------Setting up RANSAC----------------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+
+    cerr << "Initializing LinRegression...";
+    RANSACReg *RReg = new RANSACReg ( &conf );
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the LinRegression algorithm...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    RReg->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*-----------Testing RANSAC----------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = RReg->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //get config from user input
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
+
+
+ 
+ 

+ 272 - 0
regression/progs/testRegressionGP.cpp

@@ -0,0 +1,272 @@
+/**
+* @file testRegressionGP.cpp
+* @brief test of GP
+* @author Sven Sickert
+* @date 07/11/2013
+
+*/
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+//#include "vislearning/baselib/ICETools.h"
+#include "vislearning/regression/gpregression/RegGaussianProcess.h"
+#include "vislearning/math/kernels/KernelExp.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  bool inquotes=false;
+  char c;
+  int linemax=(int)line.length();
+  string curstring;
+  record.clear();
+
+  while( linepos < linemax && line[linepos]!=0 )
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int dim = 10; // TODO: data dimension is hard-coded here
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    NICE::Vector vec(dim);
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.set(i,dval);
+    }
+    Data.push_back(vec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame ( Config confRDF,
+                 NICE::VVector &xdata,
+                 NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  
+  int trainingSize = (int)(.5*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
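+  // note: unlike the other test programs, this one uses a fixed 50/50
+  // train/test split instead of the configurable "training_ratio"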
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+
+  int nfolds = confRDF.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+  
+  KernelExp *kernel_template = new KernelExp ( confRDF.gD("Kernel", "log_rbf_gamma", -2.5), 0.0 );
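+  // a single RBF kernel template; every fold below works on its own copy,
+  // so the per-fold GP models stay independent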
+  
+ 
+  /*--------------Setting up GP----------------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+    cerr << "Initializing GP regression...";
+    Kernel *kernel_function = NULL;
+    kernel_function = new KernelExp ( *(kernel_template) );
+    RegGaussianProcess *regGP = new RegGaussianProcess( &confRDF, kernel_function, "GPRegression" );
+    
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the GP regression...";
+    regGP->teach( trainData, trainVals );
+    cerr << "Finished." << endl;
+
+    /*---------------Testing GP----------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = regGP->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    
+    for (int j = 0; j < testingSize; j++)
+      cerr << testVals[j] << " " << predictionValues[j] << endl;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  string path = "/home/sickert/data/cosre-MPI/regression-fluxcom/_DATA/";
+  Config confRDF(path+"config.conf");  //Config for RDF
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  /*----------Load dataset---------*/
+  loadData(xdata, y, path, "flux_x.csv", "flux_y.csv"); //load all data
+  
+  testFrame( confRDF, xdata, y );
+
+  return 0;
+}
+
+

+ 327 - 0
regression/progs/testRegressionRDF.cpp

@@ -0,0 +1,327 @@
+/**
+* @file testRegressionRDF.cpp
+* @brief test of RDF with arbitrary leaf regression method
+* @author Sven Sickert
+* @date 07/02/2013
+
+*/
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/regcombination/RegPreRandomForests.h"
+#include "vislearning/regression/gpregression/RegGaussianProcess.h"
+#include "vislearning/regression/linregression/LinRegression.h"
+#include "vislearning/regression/npregression/RegKNN.h"
+
+#include "vislearning/math/kernels/KernelExp.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  bool inquotes=false;
+  char c;
+  int linemax=(int)line.length();
+  string curstring;
+  record.clear();
+
+  while( linepos < linemax && line[linepos]!=0 )
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame ( Config confRDF,
+                 NICE::VVector &xdata,
+                 NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  double trainRatio = confRDF.gD( "debug", "training_ratio", .9 );
+  
+  int trainingSize = (int)(trainRatio*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = confRDF.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveForest = confRDF.gB( "debug", "save_forest", false );
+  string leafReg = confRDF.gS( "PreRandomForest", "leaf_regression", "gp" );
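+  // recognized leaf_regression values: "GaussProcess", "Linear", "KNN", "none";
+  // note that the default "gp" is not handled by the dispatch below and
+  // terminates the program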
+  
+  KernelExp *kernel_template = new KernelExp ( confRDF.gD("Kernel", "log_rbf_gamma", -2.5), 0.0 );
+  
+  /*------------Store Configuration------------*/
+  string filename = confRDF.gS( "debug", "filename" );
+  
+  if ( saveForest )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+    storeEvalData << "random_split_tests=" << confRDF.gI ( "RTBRandom", "random_split_tests" ) << endl;
+    storeEvalData << "random_features=" << confRDF.gI ( "RTBRandom", "random_features" ) << endl;
+    storeEvalData << "max_depth=" << confRDF.gI ( "RTBRandom", "max_depth" ) << endl;
+    storeEvalData << "random_split_mode=" << confRDF.gS ( "RTBRandom", "random_split_mode" ) << endl;
+    storeEvalData << "min_examples=" << confRDF.gI ( "RTBRandom", "min_examples" ) << endl;
+    storeEvalData << "number_of_trees=" << confRDF.gI ( "RandomForest", "number_of_trees" ) << endl;
+    storeEvalData << "features_per_tree=" << confRDF.gD ( "RandomForest", "features_per_tree" ) << endl;
+    storeEvalData << "samples_per_tree=" << confRDF.gD ( "RandomForest", "samples_per_tree" ) << endl;
+    storeEvalData << "builder=" << confRDF.gS ( "RandomForest", "builder" ) << endl;
+    storeEvalData << "minimum_error_reduction=" << confRDF.gD ( "RandomForest", "minimum_error_reduction" ) << endl;
+    storeEvalData << "log_rbf_gamma=" << confRDF.gD ( "Kernel", "log_rbf_gamma" ) << endl;
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*------------Setting up PreRDF--------------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+    cerr << "Initializing leaf regression method " << leafReg << "...";
+    RegressionAlgorithm *leafRegression = NULL;
+    Kernel *kernel_function = NULL;
+    if ( leafReg == "GaussProcess" )
+    {
+      kernel_function = new KernelExp ( *(kernel_template) );
+      leafRegression = new RegGaussianProcess( &confRDF, kernel_function, "GPRegression" );
+    }
+    else if ( leafReg == "Linear" )
+      leafRegression = new LinRegression ();
+    else if ( leafReg == "KNN" )
+      leafRegression = new RegKNN ( &confRDF, NULL);
+    else if ( leafReg == "none" ) {
+      cerr << "\ntestRegressionRDFGP::testFrame: No leaf regression method set! Using RandomForest prediction..." << endl;
+    } else {
+      cerr << "\ntestRegressionRDFGP::testFrame: No valid leaf regression method set! Aborting..." << endl;
+      exit(-1);
+    }
+    cerr << "Finished." << endl;
+
+    cerr << "Initializing PreRDF for regression...";
+    RegPreRandomForests *prf = new RegPreRandomForests ( &confRDF, "PreRandomForest", leafRegression );
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the PreRDF for regression...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    prf->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*-------------Testing RDF-GP--------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = prf->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+    
+    if (kernel_function != NULL)
+      delete kernel_function;
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+  
+  /*-----------------Cleaning------------------*/
+  delete kernel_template;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //Config for RFGP
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
+
+

+ 291 - 0
regression/progs/testSplineRegression.cpp

@@ -0,0 +1,291 @@
+ 
+/**
+* @file testSplineRegression.cpp
+* @brief test of spline regression
+* @author Frank Prüfer
+* @date 09/03/2013
+
+*/
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/splineregression/CRSplineReg.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  bool inquotes=false;
+  char c;
+  int linemax=(int)line.length();
+  string curstring;
+  record.clear();
+
+  while( linepos < linemax && line[linepos]!=0 )
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame (  Config conf,
+		  NICE::VVector &xdata,
+		  NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  double trainRatio = conf.gD( "debug", "training_ratio", .9 );
+  
+  int trainingSize = (int)(trainRatio*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = conf.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveConfig = conf.gB( "debug", "save_config", false );
+  
+  /*------------Store Configuration------------*/
+  string filename = conf.gS( "debug", "filename" );
+  
+  if ( saveConfig )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*--------Setting up spline regression-------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+
+    cerr << "Initializing NPRegression...";
+    CRSplineReg *spline = new CRSplineReg ( &conf );
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the NPRegression algorithm...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    spline->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*-------Testing spline regression---------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+#pragma omp parallel for    
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = spline->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //get config from user input
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
+
+
+ 

+ 8 - 0
regression/randomforest/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
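+# delegate any other target to the parent Makefile, extending TARGETS_FROM
+# with this directory's name so the parent can locate the calling subdir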
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/randomforest/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exitting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we cannot include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying, that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 167 - 0
regression/randomforest/RTBClusterRandom.cpp

@@ -0,0 +1,167 @@
+/**
+* @file RTBClusterRandom.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/19/2013
+
+*/
+#include <iostream>
+
+#include "RTBClusterRandom.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBClusterRandom::RTBClusterRandom( const Config *conf, std::string section )
+{
+  max_depth = conf->gI(section, "max_depth", 20 );
+  min_examples = conf->gI(section, "min_examples", 10);
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBClusterRandom::~RTBClusterRandom()
+{
+}
+
+bool RTBClusterRandom::balancingLeftRight(const vector< pair< double, int > > values,
+          double threshold,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+    }
+    else
+    {
+      count_right++;
+    }
+  }
+  
+#ifdef DETAILTREE
+  fprintf (stderr, "left vs. right: %d : %d\n", count_left, count_right );
+#endif
+  
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+  
+  return true;
+}
+
+RegressionNode *RTBClusterRandom::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  node->nodePrediction( y, selection );
+  double lsError = node->lsError;
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBClusterRandom: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBClusterRandom: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  vector<pair<double, int> > values;
+  
+  int f = rand() % x[0].size();
+    
+  values.clear();
+  collectFeatureValues ( x, selection, f, values );
+    
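+  // split threshold: the middle element of the collected feature values
+  // (this is the true median only if collectFeatureValues returns them sorted)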
+  double median   = (values.begin() + values.size() / 2)->first;
+    
+#ifdef DETAILTREE
+  double minValue = (min_element ( values.begin(), values.end() ))->first;
+  double maxValue = (max_element ( values.begin(), values.end() ))->first;
+  fprintf (stderr, "max %f min %f med %f\n", maxValue, minValue, median );
+#endif
+    
+  int count_left, count_right;
+  if ( ! balancingLeftRight( values, median, count_left, count_right) )
+  {
+    fprintf ( stderr, "RTBClusterRandom: no split possible (empty leaf)\n" );
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+      
+#ifdef DETAILTREE
+  fprintf (stderr, "t %f for feature %d\n", median, f );
+#endif
+  
+  node->f = f;
+  node->threshold = median;
+  
+  // recompute examples_left and examples_right for the chosen split
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < median )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBClusterRandom::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+}

+ 59 - 0
regression/randomforest/RTBClusterRandom.h

@@ -0,0 +1,59 @@
+/**
+* @file RTBClusterRandom.h
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/19/2013
+
+*/
+#ifndef RTBCLUSTERRANDOMINCLUDE
+#define RTBCLUSTERRANDOMINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree */
+class RTBClusterRandom : public RegressionTreeBuilder
+{
+  
+  protected:
+    int max_depth;
+    int min_examples;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+
+    bool balancingLeftRight ( const std::vector< std::pair< double, int > > values,
+          double threshold,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBClusterRandom( const NICE::Config *conf, std::string section = "RTBClusterRandom" );
+    
+    /** simple destructor */
+    virtual ~RTBClusterRandom();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+}
+
+#endif

+ 190 - 0
regression/randomforest/RTBGrid.cpp

@@ -0,0 +1,190 @@
+/**
+* @file RTBGrid.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/15/2013
+
+*/
+#include <iostream>
+#include <limits>
+
+#include "RTBGrid.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBGrid::RTBGrid( const Config *conf, std::string section )
+{
+  max_depth = conf->gI(section, "max_depth", 20 );
+  min_examples = conf->gI(section, "min_examples", 10);
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBGrid::~RTBGrid()
+{
+}
+
+bool RTBGrid::balancingLeftRight(const vector< pair< double, int > > values,
+          double threshold,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+    }
+    else
+    {
+      count_right++;
+    }
+  }
+  
+#ifdef DETAILTREE
+  fprintf (stderr, "left vs. right: %d : %d\n", count_left, count_right );
+#endif
+  
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+  
+  return true;
+}
+
+RegressionNode *RTBGrid::buildRecursive ( const NICE::VVector & x,
+          const std::vector<std::vector<double> > & limits,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBGrid: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBGrid: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  vector<pair<double, int> > values;
+  
+  int f = depth % x[0].size();
+    
+  values.clear();
+  collectFeatureValues ( x, selection, f, values );
+    
+#ifdef DETAILTREE
+  double minValue = (min_element ( values.begin(), values.end() ))->first;
+  double maxValue = (max_element ( values.begin(), values.end() ))->first;
+  fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    
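+  // start from the midpoint of the feature's value range; once every feature
+  // has been used along the path, the threshold is halved on each further
+  // cycle to refine the grid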
+  double threshold = 0.5 * (limits[f][0]+limits[f][1]);
+  int tmp = depth;
+  while( tmp > (int)x[0].size() )
+  {
+    threshold *= 0.5;
+    tmp -= x[0].size();
+  }
+      
+  int count_left, count_right;
+  if ( ! balancingLeftRight( values, threshold, count_left, count_right) )
+  {
+    fprintf ( stderr, "RTBGrid: no split possible (empty leaf)\n" );
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+      
+#ifdef DETAILTREE
+  fprintf (stderr, "t %f for feature %d\n", threshold, f );
+#endif
+  
+  node->f = f;
+  node->threshold = threshold;
+  
+  // recompute examples_left and examples_right for the chosen split
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, limits, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, limits, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBGrid::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  // get min/max values for all features
+  int fcount = x[0].size();
+  vector< vector<double> > limits;
+  for ( int j = 0; j < fcount; j++ )
+  {
+    double min = numeric_limits<double>::max();
+    double max = -numeric_limits<double>::max(); // ::min() is the smallest positive double, not the lowest value
+    for ( int i = 0; i < (int)x.size(); i++ )
+    {
+      double value = x[i][j];
+      if (value > max ) max = value;
+      if (value < min ) min = value;
+    }
+    vector<double> flimit;
+    flimit.push_back(min);
+    flimit.push_back(max);
+    limits.push_back(flimit);
+  }
+  
+  return buildRecursive( x, limits, all, 0);
+}
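
Note (not part of the commit): RTBGrid splits deterministically. The feature index cycles with the depth, and the midpoint threshold is halved once per completed cycle over all features. A minimal standalone sketch of that schedule, replicating the loop in buildRecursive with an assumed feature count and assumed limits:

#include <cstdio>

int main() {
    const int dim = 3;                                   // assumed feature count
    const double limits[dim][2] = { {0, 8}, {-2, 2}, {1, 5} };

    for ( int depth = 0; depth < 9; depth++ )
    {
        int f = depth % dim;                             // feature tested at this depth
        double threshold = 0.5 * (limits[f][0] + limits[f][1]);
        int tmp = depth;
        while ( tmp > dim )                              // same halving loop as buildRecursive
        {
            threshold *= 0.5;
            tmp -= dim;
        }
        printf ( "depth %d: feature %d, threshold %f\n", depth, f, threshold );
    }
    return 0;
}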

+ 59 - 0
regression/randomforest/RTBGrid.h

@@ -0,0 +1,59 @@
+/**
+* @file RTBGrid.h
+* @brief regression tree with deterministic grid-based splits
+* @author Sven Sickert
+* @date 07/15/2013
+
+*/
+#ifndef RTBGRIDINCLUDE
+#define RTBGRIDINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** regression tree with deterministic grid-based splits */
+class RTBGrid : public RegressionTreeBuilder
+{
+  
+  protected:
+    int max_depth;
+    int min_examples;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const std::vector< std::vector< double > > & limits,
+          std::vector<int> & selection,
+          int depth);
+
+    bool balancingLeftRight ( const std::vector< std::pair< double, int > > values,
+          double threshold,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBGrid( const NICE::Config *conf, std::string section = "RTBGrid" );
+    
+    /** simple destructor */
+    virtual ~RTBGrid();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+}
+
+#endif
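
Note (not part of the commit): a hedged usage sketch for RTBGrid. The sI setter name is assumed from the NICE Config API, and the config keys are assumed to mirror the other tree builders; RegressionTree, setRoot and getLeafNode are taken from the classes added later in this commit. RTBGrid leaves store the indices of the examples that reached them rather than a prediction value, so the sketch inspects the leaf instead of calling traverse.

#include <cstdio>
#include "core/basics/Config.h"
#include "core/vector/VVector.h"
#include "vislearning/regression/randomforest/RTBGrid.h"
#include "vislearning/regression/randomforest/RegressionNode.h"
#include "vislearning/regression/randomforest/RegressionTree.h"

using namespace NICE;
using namespace OBJREC;

int main() {
    Config conf;
    conf.sI ( "RTBGrid", "max_depth", 6 );      // assumed keys
    conf.sI ( "RTBGrid", "min_examples", 2 );

    VVector x;                                  // one feature vector per example
    Vector y ( 4 );                             // target values
    for ( int i = 0; i < 4; i++ )
    {
        x.push_back ( Vector ( 1, (double)i ) );
        y[i] = 2.0 * i;
    }

    RTBGrid builder ( &conf, "RTBGrid" );
    RegressionTree tree ( &conf );
    tree.setRoot ( builder.build ( x, y ) );    // the tree now owns the nodes

    Vector query ( 1, 1.5 );
    RegressionNode *leaf = tree.getLeafNode ( query, 100000 );
    printf ( "leaf holds %d training examples\n", (int)leaf->trainExamplesIndices.size() );
    return 0;
}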

+ 258 - 0
regression/randomforest/RTBLinear.cpp

@@ -0,0 +1,258 @@
+/**
+* @file RTBLinear.cpp
+* @brief random regression tree, which learns a LSE-model in every inner node during training
+* @author Frank Prüfer
+* @date 09/17/2013
+
+*/
+#include <iostream>
+
+#include "RTBLinear.h"
+#include "vislearning/regression/linregression/LinRegression.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBLinear::RTBLinear( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBLinear::~RTBLinear()
+{
+}
+
+void RTBLinear::computeLinearLSError( const VVector& x,
+          const Vector& y,
+          const int& numEx,
+          double& lsError)
+{
+  LinRegression *lreg = new LinRegression;
+  lreg->teach ( x, y);
+
+  NICE::Vector diff ( numEx );
+  for ( int i = 0; i < numEx; i++ ){
+    diff[i] = y[i] - lreg->predict ( x[i] );
+    diff[i] *= diff[i];
+  }
+
+  lsError = diff.Mean();
+  delete lreg;
+}
+
+bool RTBLinear::errorReductionLeftRight(const vector< pair< double, int > > values,
+          const Vector & y,
+          double threshold,
+          double& error_left,
+          double& error_right,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  vector<int> selection_left;
+  vector<int> selection_right;
+  
+  NICE::VVector xLeft;
+  NICE::VVector xRight;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+      selection_left.push_back( it->second );
+      NICE::Vector tmp(1,value);
+      xLeft.push_back( tmp );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( it->second );
+      NICE::Vector tmp2(1,value);
+      xRight.push_back( tmp2 );
+    }
+  }
+
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+  
+  if ( (count_left < min_examples)  || (count_right < min_examples) )
+    return false; // no split
+  
+
+  NICE::Vector yLeft (count_left);
+  for ( int i = 0; i < count_left; i++ ){
+    yLeft[i] = y[selection_left[i]];
+  }
+  computeLinearLSError(xLeft, yLeft, count_left, error_left);
+
+  NICE::Vector yRight (count_right);
+  for ( int i = 0; i < count_right; i++ ){
+    yRight[i] = y[selection_right[i]];
+  }
+  computeLinearLSError(xRight, yRight, count_right, error_right);
+  
+  return true;
+}
+
+RegressionNode *RTBLinear::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+//  node->nodePrediction( y, selection );
+  double lsError;
+  // note: the node-level error is computed over the full training set x,
+  // not only the examples in the current selection
+  computeLinearLSError( x, y, (int)x.size(), lsError);
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBLinear: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_reduct = -1.0;
+//  vector<pair<double, int> > best_values;
+  vector<pair<double, int> > values;
+  double lsError_left = 0.0;
+  double lsError_right = 0.0;
+  
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+    
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+    
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    if ( maxValue - minValue < 1e-7 ) continue;
+    
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      double threshold;
+      threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+      
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
+#endif
+      lsError_left = 0.0;
+      lsError_right = 0.0;
+      
+      int count_left, count_right;
+      if ( ! errorReductionLeftRight( values, y, threshold, lsError_left,
+          lsError_right, count_left, count_right) )
+        continue;
+      
+      //double pl = (count_left) / (count_left +count_right);
+      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
+      double errorReduction = lsError - lsError_left - lsError_right;
+      
+      if ( errorReduction > best_reduct )
+      {
+        best_reduct = errorReduction;
+        best_threshold =  threshold;
+        best_feature = f;
+#ifdef DETAILTREE
+        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+      }
+    }
+  }
+  
+  if ( best_reduct < minimum_error_reduction )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: error reduction to small !\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+  
+  node->f = best_feature;
+  node->threshold = best_threshold;
+  
+  // re-calculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBLinear::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+} 
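
Note (not part of the commit): errorReductionLeftRight above scores a split by fitting a least-squares line to each side via LinRegression and comparing the mean squared residuals. A self-contained sketch of that per-side quantity for the 1-D case (ordinary least squares, assuming the x values are not all identical):

#include <cstdio>
#include <vector>

double meanSquaredResidual ( const std::vector<double> & x, const std::vector<double> & y )
{
    const int n = (int)x.size();
    double sx = 0, sy = 0, sxx = 0, sxy = 0;
    for ( int i = 0; i < n; i++ )
    {
        sx += x[i]; sy += y[i]; sxx += x[i]*x[i]; sxy += x[i]*y[i];
    }
    double b = ( n*sxy - sx*sy ) / ( n*sxx - sx*sx ); // slope
    double a = ( sy - b*sx ) / n;                     // intercept
    double err = 0;
    for ( int i = 0; i < n; i++ )
    {
        double r = y[i] - ( a + b*x[i] );
        err += r*r;
    }
    return err / n;                                   // corresponds to diff.Mean() above
}

int main() {
    double xs[] = {0, 1, 2, 3}, ys[] = {0.1, 1.9, 4.2, 5.8};
    std::vector<double> x ( xs, xs + 4 ), y ( ys, ys + 4 );
    printf ( "mean squared residual: %f\n", meanSquaredResidual ( x, y ) );
    return 0;
}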

+ 77 - 0
regression/randomforest/RTBLinear.h

@@ -0,0 +1,77 @@
+/**
+* @file RTBLinear.h
+* @brief random regression tree, which learns a LSE-model in every inner node during training
+* @author Frank Prüfer
+* @date 09/17/2013
+
+*/
+#ifndef RTBLINEARINCLUDE
+#define RTBLINEARINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree */
+class RTBLinear : public RegressionTreeBuilder
+{
+  
+  protected:
+    int random_split_tests;
+    int random_features;
+    int max_depth;
+    int min_examples;
+    double minimum_error_reduction;
+    
+    int random_split_mode;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    enum {
+      RANDOM_SPLIT_INDEX = 0,
+      RANDOM_SPLIT_UNIFORM
+    };
+    
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+
+    void computeLinearLSError ( const NICE::VVector & x,
+                                const NICE::Vector & y,
+                                const int & numEx,
+                                double & lsError);
+
+    bool errorReductionLeftRight ( const std::vector< std::pair< double, int > > values,
+          const NICE::Vector & y,
+          double threshold,
+          double & error_left,
+          double & error_right,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBLinear( const NICE::Config *conf, std::string section = "RTBLinear" );
+    
+    /** simple destructor */
+    virtual ~RTBLinear();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+} // namespace
+
+#endif 

+ 289 - 0
regression/randomforest/RTBMeanPostImprovement.cpp

@@ -0,0 +1,289 @@
+/**
+* @file RTBMeanPostImprovement.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 07/23/2013
+
+*/
+#define _USE_MATH_DEFINES
+
+#include <iostream>
+#include <math.h>
+#include "RTBMeanPostImprovement.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBMeanPostImprovement::RTBMeanPostImprovement( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  minimum_improvement = conf->gD("RandomForest", "minimum_improvement", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+  auto_bandwith = conf->gB(section, "auto_bandwith", true);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBMeanPostImprovement::~RTBMeanPostImprovement()
+{
+}
+
+bool RTBMeanPostImprovement::improvementLeftRight(const vector< pair< double, int > > values,
+          const Vector & y,
+          double threshold,
+          vector<double> & empDist_left,
+          vector<double> & empDist_right,
+          int& count_left,
+          int& count_right,
+          double& h,
+          double& p )
+{
+  count_left = 0;
+  count_right = 0;
+  vector<double> selection_left;
+  vector<double> selection_right;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    if ( (it->first) < threshold )
+    {
+      count_left++;
+      selection_left.push_back( y[ it->second ] );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( y[ it->second ] );
+    }
+  }
+  
+  if ( (count_left < min_examples) || (count_right < min_examples) )
+    return false; // no split
+  
+  Vector vleft ( selection_left );
+  Vector vright ( selection_right );
+  
+  // empirical distribution [Taylor & Jones, 1996]
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double yval = y[ it->second ];
+    int smaller_left = 0;
+    int smaller_right = 0;
+    for ( int l = 0; l < count_left; l++ )
+    {
+      if ( selection_left[l] <= yval ) smaller_left++;
+    }
+    for ( int r = 0; r < count_right; r++ )
+    {
+      if ( selection_right[r] <= yval ) smaller_right++;
+    }
+    if ( (it->first) < threshold )
+    {
+      double emp = (double)(smaller_left)/(double)values.size();
+      empDist_left.push_back( emp );
+    } else {
+      double emp = (double)(smaller_right)/(double)values.size();
+      empDist_right.push_back( emp );
+    }
+  }
+  
+  // bandwidth parameter [Taylor & Jones, 1996]
+  if (auto_bandwith)
+  {
+    double sigma_hat = sqrt( vleft.StdDev()*vleft.StdDev() + vright.StdDev()*vright.StdDev() );
+    double z_hat = (double)( vleft.Mean() - vright.Mean() ) / sigma_hat;
+    p = (double)count_left / (double)values.size();
+    double tmp = (z_hat*z_hat - 1);
+    // note: degenerates (division by zero) if z_hat*z_hat == 1 or p is 0 or 1
+    h = sigma_hat / (double)( 2 * sqrt(M_PI) * p * (1-p) * tmp*tmp * gaussianVal(z_hat, 1.0) );
+  }
+  else
+    h = 1.0;
+  
+  return true;
+}
+
+double RTBMeanPostImprovement::gaussianVal ( const double input,
+          const double bandwidth )
+{
+  return ( 1 / ( sqrt( 2 * M_PI ) * sqrt(2) * bandwidth ) * exp ( -0.25 * input * input ) );
+}
+
+RegressionNode *RTBMeanPostImprovement::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  node->nodePrediction( y, selection );
+  double lsError = node->lsError;
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBMeanPostImprovement: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBMeanPostImprovement: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_improvement = -1.0;
+  vector<pair<double, int> > values;
+  
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+    
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+    
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+    ofstream datafile;
+    char buffer [20];
+    sprintf(buffer, "detailtree%d.dat", k);
+    datafile.open( buffer );
+    datafile << "# This file is called " << buffer << endl;
+    datafile << "# Data of the Mean Posterior Improvement Criterion" << endl;
+    datafile << "# threshold \tI \t\tMPI" << endl;
+#endif
+    if ( maxValue - minValue < 1e-7 ) continue;
+    
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      double threshold;
+      threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+      //double step = (maxValue - minValue) / random_split_tests;
+      //threshold = minValue + i*step;
+      
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s (t) %d/%d (%f)\n", k, i, threshold );
+#endif
+      
+      vector<double> empDist_left, empDist_right;
+      int count_left, count_right;
+      double h, p;
+      if ( ! improvementLeftRight( values, y, threshold, empDist_left,
+          empDist_right, count_left, count_right, h, p) )
+        continue;
+      
+      // mean posterior improvement
+      double I_hat = 0.0;
+      for ( int l = 0; l < count_left; l++ )
+      {
+        for ( int r = 0; r < count_right; r++ ) 
+        {
+          I_hat += gaussianVal( (empDist_left[l] - empDist_right[r]), h );
+          //I_hat += (empDist_left[l] - empDist_right[r]);
+        }
+      }
+      I_hat /= ((double)count_left*(double)count_right);
+      double mpi_hat = p * (1-p) * (1-I_hat);
+
+#ifdef DETAILTREE
+      fprintf (stderr, "pL=%f, pR=%f, I=%f --> M=%f\n", p, (1-p), I_hat, mpi_hat);
+      datafile << threshold << " " << I_hat << " " << mpi_hat << endl;
+#endif      
+      
+      if ( mpi_hat > best_improvement )
+      {
+        best_improvement = mpi_hat;
+        best_threshold =  threshold;
+        best_feature = f;
+      }
+    }
+#ifdef DETAILTREE
+    datafile.close();
+#endif    
+  }
+
+#ifdef DETAILTREE
+  fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+  
+  if ( best_improvement < minimum_improvement )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBMeanPostImprovement: error reduction to small !\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+  
+  node->f = best_feature;
+  node->threshold = best_threshold;
+  
+  // re-calculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBMeanPostImprovement::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+}
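
Note (not part of the commit): a compact sketch of the mean posterior improvement score assembled in buildRecursive above. Given the two empirical-CDF samples, a bandwidth h and the left fraction p, I is the average kernel similarity across left/right pairs and MPI = p(1-p)(1-I); a larger MPI indicates better separated children. gaussianVal is copied from this file; the inputs are toy values.

#define _USE_MATH_DEFINES
#include <math.h>
#include <cstdio>
#include <vector>

double gaussianVal ( const double input, const double bandwidth )
{
  return ( 1 / ( sqrt( 2 * M_PI ) * sqrt(2) * bandwidth ) * exp ( -0.25 * input * input ) );
}

double mpiScore ( const std::vector<double> & empLeft,
                  const std::vector<double> & empRight,
                  double h, double p )
{
  double I = 0.0;
  for ( size_t l = 0; l < empLeft.size(); l++ )
    for ( size_t r = 0; r < empRight.size(); r++ )
      I += gaussianVal ( empLeft[l] - empRight[r], h );
  I /= (double)( empLeft.size() * empRight.size() );
  return p * (1 - p) * (1 - I);
}

int main() {
  double ls[] = {0.1, 0.2, 0.3}, rs[] = {0.7, 0.8, 0.9};
  std::vector<double> left ( ls, ls + 3 ), right ( rs, rs + 3 );
  printf ( "MPI: %f\n", mpiScore ( left, right, 1.0, 0.5 ) );
  return 0;
}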

+ 72 - 0
regression/randomforest/RTBMeanPostImprovement.h

@@ -0,0 +1,72 @@
+/**
+* @file RTBMeanPostImprovement.h
+* @brief regression tree splitting criteria by Taylor and Jones, 1996
+* @author Sven Sickert
+* @date 07/23/2013
+
+*/
+#ifndef RTBMEANPOSTIMPROVEMENTINCLUDE
+#define RTBMEANPOSTIMPROVEMENTINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** regression tree splitting criteria by Taylor and Jones, 1996 */
+class RTBMeanPostImprovement : public RegressionTreeBuilder
+{
+  
+  protected:
+    int random_split_tests;
+    int random_features;
+    int max_depth;
+    int min_examples;
+    double minimum_improvement;
+    
+    bool auto_bandwith;
+    
+    /** save indices in leaves */
+    bool save_indices;
+    
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+    
+    double gaussianVal( const double input, 
+          const double bandwidth );
+
+    bool improvementLeftRight ( const std::vector< std::pair< double, int > > values,
+          const NICE::Vector & y,
+          double threshold,
+          std::vector<double> & empDist_left,
+          std::vector<double> & empDist_right,
+          int & count_left,
+          int & count_right,
+          double& h,
+          double& p );
+
+  public:
+    
+    /** simple constructor */
+    RTBMeanPostImprovement( const NICE::Config *conf, std::string section = "RTBMeanPostImprovement" );
+    
+    /** simple destructor */
+    virtual ~RTBMeanPostImprovement();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+} // namespace
+
+#endif

+ 250 - 0
regression/randomforest/RTBMinDist.cpp

@@ -0,0 +1,250 @@
+/**
+* @file RTBMinDist.cpp
+* @brief random regression tree; split criterion is to minimize mean distance of all examples of an inner node
+* @author Frank Prüfer
+* @date 09/17/2013
+
+*/
+#include <cmath>
+#include <iostream>
+
+#include "RTBMinDist.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBMinDist::RTBMinDist( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  minimum_distance_reduction = conf->gD("RandomForest", "minimum_distance_reduction", 10e-3 );
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBMinDist::~RTBMinDist()
+{
+}
+
+void RTBMinDist::computeDistanceToPrototype( const vector<double> &fvalues,
+                                            const int &countEx,
+                                            double &dist )
+{
+  double prototype = 0.0;
+
+  for ( int i = 0; i < countEx; i++ ){
+    prototype += fvalues[i];
+  }
+  prototype /= (double)countEx;
+
+  for ( int i = 0; i < countEx; i++ ){
+    dist += fabs(prototype - fvalues[i]); // fabs: plain abs may resolve to the integer overload
+  }
+  dist /= (double)countEx;
+}
+
+bool RTBMinDist::averageDistanceLeftRight(const vector< pair< double, int > > values,
+          double threshold,
+          double& avg_dist_left,
+          double& avg_dist_right,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  vector<int> selection_left;
+  vector<int> selection_right;
+  vector<double> values_left;
+  vector<double> values_right;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+      selection_left.push_back( it->second );
+      values_left.push_back( it->first );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( it->second );
+      values_right.push_back( it->first );
+    }
+  }
+  
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+  
+  if ( (count_left < min_examples)  || (count_right < min_examples) )
+    return false; // no split
+  
+  //compute mean distance of left and right group to respective prototype
+  computeDistanceToPrototype( values_left, count_left, avg_dist_left);
+  computeDistanceToPrototype( values_right, count_right, avg_dist_right);
+  
+  return true;
+}
+
+RegressionNode *RTBMinDist::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  node->nodePrediction( y, selection );
+
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBMinDist: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBMinDist: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_reduct = -1.0;
+//  vector<pair<double, int> > best_values;
+  vector<pair<double, int> > values;
+  double distance_left = 0.0;
+  double distance_right = 0.0;
+  
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+    
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+
+    double curDist = 0.0;
+    vector<double> fvalues;
+    for ( vector< pair< double, int > >::const_iterator it = values.begin();
+          it != values.end(); it++ )
+    {
+      fvalues.push_back(it->first);
+    }
+    computeDistanceToPrototype( fvalues, (int)values.size(), curDist );
+    
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    if ( maxValue - minValue < 1e-7 ) continue;
+    
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      double threshold;
+      threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+      
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
+#endif
+      distance_left = 0.0;
+      distance_right = 0.0;
+      
+      int count_left, count_right;
+      if ( ! averageDistanceLeftRight( values, threshold, distance_left,
+          distance_right, count_left, count_right) )
+        continue;
+      
+      //double pl = (count_left) / (count_left +count_right);
+      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
+      double distReduction = curDist - distance_left - distance_right;
+      
+      if ( distReduction > best_reduct )
+      {
+        best_reduct = distReduction;
+        best_threshold =  threshold;
+        best_feature = f;
+#ifdef DETAILTREE
+        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+      }
+    }
+  }
+  
+  if ( best_reduct < minimum_distance_reduction )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBMinDist: distance reduction to small !\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+  
+  node->f = best_feature;
+  node->threshold = best_threshold;
+  
+  // re-calculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBMinDist::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+} 
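
Note (not part of the commit): a worked example of RTBMinDist's split score with toy numbers. The score is the parent's mean absolute deviation from its mean (the "prototype"), minus the two children's deviations; the builder keeps the split with the largest reduction.

#include <cmath>
#include <cstdio>
#include <vector>

double meanAbsDeviation ( const std::vector<double> & v )
{
  double proto = 0.0;
  for ( size_t i = 0; i < v.size(); i++ ) proto += v[i];
  proto /= (double)v.size();                     // prototype = mean
  double dist = 0.0;
  for ( size_t i = 0; i < v.size(); i++ ) dist += fabs ( proto - v[i] );
  return dist / (double)v.size();
}

int main() {
  double ps[] = {1, 2, 8, 9}, ls[] = {1, 2}, rs[] = {8, 9};
  std::vector<double> parent ( ps, ps + 4 ), left ( ls, ls + 2 ), right ( rs, rs + 2 );
  double reduction = meanAbsDeviation ( parent )
                   - meanAbsDeviation ( left ) - meanAbsDeviation ( right );
  printf ( "distance reduction: %f\n", reduction ); // 3.5 - 0.5 - 0.5 = 2.5
  return 0;
}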
+ 

+ 66 - 0
regression/randomforest/RTBMinDist.h

@@ -0,0 +1,66 @@
+/**
+* @file RTBMinDist.h
+* @brief random regression tree; split criterion is to minimize mean distance of all examples of an inner node
+* @author Frank Prüfer
+* @date 09/17/2013
+
+*/
+#ifndef RTBMINDISTINCLUDE
+#define RTBMINDISTINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree */
+class RTBMinDist : public RegressionTreeBuilder
+{
+  
+  protected:
+    int random_split_tests;
+    int random_features;
+    int max_depth;
+    int min_examples;
+    double minimum_distance_reduction;
+    
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+
+    void computeDistanceToPrototype ( const std::vector<double> &fvalues,
+                                      const int &countEx,
+                                      double &dist);
+
+    bool averageDistanceLeftRight ( const std::vector< std::pair< double, int > > values,
+          double threshold,
+          double & avg_dist_left,
+          double & avg_dist_right,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBMinDist( const NICE::Config *conf, std::string section = "RTBMinDist" );
+    
+    /** simple destructor */
+    virtual ~RTBMinDist();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+} // namespace
+
+#endif 
+ 

+ 228 - 0
regression/randomforest/RTBRandom.cpp

@@ -0,0 +1,228 @@
+/**
+* @file RTBRandom.cpp
+* @brief random regression tree
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+
+#include "RTBRandom.h"
+
+using namespace OBJREC;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+
+using namespace NICE;
+
+RTBRandom::RTBRandom( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBRandom::~RTBRandom()
+{
+}
+
+bool RTBRandom::errorReductionLeftRight(const vector< pair< double, int > > values,
+          const Vector & y,
+          double threshold,
+          double& error_left,
+          double& error_right,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  vector<int> selection_left;
+  vector<int> selection_right;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+      selection_left.push_back( it->second );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( it->second );
+    }
+  }
+  
+//   if ( (count_left == 0) || (count_right == 0) )
+//     return false; // no split
+  
+  if ( (count_left < min_examples)  || (count_right < min_examples) )
+    return false; // no split
+  
+  RegressionNode *left = new RegressionNode ();
+  left->nodePrediction( y, selection_left );
+  error_left = left->lsError;
+  delete left;
+  
+  RegressionNode *right = new RegressionNode ();
+  right->nodePrediction( y, selection_right );
+  error_right = right->lsError;
+  delete right;
+  
+  return true;
+}
+
+RegressionNode *RTBRandom::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+  node->nodePrediction( y, selection );
+  double lsError = node->lsError;
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBRandom: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBRandom: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_reduct = -1.0;
+  vector<pair<double, int> > best_values;
+  vector<pair<double, int> > values;
+  double lsError_left = 0.0;
+  double lsError_right = 0.0;
+  
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+    
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+    
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    if ( maxValue - minValue < 1e-7 ) continue;
+    
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      double threshold;
+      threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+      
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
+#endif
+      lsError_left = 0.0;
+      lsError_right = 0.0;
+      
+      int count_left, count_right;
+      if ( ! errorReductionLeftRight( values, y, threshold, lsError_left,
+          lsError_right, count_left, count_right) )
+        continue;
+      
+      //double pl = (count_left) / (count_left +count_right);
+      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
+      double errorReduction = lsError - lsError_left - lsError_right;
+      
+      if ( errorReduction > best_reduct )
+      {
+        best_reduct = errorReduction;
+        best_threshold =  threshold;
+        best_feature = f;
+#ifdef DETAILTREE
+        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+      }
+    }
+  }
+  
+  if ( best_reduct < minimum_error_reduction )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBRandom: error reduction to small !\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+  
+  node->f = best_feature;
+  node->threshold = best_threshold;
+  
+  // re-calculating examples_left and examples_right
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBRandom::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  int index = 0;
+  
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+  {
+    all.push_back( index );
+    index++;
+  }
+  
+  return buildRecursive( x, y, all, 0);
+}
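
Note (not part of the commit): all builders in this commit sample thresholds with rand() * (maxValue - minValue) / RAND_MAX + minValue. A sketch of an equivalent draw using the C++11 <random> facilities, which avoids the coarse RAND_MAX granularity and the shared global state of rand() under the OpenMP training loop; this is an alternative, not what the commit uses.

#include <cstdio>
#include <random>

int main() {
    std::mt19937 rng ( 42 );                       // seeded engine, ideally one per thread
    double minValue = 0.25, maxValue = 0.75;       // assumed feature range
    std::uniform_real_distribution<double> dist ( minValue, maxValue );
    for ( int i = 0; i < 3; i++ )
        printf ( "threshold: %f\n", dist ( rng ) );
    return 0;
}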

+ 72 - 0
regression/randomforest/RTBRandom.h

@@ -0,0 +1,72 @@
+/**
+* @file RTBRandom.h
+* @brief random regression tree
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#ifndef RTBRANDOMINCLUDE
+#define RTBRANDOMINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree */
+class RTBRandom : public RegressionTreeBuilder
+{
+  
+  protected:
+    int random_split_tests;
+    int random_features;
+    int max_depth;
+    int min_examples;
+    double minimum_error_reduction;
+    
+    int random_split_mode;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    enum {
+      RANDOM_SPLIT_INDEX = 0,
+      RANDOM_SPLIT_UNIFORM
+    };
+    
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+
+    bool errorReductionLeftRight ( const std::vector< std::pair< double, int > > values,
+          const NICE::Vector & y,
+          double threshold,
+          double & error_left,
+          double & error_right,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBRandom( const NICE::Config *conf, std::string section = "RTBRandom" );
+    
+    /** simple destructor */
+    virtual ~RTBRandom();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+} // namespace
+
+#endif

+ 357 - 0
regression/randomforest/RegRandomForests.cpp

@@ -0,0 +1,357 @@
+/**
+* @file RegRandomForests.cpp
+* @brief implementation of random set forests for regression
+* @author Sven Sickert
+* @date 06/28/2013
+
+*/
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <cmath>
+#include <iostream>
+#include <assert.h>
+
+#include "vislearning/regression/randomforest/RegRandomForests.h"
+#include "vislearning/regression/randomforest/RTBRandom.h"
+#include "vislearning/regression/randomforest/RTBLinear.h"
+#include "vislearning/regression/randomforest/RTBMinDist.h"
+#include "vislearning/regression/randomforest/RTBGrid.h"
+#include "vislearning/regression/randomforest/RTBClusterRandom.h"
+#include "vislearning/regression/randomforest/RTBMeanPostImprovement.h"
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+RegRandomForests::RegRandomForests()
+{
+  builder = NULL;
+  minimum_error_reduction = 0.0;
+  enableOutOfBagEstimates = false;
+}
+
+RegRandomForests::RegRandomForests( const Config *_conf,
+          std::string section ) : conf(_conf)
+{
+  std::string builder_method = conf->gS(section, "builder", "random");
+  minimum_error_reduction = conf->gD(section, "minimum_error_reduction", 10e-3);
+  enableOutOfBagEstimates = conf->gB(section, "enable_out_of_bag_estimates", false);
+  
+  confsection = section;
+  
+  if ( builder_method == "none" ) {
+    // do not initialize
+    builder = NULL;
+  }
+  else {
+    number_of_trees = conf->gI(section, "number_of_trees", 20 );
+    features_per_tree = conf->gD(section, "features_per_tree", 1.0 );
+    samples_per_tree  = conf->gD(section, "samples_per_tree", 0.2 );
+    
+    if ( builder_method == "random" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBRandom");
+      builder = new RTBRandom ( conf, builder_section );
+    }
+    else if ( builder_method == "min_dist" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBMinDist");
+      builder = new RTBMinDist ( conf, builder_section );
+    }
+    else if ( builder_method == "linear" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBLinear");
+      builder = new RTBLinear ( conf, builder_section );
+    }
+    else if ( builder_method == "grid" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBGrid");
+      builder = new RTBGrid ( conf, builder_section );
+    }
+    else if ( builder_method == "cluster_random" ) 
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBClusterRandom");
+      builder = new RTBClusterRandom ( conf, builder_section );
+    }
+    else if ( builder_method == "mean_post_improvement" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBMeanPostImprovement");
+      builder = new RTBMeanPostImprovement ( conf, builder_section );
+    } else {
+      fprintf (stderr, "RegressionTreeBuilder %s not yet implemented !\n", builder_method.c_str() );
+      exit(-1);
+    }
+  } 
+}
+
+RegRandomForests::~RegRandomForests()
+{
+  for ( vector<RegressionTree *>::iterator it = forest.begin();
+             it != forest.end(); it++ )
+    delete (*it);
+  
+  if ( builder != NULL )
+    delete builder;
+}
+
+void RegRandomForests::calcOutOfBagEstimates (
+          std::vector< std::vector<int> > & outofbagtrees,
+          NICE::VVector x,
+          NICE::Vector y )
+{
+  oobResults.clear();
+  
+  // calculate out-of-bag regression results as suggested by Breiman:
+  // training data that was not used to build a given tree serves as
+  // test data for that tree
+  long index = 0;
+  for ( int i = 0; i < (int)x.size(); i++, index++ )
+  {
+    double trueValue = y[i];
+    const vector<int> & trees = outofbagtrees[index];
+    
+    if ( trees.size() <= 0 ) continue;
+    
+    double predValue = predict ( x[i], trees );
+    
+    double predError = fabs( trueValue - predValue ); // fabs: plain abs may resolve to the integer overload
+    oobResults.push_back ( pair<double, double> ( predError, trueValue ) );
+  }
+}
+
+void RegRandomForests::getLeafNodes ( NICE::Vector x,
+          std::vector<RegressionNode *> & leafNodes,
+          int depth )
+{
+  leafNodes.reserve ( forest.size() );
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+        it != forest.end(); it++ )
+  {
+    RegressionTree & rt = *(*it);
+    RegressionNode *leaf = rt.getLeafNode ( x, depth );
+    leafNodes.push_back ( leaf );
+  }
+}
+
+void RegRandomForests::getAllLeafNodes ( vector<RegressionNode *> & leafNodes)
+{
+  int z = 0;
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++, z++ )
+  {
+    RegressionTree & rt = *(*it);
+    vector<RegressionNode *> leaves = rt.getAllLeafNodes();
+    for ( int j = 0; j < (int)leaves.size(); j++ )
+    {
+      for ( int k = 0; k < (int)leaves[j]->trainExamplesIndices.size(); k++ )
+      {
+        leaves[j]->trainExamplesIndices[k] = exselection[z][leaves[j]->trainExamplesIndices[k]];
+      }
+      leafNodes.push_back(leaves[j]);
+    }
+  }
+}
+
+void RegRandomForests::teach ( const NICE::VVector & x, const NICE::Vector & y )
+{
+  cerr << "RegRandomForests::teach()" << endl;
+  assert( builder != NULL );
+  
+  int featuresCount = (int) (x[0].size() * features_per_tree );
+  fprintf(stderr, "RegRandomForests: number of features %d\n", (int)x[0].size() );
+  
+  vector< vector<int> > outofbagtrees;
+  outofbagtrees.resize( x.size() );
+  
+  for ( int k = 0; k < number_of_trees; k++ )
+  {
+    vector<int> tmp;
+    exselection.push_back(tmp);
+  }
+  
+  #pragma omp parallel for
+  for ( int k = 0; k < number_of_trees; k++ )
+  {
+    fprintf( stderr, "[ -- building tree %d/%d -- ]\n", k + 1, number_of_trees);
+    
+    vector<int> examples_index;
+    for ( int i = 0; i < (int)x.size(); i++ )
+    {
+      examples_index.push_back( i );
+    }
+    
+    int trainingExamples = (int)(examples_index.size() * samples_per_tree);
+    fprintf (stderr, "RegRandomForests: selection of %d examples for each tree\n", trainingExamples );
+    
+    if ( (trainingExamples < 3) && ((int)examples_index.size() > trainingExamples) )
+    {
+      fprintf(stderr, "RegRandomForests: number of examples < 3 !! minExamples=%d, trainingExamples=%d\n",
+                      (int)x.size(), trainingExamples);
+      trainingExamples = examples_index.size();
+      fprintf(stderr, "RegRandomForests: I will use all %d examples. !!\n", trainingExamples);
+    }
+    
+    if ( samples_per_tree < 1.0 )
+      random_shuffle( examples_index.begin(), examples_index.end() );
+    
+    VVector subset;
+    Vector subval ( trainingExamples );
+    for ( int e = 0; e < trainingExamples; e++ )
+    {
+      exselection[k].push_back( examples_index[e] );
+      subset.push_back( x[ examples_index[e] ] );
+      subval.set( e, y[ examples_index[e] ] );
+    }
+        
+    // set out of bag trees
+    for ( uint e = trainingExamples; e < examples_index.size(); e++ )
+    {
+      int index = examples_index[e];
+      #pragma omp critical
+      outofbagtrees[index].push_back(k);
+    }
+    
+    /******* select a random feature set *******/
+    vector<int> features_subset;
+    for ( int j = 0; j < (int)x[0].size(); j++ )
+      features_subset.push_back( j );
+    
+    random_shuffle( features_subset.begin(), features_subset.end() );
+    while ((int)features_subset.size() > featuresCount)
+      features_subset.pop_back();
+    // note: features_subset is computed here but not yet passed on to the builder
+    
+    /******* training of an individual tree ****/
+    RegressionTree *tree = new RegressionTree( conf );
+    
+    builder->build( *tree, subset, subval );
+    
+    /******* prune tree using least squares criterion *****/
+    //if ( minimum_error_reduction > 0.0 )
+    //  tree->pruneTreeLeastSquares( minimum_error_reduction );
+    
+    /******* add individual tree to ensemble *****/
+    #pragma omp critical
+    forest.push_back(tree);
+  }
+  
+  if (enableOutOfBagEstimates)
+    calcOutOfBagEstimates(outofbagtrees, x, y);
+}
+
+double RegRandomForests::predict ( const NICE::Vector & x, 
+          const vector< int > & outofbagtrees )
+{
+  // predict using only a selection of all trees
+  // contained in outofbagtrees
+  
+  double overall_prediction = 0.0;
+  int treecount = 0;
+  
+  for ( vector<int>::const_iterator it = outofbagtrees.begin();
+        it != outofbagtrees.end();
+        it++ )
+  {
+    assert ( *it < (int)forest.size() );
+    RegressionTree & rt = *(forest[(*it)]);
+    double predVal;
+    rt.traverse( x, predVal );
+    
+    overall_prediction += predVal;
+    treecount++;
+  }
+  
+  overall_prediction /= treecount;
+  
+  return overall_prediction;
+}
+
+
+double RegRandomForests::predict ( const NICE::Vector & x )
+{
+  double overall_prediction = 0.0;
+  int treecount = 0;
+  
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+        it != forest.end();
+        it++ )
+  {
+    RegressionTree & rt = *(*it);
+    double predVal;
+    rt.traverse( x, predVal );
+    
+    overall_prediction += predVal;
+    treecount++;
+  }
+  
+  overall_prediction /= treecount;
+  
+  return overall_prediction;
+}
+
+void RegRandomForests::restore(istream & is, int format)
+{
+  std::string tag;
+  int index;
+
+  while ( (is >> tag) && (tag == "TREE") )
+  {
+    is >> index;
+    RegressionTree *rt = new RegressionTree ( conf );
+    rt->restore ( is );
+    if ( minimum_error_reduction > 0.0 )
+      rt->pruneTreeLeastSquares ( minimum_error_reduction );
+
+    forest.push_back(rt);
+  }
+}
+
+void RegRandomForests::store(ostream & os, int format) const
+{
+  int index = 0;
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++, index++ )
+  {
+    const RegressionTree & rt = *(*it);
+    os << "TREE " << index << endl;
+    rt.store ( os, format );
+    os << "ENDTREE ";
+  }
+}
+
+void RegRandomForests::clear()
+{
+  for ( vector<RegressionTree *>::iterator it = forest.begin();
+          it != forest.end(); it++ )
+    delete (*it);
+
+  forest.clear();
+}
+
+void RegRandomForests::indexDescendants(
+          map<RegressionNode *, pair<long, int> > & index) const
+{
+  long maxindex = 0;
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++ )
+    (*it)->indexDescendants ( index, maxindex );
+}
+
+void RegRandomForests::resetCounters()
+{
+  for ( vector<RegressionTree *>::const_iterator it = forest.begin();
+          it != forest.end(); it++ )
+    (*it)->resetCounters ();
+}
+
+void RegRandomForests::setComplexity(int size)
+{
+    fprintf (stderr, "RegRandomForests: set complexity to %d, overwriting current value %d\n", 
+    size, number_of_trees );
+    number_of_trees = size;
+}
+
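
Note (not part of the commit): a hedged end-to-end sketch for RegRandomForests. The sI/sS setter names are assumed from the NICE Config API; the section and keys follow the constructor above, and the data is toy.

#include <cstdio>
#include "core/basics/Config.h"
#include "core/vector/VVector.h"
#include "vislearning/regression/randomforest/RegRandomForests.h"

using namespace NICE;
using namespace OBJREC;

int main() {
    Config conf;
    conf.sS ( "RegRandomForests", "builder", "random" );   // assumed setters
    conf.sI ( "RegRandomForests", "number_of_trees", 5 );
    conf.sI ( "RTBRandom", "min_examples", 2 );

    VVector x;
    Vector y ( 20 );
    for ( int i = 0; i < 20; i++ )
    {
        x.push_back ( Vector ( 1, (double)i ) );
        y[i] = 3.0 * i + 1.0;                    // noiseless linear target
    }

    RegRandomForests forest ( &conf, "RegRandomForests" );
    forest.teach ( x, y );
    printf ( "prediction at 10.5: %f\n", forest.predict ( Vector ( 1, 10.5 ) ) );
    return 0;
}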

+ 128 - 0
regression/randomforest/RegRandomForests.h

@@ -0,0 +1,128 @@
+/**
+ * @file RegRandomForests.h
+ * @brief implementation of random set forest for regression
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRANDOMFORESTSINCLUDE
+#define REGRANDOMFORESTSINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+#include "vislearning/regression/randomforest/RegressionTree.h"
+#include "vislearning/regression/randomforest/RegressionTreeBuilder.h"
+
+
+namespace OBJREC
+{
+  
+/** implementation of random set forests for regression */
+class RegRandomForests : public RegressionAlgorithm
+{
+  protected:
+     /** vector containing all decision trees for regression */
+    std::vector<RegressionTree *> forest;
+
+    /** number of trees which will be generated during training */
+    int number_of_trees;
+
+    /** fraction of features used for each tree */
+    double features_per_tree;
+
+    /** fraction of training examples used for each tree */
+    double samples_per_tree;
+
+    /** if >0 then prune the trees using pruneTreeLeastSquares */
+    double minimum_error_reduction;
+
+    /** stored config to initialize a tree */
+    const NICE::Config *conf;
+
+    /** config section containing important config values */
+    std::string confsection;
+
+    /** pointer to the tree builder method */
+    RegressionTreeBuilder *builder;
+
+    /** calculate out-of-bag statistics or not */
+    bool enableOutOfBagEstimates;
+    
+    /** out-of-bag statistics */
+    std::vector<std::pair<double, double> > oobResults;
+
+    /** predict using only a subset of all trees */
+    double predict ( const NICE::Vector & x,
+          const std::vector<int> & outofbagtrees );
+
+    /** calculate out-of-bag statistics */
+    void calcOutOfBagEstimates ( std::vector< std::vector<int> > & outofbagtrees,
+          NICE::VVector x,
+          NICE::Vector y );
+
+    /** save example selection per tree */
+    std::vector<std::vector<int> > exselection;
+    
+  public:
+    
+    /** initialize the regression method */
+    RegRandomForests ( const NICE::Config *conf,
+          std::string section );
+    
+    /** do nothing */
+    RegRandomForests ();
+    
+    /** simple destructor */
+    virtual ~RegRandomForests();
+    
+    /** learn parameters/models/whatever using a set of vectors and
+     *  their corresponding function values
+     */
+    void teach ( const NICE::VVector & x, const NICE::Vector & y );
+    
+    /** main prediction function */
+    double predict ( const NICE::Vector & x );
+
+    /** get all leaf nodes for a given value (or inner nodes if depth is set to the level) */
+    void getLeafNodes ( NICE::Vector x,
+          std::vector<RegressionNode *> & leafNodes,
+          int depth = 100000 );
+    
+    /** get all leaf nodes (or inner nodes if depth is set to the level) */
+    void getAllLeafNodes ( std::vector<RegressionNode *> & leafNodes );
+
+    /** enumerate all nodes within the trees */
+    void indexDescendants ( std::map<RegressionNode *, std::pair<long, int> > & index ) const;
+
+    /** reset all counters in all nodes contained in the forest */
+    void resetCounters ();
+    
+    /** clone function */
+    virtual RegRandomForests *clone ( void ) const
+    {
+      fthrow ( NICE::Exception, "clone() not yet implemented!\n" );
+    }
+    
+    /** get out of bag estimates */
+    std::vector<std::pair<double, double> > & getOutOfBagResults ()
+    {
+      return oobResults;
+    };
+    
+    /** set the number of trees */
+    void setComplexity ( int size );
+    
+    /** IO functions */
+    void restore ( std::istream & is, int format = 0 );
+    void store ( std::ostream & os, int format = 0 ) const;
+    void clear ();
+};
+  
+} // namespace
+
+#endif

+ 146 - 0
regression/randomforest/RegressionNode.cpp

@@ -0,0 +1,146 @@
+/**
+* @file RegressionNode.cpp
+* @brief regression node
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+
+#include "vislearning/regression/randomforest/RegressionNode.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+RegressionNode::~RegressionNode()
+{
+}
+
+RegressionNode::RegressionNode ()
+{
+  left = NULL;
+  right = NULL;
+  f = 0;
+  counter = 0;
+}
+
+RegressionNode *RegressionNode::getLeafNode (
+          const NICE::Vector & x,
+          int depth )
+{
+  if ( (!depth) || ((left == NULL) && (right == NULL)) )
+    return this;
+  
+  double val = x[f];
+  if ( val < threshold )
+  {
+    if ( left != NULL )
+      return left->getLeafNode ( x, depth - 1 );
+    else
+      return this;
+  }
+  else
+  {
+    if ( right != NULL )
+      return right->getLeafNode( x, depth - 1 );
+    else
+      return this;
+  }
+}
+
+void RegressionNode::traverse (
+          const NICE::Vector & x,
+          double & _predVal
+                              )
+{
+  RegressionNode *leaf = getLeafNode ( x );
+  _predVal = leaf->predVal;
+}
+
+void RegressionNode::statistics ( int & depth, int & count ) const
+{
+  int dl, cl;
+  if ( left != NULL )
+  {
+    left->statistics ( dl, cl );
+    dl++;
+  } else {
+    dl = 0;
+    cl = 0;
+  }
+  
+  if ( right != NULL )
+  {
+    right->statistics( depth, count );
+    depth++;
+  } else {
+    depth = 0;
+    count = 0;
+  }
+  
+  depth = (depth > dl) ? depth : dl;
+  count += cl + 1;
+}
+
+void RegressionNode::indexDescendants (
+          map<RegressionNode *, pair<long, int> > & index,
+          long & maxindex,
+          int depth ) const
+{
+  if ( left != NULL )
+  {
+    maxindex++;
+    index.insert ( pair<RegressionNode *, pair<long, int> > ( left, pair<long, int>(maxindex, depth + 1) ) );
+    left->indexDescendants ( index, maxindex, depth+1 );
+  }
+  
+  if ( right != NULL )
+  {
+    maxindex++;
+    index.insert ( pair<RegressionNode *, pair<long, int> > ( right, pair<long, int>(maxindex, depth + 1) ) );
+    right->indexDescendants ( index, maxindex, depth+1 );
+  }
+}
+
+void RegressionNode::nodePrediction( 
+          const Vector & y,
+          const vector<int> & selection )
+{
+  double mean = 0.0;
+  for (int i = 0; i < (int)selection.size(); i++)
+  {
+    mean += y[ selection[i] ];
+  }
+  mean = mean/selection.size();
+  
+  double sum_squares = 0.0;
+  for (int i = 0; i < (int)selection.size(); i++)
+  {
+    double diff = y[ selection[i] ] - mean;
+    sum_squares += diff*diff;
+  }
+  
+  lsError = sum_squares;
+  predVal = mean;
+}
+
+void RegressionNode::resetCounters ()
+{
+  counter = 0;
+  if ( left != NULL ) left->resetCounters();
+  if ( right != NULL ) right->resetCounters();
+}
+
+void RegressionNode::copy ( RegressionNode *node )
+{
+    left = node->left;
+    right = node->right;
+    threshold = node->threshold; 
+    f = node->f;
+    predVal = node->predVal;
+    lsError = node->lsError;
+    trainExamplesIndices = node->trainExamplesIndices;
+}
+
+bool RegressionNode::isLeaf () const
+{
+    return ( (right == NULL) && (left == NULL) );
+}
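
Note (not part of the commit): a tiny check of what nodePrediction stores, with toy numbers. predVal is the mean of the selected targets and lsError is their sum of squared deviations from that mean.

#include <cstdio>
#include <vector>

int main() {
    double ys[] = {1.0, 2.0, 3.0, 6.0};            // selected targets
    std::vector<double> y ( ys, ys + 4 );
    double mean = 0.0;
    for ( size_t i = 0; i < y.size(); i++ ) mean += y[i];
    mean /= (double)y.size();                      // predVal = 3.0
    double sse = 0.0;
    for ( size_t i = 0; i < y.size(); i++ )
    {
        double d = y[i] - mean;
        sse += d * d;                              // lsError = 4 + 1 + 0 + 9 = 14
    }
    printf ( "predVal: %f, lsError: %f\n", mean, sse );
    return 0;
}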

+ 92 - 0
regression/randomforest/RegressionNode.h

@@ -0,0 +1,92 @@
+/**
+ * @file RegressionNode.h
+ * @brief regression node
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRESSIONNODEINCLUDE
+#define REGRESSIONNODEINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include <map>
+#include <limits>
+
+namespace OBJREC {
+  
+/** regression node: f(x) < threshold ? */
+class RegressionNode
+{
+  protected:
+  
+  public:
+    
+    /** threshold of the regression node */
+    double threshold;
+    
+    /** counter which can be used to
+        count the number of examples which reached the node */
+    double counter;
+    
+    /** the feature used for the regression node split */
+    int f;
+    
+    /** the least squares error of the node */
+    double lsError;
+    
+    /** the prediction value of the node */
+    double predVal;
+
+    /** the left branch of the tree */
+    RegressionNode *left;
+
+    /** the right branch of the tree */
+    RegressionNode *right;
+    
+    /** Indices of examples which were used to estimate the
+     * prediction value during training */
+    std::vector<int> trainExamplesIndices;
+
+    /** constructor */
+    RegressionNode ();
+    
+    /** simple destructor */
+    virtual ~RegressionNode();
+    
+    /** traverse the tree and get the resulting leaf node */
+    RegressionNode *getLeafNode ( const NICE::Vector & x,
+          int depth = std::numeric_limits<int>::max() );
+
+    /** traverse this node with an example */
+    void traverse ( const NICE::Vector & x,
+          double & predVal );
+    
+    /** calculate the overall statistic of the current branch */
+    void statistics ( int & depth, int & count ) const;
+     
+    /** index only the descendants (at depth+1 and below); does not index the node itself */
+    void indexDescendants ( std::map<RegressionNode *,
+          std::pair<long, int> > & index, 
+          long & maxindex,
+          int depth ) const;
+
+    /** calculate the prediction value for this node */
+    void nodePrediction( const NICE::Vector & y,
+          const std::vector<int> & selection);
+    
+    /** reset the counters variable of the current branch */
+    void resetCounters ();
+
+    /** copy the node information to another node */
+    void copy ( RegressionNode *node );
+
+    /** is this node a leaf */
+    bool isLeaf () const;
+};
+  
+  
+} // namespace
+
+#endif

+ 264 - 0
regression/randomforest/RegressionTree.cpp

@@ -0,0 +1,264 @@
+/** 
+* @file RegressionTree.cpp
+* @brief regression tree implementation
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+#include <assert.h>
+
+#include "vislearning/regression/randomforest/RegressionTree.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+RegressionTree::RegressionTree( const Config *_conf ) : conf(_conf)
+{
+  root = NULL;
+}
+
+RegressionTree::~RegressionTree()
+{
+  deleteNodes ( root );
+}
+
+void RegressionTree::statistics ( int & depth, int & count ) const
+{
+  if ( root == NULL )
+  {
+    depth = 0;
+    count = 0;
+  } else {
+    root->statistics ( depth, count );
+  }
+}
+
+void RegressionTree::traverse (
+          const Vector & x,
+          double & predVal )
+{
+  assert( root != NULL );
+  root->traverse ( x, predVal );
+}
+
+void RegressionTree::deleteNodes ( RegressionNode *tree )
+{
+  if ( tree != NULL )
+  {
+    deleteNodes ( tree->left );
+    deleteNodes ( tree->right );
+    delete tree;
+  }
+}
+
+void RegressionTree::clear ()
+{
+  deleteNodes ( root );
+  root = NULL;
+}
+
+void RegressionTree::resetCounters ()
+{
+  if ( root != NULL )
+    root->resetCounters ();
+}
+
+void RegressionTree::indexDescendants ( 
+          map<RegressionNode *, pair<long, int> > & index,
+          long & maxindex ) const
+{
+    if ( root != NULL )
+      root->indexDescendants ( index, maxindex, 0 );
+}
+
+RegressionNode *RegressionTree::getLeafNode ( 
+          Vector & x,
+          int maxdepth )
+{
+    return root->getLeafNode ( x, maxdepth );
+}
+
+void RegressionTree::getLeaves(
+          RegressionNode *node,
+          vector<RegressionNode*> &leaves)
+{
+  if(node->left == NULL && node->right == NULL)
+  {
+    leaves.push_back(node);
+    return;
+  }
+  getLeaves(node->right, leaves);
+  getLeaves(node->left, leaves);
+}
+
+vector<RegressionNode *> RegressionTree::getAllLeafNodes()
+{
+  vector<RegressionNode*> leaves;
+  getLeaves(root, leaves);
+  return leaves;
+}
+
+void RegressionTree::setRoot ( RegressionNode *newroot )
+{
+    root = newroot;
+}
+
+RegressionNode *RegressionTree::pruneTreeLeastSquares (
+          RegressionNode *node,
+          double minErrorReduction,
+          double & lsError )
+{
+  if ( node == NULL )  return NULL;
+  
+  lsError = node->lsError;
+  double leftError, rightError;
+  node->left = pruneTreeLeastSquares ( node->left, minErrorReduction, leftError );
+  node->right = pruneTreeLeastSquares ( node->right, minErrorReduction, rightError );
+
+  if (node->left != NULL && node->right != NULL)
+  {
+    if (lsError-leftError-rightError < minErrorReduction)
+    {
+      deleteNodes( node->left );
+      deleteNodes( node->right );
+      node->left = NULL;
+      node->right = NULL;
+    }
+  }
+  
+  return node;
+}
+
+void RegressionTree::pruneTreeLeastSquares ( double minErrorReduction )
+{
+  int depth, count;
+  statistics ( depth, count );
+  fprintf (stderr, "RegressionTree::pruneTreeLeastSquares: depth %d count %d\n", depth, count );
+  double tmp;
+  root = pruneTreeLeastSquares ( root, minErrorReduction, tmp );
+  statistics ( depth, count );
+  fprintf (stderr, "RegressionTree::pruneTreeLeastSquares: depth %d count %d (modified)\n", depth, count );
+}
+
+void RegressionTree::store (ostream & os, int format) const
+{
+  if ( root == NULL ) return;
+  
+  // indexing
+  map<RegressionNode *, pair<long, int> > index;
+
+  index.insert ( pair<RegressionNode *, pair<long, int> > ( NULL, pair<long, int> ( 0, 0 ) ) );
+  index.insert ( pair<RegressionNode *, pair<long, int> > ( root, pair<long, int> ( 1, 0 ) ) );
+  long maxindex = 1;
+  root->indexDescendants ( index, maxindex, 0 );
+
+  for ( map<RegressionNode *, pair<long, int> >::iterator i  = index.begin();
+        i != index.end();
+        i++ )
+  {
+    RegressionNode *node = i->first;
+
+    if ( node == NULL ) continue;
+
+    long ind = i->second.first;
+    long ind_l = index[ node->left ].first;
+    long ind_r = index[ node->right ].first;
+
+    os << "NODE " << ind << " " << ind_l << " " << ind_r << endl;
+
+    if ( !node->isLeaf() ) {
+      // marker telling restore() that a split feature and threshold follow
+      os << "SPLIT";
+      os << endl;
+      os << node->f;
+      os << endl;
+      os << node->threshold;
+      os << endl;
+    } else {
+      os << "LEAF";
+      os << endl;
+    }
+
+    os << node->lsError << " " << -1 << endl;
+  }
+  
+}
+
+void RegressionTree::restore (istream & is, int format)
+{
+  // indexing
+  map<long, RegressionNode *> index;
+  map<long, pair<long, long> > descendants;
+
+  index.insert ( pair<long, RegressionNode *> ( 0, NULL ) );
+
+  // refactor-nice.pl: check this substitution
+  // old: string tag;
+  std::string tag;
+
+  while ( (! is.eof()) && ( (is >> tag) && (tag == "NODE") ) )
+  {
+    long ind;
+    long ind_l;
+    long ind_r;
+    if (! (is >> ind)) break;
+    if (! (is >> ind_l)) break;
+    if (! (is >> ind_r)) break;
+  
+    descendants.insert ( pair<long, pair<long, long> > ( ind, pair<long, long> ( ind_l, ind_r ) ) );
+    RegressionNode *node = new RegressionNode();
+    index.insert ( pair<long, RegressionNode *> ( ind, node ) );
+  
+    std::string feature_tag;
+  
+    is >> feature_tag;
+    if ( feature_tag != "LEAF" )
+    {
+      is >> node->f;
+      is >> node->threshold;
+    }
+  
+    is >> node->lsError;
+    is >> feature_tag; // consume the "-1" placeholder written by store()
+  }
+
+  // connecting the tree
+  for ( map<long, RegressionNode *>::const_iterator it = index.begin();
+       it != index.end(); it++ )
+  {
+    RegressionNode *node = it->second;
+
+    if ( node == NULL ) continue;
+
+    long ind_l = descendants[it->first].first;
+    long ind_r = descendants[it->first].second;
+
+    map<long, RegressionNode *>::const_iterator il = index.find ( ind_l );
+    map<long, RegressionNode *>::const_iterator ir = index.find ( ind_r );
+
+    if ( ( il == index.end() ) || ( ir == index.end() ) )
+    {
+      fprintf (stderr, "File inconsistent: unable to build tree\n");
+      exit(-1);
+    }
+
+    RegressionNode *left = il->second;
+    RegressionNode *right = ir->second;
+
+    node->left = left;
+    node->right = right;
+  }
+  
+  map<long, RegressionNode *>::const_iterator iroot = index.find ( 1 );
+
+  if ( iroot == index.end() ) 
+  {
+    fprintf (stderr, "File inconsistent: unable to build tree (root node not found)\n");
+    exit(-1);
+  }
+
+  root = iroot->second;
+}

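For reference, store() above writes one NODE record per node: the node's
index and the indices of its children (0 meaning no child), then a SPLIT
marker followed by the split feature and threshold for inner nodes, or the
LEAF marker for leaves, and finally the node's least-squares error with a
trailing -1 placeholder. A decision stump would serialize roughly as follows
(error values illustrative; the record order follows the pointer-keyed index
map and is therefore arbitrary, which restore() handles by reconnecting the
nodes through the stored indices):

    NODE 1 2 3
    SPLIT
    0
    0.5
    0.125 -1
    NODE 2 0 0
    LEAF
    0.0 -1
    NODE 3 0 0
    LEAF
    0.0 -1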
+ 78 - 0
regression/randomforest/RegressionTree.h

@@ -0,0 +1,78 @@
+/**
+ * @file RegressionTree.h
+ * @brief regression tree implementation
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRESSIONTREEINCLUDE
+#define REGRESSIONTREEINCLUDE
+
+#include <map>
+#include <set>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include "core/basics/triplet.h"
+#include "core/basics/Config.h"
+#include "core/basics/Persistent.h"
+#include "vislearning/regression/randomforest/RegressionNode.h"
+
+namespace OBJREC {
+
+/** decision tree implementation for regression */
+class RegressionTree : public NICE::Persistent
+{
+  protected:
+    RegressionNode *root;
+    const NICE::Config *conf; // for restore operation
+    
+  public:
+    static void deleteNodes ( RegressionNode *tree );
+    
+    static RegressionNode *pruneTreeLeastSquares (
+          RegressionNode *node,
+          double minErrorReduction,
+          double & lsError );
+    
+    /** simple constructor */
+    RegressionTree( const NICE::Config *conf );
+    
+    /** simple destructor */
+    virtual ~RegressionTree();
+    
+    void traverse ( const NICE::Vector & x,
+          double & predVal );
+    
+    void resetCounters ();
+    
+    void statistics( int & depth, int & count ) const;
+    
+    void indexDescendants ( std::map<RegressionNode *, std::pair<long, int> > & index,
+          long & maxindex ) const;
+
+    RegressionNode *getLeafNode ( NICE::Vector & x,
+          int maxdepth = 100000 );
+    
+    void getLeaves ( RegressionNode *node, std::vector<RegressionNode*> &leaves);
+    
+    std::vector<RegressionNode *> getAllLeafNodes ();
+    
+    RegressionNode *getRoot( ) const { return root; };
+    
+    void pruneTreeLeastSquares ( double minErrorReduction );
+    
+    void setRoot( RegressionNode *newroot );
+    
+    void restore (std::istream & is, int format = 0);
+    void store (std::ostream & os, int format = 0) const;
+    void clear ();
+    
+
+};
+
+
+} // namespace
+
+#endif

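A short usage sketch for the tree interface (the file name is hypothetical;
store() produces the text format shown after RegressionTree.cpp above):

    #include "vislearning/regression/randomforest/RegressionTree.h"
    #include <fstream>

    void pruneAndSave ( OBJREC::RegressionTree & tree )
    {
      // collapse subtrees whose split reduces the squared error
      // by less than 0.01
      tree.pruneTreeLeastSquares ( 0.01 );

      std::ofstream ofs ( "tree.txt" );
      tree.store ( ofs );
    }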
+ 53 - 0
regression/randomforest/RegressionTreeBuilder.cpp

@@ -0,0 +1,53 @@
+/**
+* @file RegressionTreeBuilder.cpp
+* @brief build regression trees
+* @author Sven Sickert
+* @date 06/19/2013
+
+*/
+#include <iostream>
+
+#include <vislearning/regression/randomforest/RegressionTreeBuilder.h>
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+RegressionTreeBuilder::RegressionTreeBuilder ()
+{
+}
+
+RegressionTreeBuilder::~RegressionTreeBuilder ()
+{
+}
+
+void RegressionTreeBuilder::build (
+          RegressionTree& tree, 
+          const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  RegressionNode *root = build ( x, y );
+  tree.setRoot( root );
+  
+  int depth, count;
+  tree.statistics( depth, count );
+  fprintf (stderr, "RegressionTree: maximum depth = %d, number of nodes = %d\n", depth, count );
+}
+
+void RegressionTreeBuilder::collectFeatureValues (
+          const NICE::VVector & x,
+          const std::vector< int > & selection,
+          const int f,
+          vector< pair< double, int > >& values )
+{
+  for (int i = 0; i < (int)selection.size(); i++)
+  {
+    pair< double, int > curr;
+    double value = x[ selection[i] ][f];
+    curr.first = value;
+    curr.second = selection[i];
+    values.push_back( curr );
+  }
+}

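Concrete builders (RTBRandom, RTBGrid, ...) implement the pure virtual
build() overload. A hypothetical minimal subclass for illustration: it grows
no tree at all and returns a single leaf predicting the mean of y via
nodePrediction():

    #include "vislearning/regression/randomforest/RegressionTreeBuilder.h"

    namespace OBJREC {

    class RTBMean : public RegressionTreeBuilder
    {
      public:
        RegressionNode *build ( const NICE::VVector & x,
              const NICE::Vector & y )
        {
          RegressionNode *leaf = new RegressionNode();
          std::vector<int> all;
          for ( int i = 0; i < (int)y.size(); i++ )
            all.push_back ( i );
          leaf->nodePrediction ( y, all );  // sets predVal and lsError
          leaf->trainExamplesIndices = all;
          return leaf;
        }
    };

    } // namespace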
+ 56 - 0
regression/randomforest/RegressionTreeBuilder.h

@@ -0,0 +1,56 @@
+/** 
+ * @file RegressionTreeBuilder.h
+ * @brief build regression trees
+ * @author Sven Sickert
+ * @date 06/19/2013
+
+*/
+#ifndef REGRESSIONTREEBUILDERINCLUDE
+#define REGRESSIONTREEBUILDERINCLUDE
+
+#include <map>
+#include <set>
+
+#include "core/basics/triplet.h"
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/regression/randomforest/RegressionNode.h"
+#include "vislearning/regression/randomforest/RegressionTree.h"
+
+
+namespace OBJREC {
+
+/** build regression trees */
+class RegressionTreeBuilder
+{
+
+  protected:
+
+  public:
+  
+    /** simple constructor */
+    RegressionTreeBuilder();
+          
+    /** simple destructor */
+    virtual ~RegressionTreeBuilder();
+        
+    virtual RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y ) = 0;
+
+    void collectFeatureValues(const NICE::VVector & x,
+          const std::vector<int> & selection,
+          const int f,
+          std::vector< std::pair< double, int > > & values );
+
+    void build ( RegressionTree & tree, 
+          const NICE::VVector & x,
+          const NICE::Vector & y );
+
+};
+
+
+} // namespace
+
+#endif

+ 1 - 0
regression/randomforest/libdepend.inc

@@ -0,0 +1 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)

+ 8 - 0
regression/regcombination/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/regcombination/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The
+# SUBDIR_before/_add bookkeeping is only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 160 - 0
regression/regcombination/RegPreRandomForests.cpp

@@ -0,0 +1,160 @@
+/**
+* @file RegPreRandomForests.cpp
+* @brief Combination of a regression method with a pre-clustering using a random forest
+* @author Sven Sickert
+* @date 07/12/2013
+
+*/
+
+#include "vislearning/regression/regcombination/RegPreRandomForests.h"
+
+#include <iostream>
+#include <assert.h>
+
+using namespace OBJREC;
+using namespace std;
+using namespace NICE;
+
+RegPreRandomForests::RegPreRandomForests(const Config * conf,
+          const string & section,
+          RegressionAlgorithm *_leafRegressionPrototype )
+        : leafRegressionPrototype(_leafRegressionPrototype)
+{
+  string cluster_section = conf->gS ( section, "cluster_section", "RandomForest" );
+  mEx = conf->gI ( "RTBRandom", "min_examples", 500 );
+  randomforest = new RegRandomForests( conf, cluster_section );
+}
+
+RegPreRandomForests::~RegPreRandomForests()
+{
+  // delete the random forest
+  if ( randomforest != NULL )
+    delete randomforest;
+  
+  // delete all regression methods in the leafs
+  for ( map<RegressionNode *, RegressionAlgorithm * >::const_iterator it = leafRegressions.begin();
+        it != leafRegressions.end(); it++ )
+  {
+    RegressionAlgorithm * lr = it->second;
+    if ( lr != NULL )
+      delete lr;
+  }
+  
+  // delete regression prototype
+  if ( leafRegressionPrototype != NULL )
+    delete leafRegressionPrototype;
+}
+
+void RegPreRandomForests::teach ( const VVector & X, const Vector & y )
+{
+  randomforest->teach ( X, y );
+
+  if ( leafRegressionPrototype != NULL )
+  {
+    vector<RegressionNode *> leafNodes;
+    randomforest->getAllLeafNodes ( leafNodes );
+        
+    int lsize = leafNodes.size();
+    cerr << "leafnodes: " << lsize << endl;
+
+    #pragma omp parallel for
+    for ( int l = 0; l < lsize; l++ )
+    {
+      // the loop index serves as the leaf id; incrementing a shared
+      // counter here would be a data race inside the parallel region
+
+      RegressionNode *node = leafNodes[l];
+
+      if ( !node->isLeaf() ){
+        fprintf( stderr, "RegPreRandomForests::teach: node #%d is not a leaf!\n", l );
+        continue;
+      }
+
+      vector<int> leafTrainInds = node->trainExamplesIndices;
+      cerr << "Teaching regression method for leaf " << l << "..." << endl;
+      cerr << "examples in leaf: " << leafTrainInds.size() << endl;
+      assert ( leafTrainInds.size() > 0 );
+          
+      sort ( leafTrainInds.begin(), leafTrainInds.end() );
+
+      NICE::VVector leafTrainData;
+      vector<double> tmpVals;
+
+      for ( int i = 0; i < (int)leafTrainInds.size(); i++ )
+      {
+        if ( leafTrainInds[i] >= 0 && leafTrainInds[i] < (int)y.size() )
+        {
+          leafTrainData.push_back( X[ leafTrainInds[i] ] );
+          tmpVals.push_back( y[ leafTrainInds[i] ] );
+        }
+      }
+
+      if ( leafTrainData.size() == 0 ) continue;
+
+      NICE::Vector leafTrainVals( tmpVals );
+
+      RegressionAlgorithm *lr = leafRegressionPrototype->clone();
+
+      lr->teach( leafTrainData, leafTrainVals );
+
+      // the map is shared between threads, so guard the insert
+      #pragma omp critical
+      leafRegressions.insert ( pair< RegressionNode *, RegressionAlgorithm *> ( node, lr ) );
+    }
+  }
+}
+
+
+double RegPreRandomForests::predict ( const Vector & x )
+{
+  double pred = 0.0;
+  
+  vector<RegressionNode *> leafNodes;
+  
+  // traverse the forest and obtain all involved leaf nodes
+  randomforest->getLeafNodes ( x, leafNodes );
+  
+  for ( vector<RegressionNode *>::const_iterator it = leafNodes.begin();
+        it != leafNodes.end(); it++ )
+  {
+    RegressionNode *node = *it;
+    map<RegressionNode *, RegressionAlgorithm *>::const_iterator leafRegressionIt =
+      leafRegressions.find( node );
+    
+    if ( leafRegressionIt == leafRegressions.end() )
+    {
+      // this leaf has no associated regression method
+      // -> we will use the random forest result
+      pred += node->predVal;
+      continue;
+    }
+    
+    RegressionAlgorithm *leafRegression = leafRegressionIt->second;
+    pred += leafRegression->predict( x );
+  }
+  
+  pred /= leafNodes.size();
+  
+  return pred;
+}
+
+void RegPreRandomForests::clear ()
+{
+  map<RegressionNode *, RegressionAlgorithm *>::iterator iter;
+  for ( iter = leafRegressions.begin(); iter != leafRegressions.end(); iter++ )
+  {
+    iter->second->clear();
+  }
+  randomforest->clear();
+}
+
+void RegPreRandomForests::store ( ostream & os, int format ) const
+{
+  cerr << "RegPreRandomForest::store: not yet implemented" << endl;
+}
+
+void RegPreRandomForests::restore ( istream& is, int format )
+{
+  cerr << "RegPreRandomForest::restore: not yet implemented" << endl;
+}
+

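A hypothetical usage sketch (the config file and section names are made up;
missing config keys fall back to the defaults queried in the constructor).
Note that RegPreRandomForests takes ownership of the prototype and clones it
once per leaf during teach():

    #include "vislearning/regression/regcombination/RegPreRandomForests.h"
    #include "vislearning/regression/splineregression/CRSplineReg.h"

    int main ()
    {
      NICE::Config conf ( "regression.conf" );

      // leaf regression prototype, sorting along dimension 0
      OBJREC::CRSplineReg *proto = new OBJREC::CRSplineReg ( 0 );

      OBJREC::RegPreRandomForests reg ( &conf, "PreRandomForest", proto );

      NICE::VVector X;   // training inputs, to be filled by the caller
      NICE::Vector y;    // training targets
      // reg.teach ( X, y );
      // double pred = reg.predict ( x );

      return 0;
    }

As implemented above, predict() averages the per-tree leaf predictions and
falls back to the forest's own predVal for leaves that received no trained
regression.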
+ 62 - 0
regression/regcombination/RegPreRandomForests.h

@@ -0,0 +1,62 @@
+/**
+* @file RegPreRandomForests.h
+* @brief Combination of a regression method with a pre-clustering using a random forest
+* @author Sven Sickert
+* @date 07/12/2013
+*/
+#ifndef REGPRERANDOMFORESTSINCLUDE
+#define REGPRERANDOMFORESTSINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+
+#include <map>
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+#include "vislearning/regression/randomforest/RegRandomForests.h"
+
+namespace OBJREC{
+
+/** Combination of a regression method with a pre-clustering using a random forest */
+class RegPreRandomForests : public RegressionAlgorithm
+{
+  protected:
+    /** the regression prototype used to process
+        all examples in a leaf */
+    RegressionAlgorithm *leafRegressionPrototype;
+    
+    /** regression of each leaf */
+    std::map<RegressionNode *, RegressionAlgorithm *> leafRegressions;
+    
+    /** the random forest used to pre-cluster the features */
+    RegRandomForests *randomforest;
+    
+    /** maximum number of Examples in a leaf */
+    int mEx;
+    
+  public:
+    /** simple constructor */
+    RegPreRandomForests( const NICE::Config *conf,
+          const std::string & section,
+          RegressionAlgorithm * _leafRegressionPrototype );
+   
+   /** simple destructor */
+   virtual ~ RegPreRandomForests();
+   
+   /** learn parameters/models/whatever using a set of vectors and
+    *  their corresponding function values
+    */
+   void teach ( const NICE::VVector & X, const NICE::Vector & y );
+   
+   /** predict the function value for \c x */
+   double predict ( const NICE::Vector & x );
+   
+   void clear();
+   void store ( std::ostream & os, int format = 0 ) const;
+   void restore ( std::istream & is, int format = 0 );
+
+};
+
+} // namespace
+
+#endif

+ 3 - 0
regression/regcombination/libdepend.inc

@@ -0,0 +1,3 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)
+$(call PKG_DEPEND_INT,vislearning/regression/gpregression)
+$(call PKG_DEPEND_INT,vislearning/regression/randomforest)

+ 4 - 1
regression/regressionbase/RegressionAlgorithmKernel.cpp

@@ -29,6 +29,7 @@ RegressionAlgorithmKernel::RegressionAlgorithmKernel( const RegressionAlgorithmK
 	
 	this->X = src.X;
 	this->y = src.y;
+	this->conf = src.conf;
 }
 
 RegressionAlgorithmKernel::~RegressionAlgorithmKernel()
@@ -39,7 +40,7 @@ void RegressionAlgorithmKernel::teach ( const VVector & X, const NICE::Vector &
 {
 	if ( kernelFunction == NULL )
 		fthrow( Exception, "RegressionAlgorithmKernel::teach: To use this function, you have to specify a kernel function using the constructor" );
-
+  
 	this->y = y;
 	this->X = X;
 
@@ -49,6 +50,8 @@ void RegressionAlgorithmKernel::teach ( const VVector & X, const NICE::Vector &
 	kernelData->updateCholeskyFactorization();
 
 	teach ( kernelData, this->y );
+
+	delete kernelData;
 }
 
 double RegressionAlgorithmKernel::predict ( const NICE::Vector & x )

+ 225 - 0
regression/splineregression/CRSplineReg.cpp

@@ -0,0 +1,225 @@
+/**
+* @file CRSplineReg.cpp
+* @brief Implementation of Catmull-Rom-Splines for regression purposes
+* @author Frank Prüfer
+* @date 09/03/2013
+
+*/  
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <math.h>
+#include <iostream>
+
+#include "vislearning/regression/splineregression/CRSplineReg.h"
+#include "vislearning/regression/linregression/LinRegression.h"
+
+#include "vislearning/math/mathbase/FullVector.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+CRSplineReg::CRSplineReg (  const NICE::Config *_conf )
+{
+  tau = _conf->gD("CRSplineReg","tau",0.5);
+  sortDim = _conf->gI("CRSplineReg","sortDim",0);
+}
+
+CRSplineReg::CRSplineReg (  uint sDim )
+{
+  tau = 0.5; // sensible default, since this constructor bypasses the config
+  sortDim = sDim;
+}
+
+CRSplineReg::CRSplineReg ( const CRSplineReg & src ) : RegressionAlgorithm ( src )
+{
+  tau = src.tau;
+  dataSet = src.dataSet;
+  labelSet = src.labelSet;
+  sortDim = src.sortDim;
+}
+
+CRSplineReg::~CRSplineReg()
+{
+}
+
+CRSplineReg* CRSplineReg::clone ( void ) const
+{
+  return new CRSplineReg(*this);
+}
+
+void CRSplineReg::teach ( const NICE::VVector & _dataSet, const NICE::Vector & _labelSet)
+{
+    fprintf (stderr, "teach using all !\n");
+    //NOTE this is crucial if we clear _teachSet afterwards!
+    //therefore, take care NOT to call _techSet.clear() somewhere out of this method
+    this->dataSet = _dataSet;
+    this->labelSet = _labelSet.std_vector();
+    
+    std::cerr << "number of known training samples: " << this->dataSet.size() << std::endl;   
+    
+}
+
+void CRSplineReg::teach ( const NICE::Vector & x, const double & y )
+{
+    std::cerr << "CRSplineReg::teach one new example" << std::endl;
+    
+    for ( size_t i = 0 ; i < x.size() ; i++ )
+      if ( isnan(x[i]) ) 
+      {
+          fprintf (stderr, "There is a NAN value in within this vector: x[%d] = %f\n", (int)i, x[i]);
+          cerr << x << endl;
+          exit(-1);
+      }
+
+    dataSet.push_back ( x );
+    
+    labelSet.push_back ( y );
+    
+    std::cerr << "number of known training samples: " << dataSet.size()<< std::endl;
+}
+
+double CRSplineReg::predict ( const NICE::Vector & x )
+{
+  
+  if ( dataSet.size() <= 0 ) {
+    fprintf (stderr, "CRSplineReg: please use the train method first\n");
+    exit(-1);
+  }
+  int dimension = dataSet[0].size();
+
+  FullVector data ( dataSet.size()+1 );
+  
+#pragma omp parallel for  
+  for ( uint i = 0; i < dataSet.size(); i++ ){
+    data[i] = dataSet[i][sortDim];
+  }
+  data[dataSet.size()] = x[sortDim];
+    
+  std::vector<int> sortedInd;
+  data.getSortedIndices(sortedInd);
+    
+  int index = 0; // position of x in the sorted order
+   
+  for ( uint i = 0; i < sortedInd.size(); i++ ){
+    if ( sortedInd[i] == (int)dataSet.size() ){
+      index = i;
+      break;
+    }
+  }
+
+  NICE::Matrix points (4,dimension+1,0.0);
+  if ( index >= 2 && index < (int)(sortedInd.size() - 2) ){	//everything is okay
+    points.setRow(0,dataSet[sortedInd[index-2]]);
+    points(0,dimension) = labelSet[sortedInd[index-2]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+2]]);
+    points(3,dimension) = labelSet[sortedInd[index+2]];           
+  }
+  else if ( index == 1 ){	//just one point left from x
+    points.setRow(0,dataSet[sortedInd[index-1]]);
+    points(0,dimension) = labelSet[sortedInd[index-1]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+2]]);
+    points(3,dimension) = labelSet[sortedInd[index+2]];      
+  }
+  else if ( index == 0 ){	//x is the farthest left point
+    points.setRow(0,dataSet[sortedInd[index+1]]);
+    points(0,dimension) = labelSet[sortedInd[index+1]];
+    points.setRow(1,dataSet[sortedInd[index+1]]);
+    points(1,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+2]]);
+    points(3,dimension) = labelSet[sortedInd[index+2]]; 
+  }
+  else if ( index == (int)(sortedInd.size() - 2) ){	//just one point right from x
+    points.setRow(0,dataSet[sortedInd[index-2]]);
+    points(0,dimension) = labelSet[sortedInd[index-2]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+1]]);
+    points(3,dimension) = labelSet[sortedInd[index+1]];   
+  }
+  else if ( index == (int)(sortedInd.size() - 1) ){	//x is the farthest right point
+    points.setRow(0,dataSet[sortedInd[index-2]]);
+    points(0,dimension) = labelSet[sortedInd[index-2]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index-1]]);
+    points(2,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(3,dataSet[sortedInd[index-1]]);
+    points(3,dimension) = labelSet[sortedInd[index-1]];     
+  }
+
+  double t = (x[sortDim]-points(1,sortDim)) / (points(2,sortDim)-points(1,sortDim));	//this is just some kind of heuristic
+  if ( t != t || t < 0 || t > 1){	//check if t is NAN, -inf or inf (happens in the farthest right or left case from above)
+    t = 0.5;
+  }
+
+  //P(t) = b0*P0 + b1*P1 + b2*P2 + b3*P3    
+  NICE::Vector P(dimension);
+  double y;
+  double b0,b1,b2,b3;
+    
+  b0 = tau * (-(t*t*t) + 2*t*t - t);
+  b1 = tau * (3*t*t*t - 5*t*t + 2);
+  b2 = tau * (-3*t*t*t + 4*t*t + t);
+  b3 = tau * (t*t*t - t*t);
+
+#pragma omp parallel for  
+  for ( uint i = 0; i < (uint)dimension; i++ ){
+    P[i] = b0*points(0,i) + b1*points(1,i) + b2*points(2,i) + b3*points(3,i);
+  }
+  
+  double diff1 = (P-x).normL2();
+  uint counter = 1;
+  while ( diff1 > 1e-5 && counter <= 21){	//adjust t to fit data better
+    double tmp = t;
+    if (tmp > 0.5)
+      tmp = 1 - tmp;
+    t += tmp/counter;
+     
+    b0 = tau * (-(t*t*t) + 2*t*t - t);
+    b1 = tau * (3*t*t*t - 5*t*t + 2);
+    b2 = tau * (-3*t*t*t + 4*t*t + t);
+    b3 = tau * (t*t*t - t*t);
+      
+    for ( uint i = 0; i < (uint)dimension; i++ ){
+      P[i] = b0*points(0,i) + b1*points(1,i) + b2*points(2,i) + b3*points(3,i);
+    }
+    
+    double diff2 = (P-x).normL2();
+    if ( diff2 > diff1 && t > 0) {
+      t -= 2*tmp/counter;    
+	
+      b0 = tau * (-(t*t*t) + 2*t*t - t);
+      b1 = tau * (3*t*t*t - 5*t*t + 2);
+      b2 = tau * (-3*t*t*t + 4*t*t + t);
+      b3 = tau * (t*t*t - t*t);
+
+#pragma omp parallel for      
+      for ( uint i = 0; i < (uint)dimension; i++ ){
+	P[i] = b0*points(0,i) + b1*points(1,i) + b2*points(2,i) + b3*points(3,i);
+      }
+      diff1 = (P-x).normL2();
+    }
+    counter++;
+  }
+  
+  y = b0*points(0,dimension) + b1*points(1,dimension) + b2*points(2,dimension) + b3*points(3,dimension);
+
+  return y;
+  
+}

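The four blending weights b0..b3 above are the standard Catmull-Rom basis
polynomials scaled by tau; for tau = 0.5 they sum to one, so the result is an
affine combination of the control points. A self-contained sketch of the 1-D
blending step, independent of the NICE types:

    #include <cstdio>

    // Catmull-Rom blend of four control values p0..p3 at t in [0,1],
    // scaled by tau exactly as in CRSplineReg::predict
    double crblend ( double p0, double p1, double p2, double p3,
                     double t, double tau )
    {
      double b0 = tau * ( -(t*t*t) + 2*t*t - t );
      double b1 = tau * ( 3*t*t*t - 5*t*t + 2 );
      double b2 = tau * ( -3*t*t*t + 4*t*t + t );
      double b3 = tau * ( t*t*t - t*t );
      return b0*p0 + b1*p1 + b2*p2 + b3*p3;
    }

    int main ()
    {
      // halfway between the two inner control points: yields 1.5
      printf ( "%f\n", crblend ( 0.0, 1.0, 2.0, 3.0, 0.5, 0.5 ) );
      return 0;
    }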
+ 68 - 0
regression/splineregression/CRSplineReg.h

@@ -0,0 +1,68 @@
+/**
+* @file CRSplineReg.h
+* @brief Implementation of Catmull-Rom-Splines for regression purposes
+* @author Frank Prüfer
+* @date 09/03/2013
+
+*/  
+#ifndef CRSPLINEREGINCLUDE
+#define CRSPLINEREGINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+#include "core/vector/MatrixT.h"
+
+#include "core/basics/Config.h"
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+namespace OBJREC
+{
+class CRSplineReg : public RegressionAlgorithm
+{
+  protected:
+    /** smoothness parameter */
+    double tau;
+    
+    /** dimension which is used for sorting the data (maybe use something like PCA to determine this variable) */
+    uint sortDim;
+    
+    /** set of data points */
+    NICE::VVector dataSet;
+    
+    /** set of responses according to dataset */
+    std::vector<double> labelSet;
+  
+  public:
+    /** simple constructor */
+    CRSplineReg( const NICE::Config *_conf );
+    
+    /** simple constructor specifying in which dimension data should be sorted*/
+    CRSplineReg( uint sDim );
+    
+    /** copy constructor */
+    CRSplineReg ( const CRSplineReg & src );
+    
+    /** simple destructor */
+    virtual ~CRSplineReg();
+    
+    /** clone function */
+    CRSplineReg* clone (void) const;    
+    
+    /** predict response using simple vector */
+    double predict ( const NICE::Vector & x );
+    
+    /** teach whole set at once */
+    void teach ( const NICE::VVector & dataSet, const NICE::Vector & labelSet );
+
+    /** teach one data point at a time */
+    void teach ( const NICE::Vector & x, const double & y );
+  
+};
+} // namespace
+
+
+
+
+
+#endif

+ 8 - 0
regression/splineregression/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/splineregression/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The
+# SUBDIR_before/_add bookkeeping is only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 1 - 0
regression/splineregression/libdepend.inc

@@ -0,0 +1 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)