Forráskód Böngészése

added unit test for ERC forest

Johannes Ruehle 11 éve
szülő
commit
ba7f8e6c7e

+ 89 - 0
features/simplefeatures/tests/Makefile.inc

@@ -0,0 +1,89 @@
+# BINARY-DIRECTORY-MAKEFILE
+# conventions:
+# - there are no subdirectories, they are ignored!
+# - all ".C", ".cpp" and ".c" files in the current directory are considered
+#   independent binaries, and linked as such.
+# - the binaries depend on the library of the parent directory
+# - the binary names are created with $(BINNAME), i.e. it will be more or less
+#   the name of the .o file
+# - all binaries will be added to the default build list ALL_BINARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+#SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+#include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# include the libdepend.inc file, which gives additional dependencies for the
+# libraries and binaries. additionally, an automatic dependency from the library
+# of the parent directory is added via PKG_DEPEND_INT on $(PARENTDIR) below.
+
+-include $(SUBDIR)libdepend.inc
+
+PARENTDIR:=$(patsubst %/,%,$(dir $(patsubst %/,%,$(SUBDIR))))
+$(call PKG_DEPEND_INT,$(PARENTDIR))
+$(call PKG_DEPEND_EXT,CPPUNIT)
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+      $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+CHECKS:=$(BINDIR)$(call LIBNAME,$(SUBDIR))
+ALL_CHECKS+=$(CHECKS)
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. each binary depends on the corresponding .o file and
+# on the libraries specified by the INTLIBS/EXTLIBS. these dependencies can be
+# specified manually or they are automatically stored in a .bd file.
+
+$(foreach head,$(wildcard $(SUBDIR)*.h),$(eval $(shell grep -q Q_OBJECT $(head) && echo $(head) | sed -e's@^@/@;s@.*/\(.*\)\.h$$@$(BINDIR)\1:$(OBJDIR)moc_\1.o@')))
+$(eval $(foreach c,$(CHECKS),$(c):$(BUILDDIR)$(CPPUNIT_MAIN_OBJ) $(OBJS) $(call PRINT_INTLIB_DEPS,$(c),.a)))
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 173 - 0
features/simplefeatures/tests/TestCodebookRandomForest.cpp

@@ -0,0 +1,173 @@
+/**
+  * Unit test for Extremely randomized clustering forest (ERC).
+  *
+  * @author Johannes Ruehle
+  * @date 01/05/2014
+  */
+#ifdef NICE_USELIB_CPPUNIT
+
+#include <string>
+#include <exception>
+#include <iostream>
+#include <fstream>
+
+//----------
+
+#include "TestCodebookRandomForest.h"
+
+#include "vislearning/features/simplefeatures/CodebookRandomForest.h"
+#include "vislearning/features/fpfeatures/VectorFeature.h"
+
+#include "vislearning/cbaselib/FeaturePool.h"
+
+// When true, the loaded feature matrix and label vectors are dumped to stderr.
+static const bool verbose = false;
+// When true, banner lines are printed at the start and at the end of the test.
+static const bool verboseStartEnd = true;
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestCodebookRandomForest );
+
+/** Fixture preparation hook; this test case needs no shared setup. */
+void TestCodebookRandomForest::setUp()
+{
+}
+
+/** Fixture cleanup hook; nothing is allocated in setUp(), so nothing to release. */
+void TestCodebookRandomForest::tearDown()
+{
+}
+/**
+  * Store/restore round-trip test for CodebookRandomForest.
+  *
+  * Steps:
+  *  1. read the feature matrix and the two label vectors from
+  *     "toyExampleLargeLargeScale.data" and wrap them into Examples,
+  *  2. train an FPCRandomForests configured by section "RandomForest" of
+  *     "config.conf",
+  *  3. build a CodebookRandomForest on top of it and vote every example into a
+  *     histogram,
+  *  4. store the codebook to "codebookRF.save.txt", restore it into a fresh
+  *     instance and check that codebook size, maximum depth, restricted codebook
+  *     size and the resulting histogram are the same.
+  *
+  * Any exception (including CppUnit assertion failures, which are thrown as
+  * exceptions derived from std::exception) is logged and then re-thrown so that
+  * the test runner reports the failure.
+  */
+void TestCodebookRandomForest::testCodebookRandomForest()
+{
+    if (verboseStartEnd)
+        std::cerr << "================== TestCodebookRandomForest::TestCodebookRandomForest ===================== " << std::endl;
+    try
+    {
+        Matrix mX;
+        Vector vY;
+        Vector vY_multi;
+
+        //ifstream ifs ("toyExample1.data", ios::in);
+        //   ifstream ifs ("toyExampleLargeScale.data", ios::in);
+        ifstream ifs ("toyExampleLargeLargeScale.data", ios::in);
+        CPPUNIT_ASSERT ( ifs.good() );
+        ifs >> mX;
+        ifs >> vY;        // not used below, but must be consumed to reach vY_multi
+        ifs >> vY_multi;
+        ifs.close();
+
+        if (verbose)
+        {
+            std::cerr << "data loaded: mX" << std::endl;
+            std::cerr << mX << std::endl;
+            std::cerr << "vY: " << std::endl;
+            std::cerr << vY << std::endl;
+            std::cerr << "vY_multi: " << std::endl;
+            std::cerr << vY_multi << std::endl;
+        }
+
+        int iNumFeatureDimension = mX.cols();
+
+        // memory layout needs to be transposed into rows x column: features x samples
+        // features must lay next to each other in memory, so that each feature vector can
+        // be addressed by a starting pointer and the number of feature dimensions to come.
+        Matrix mX_transposed = mX.transpose();
+
+        Examples examples;
+
+        bool bSuccess = Examples::wrapExamplesAroundFeatureMatrix(mX_transposed, vY_multi, examples);
+        CPPUNIT_ASSERT( bSuccess );
+
+        CPPUNIT_ASSERT( examples.size() == mX.rows() );
+        // examples[0] is dereferenced below, so an empty data set must fail here
+        CPPUNIT_ASSERT( examples.size() > 0 );
+
+        //----------------- create raw feature mapping -------------
+        OBJREC::FeaturePool fp;
+        OBJREC::VectorFeature *pVecFeature = new OBJREC::VectorFeature(iNumFeatureDimension);
+        pVecFeature->explode(fp);
+
+        //----------------- debug features -------------
+        OBJREC::Example t_Exp = examples[0].second;
+        NICE::Vector t_FeatVector;
+        fp.calcFeatureVector(t_Exp, t_FeatVector);
+        std::cerr << "first full Feature Vec: " <<t_FeatVector << std::endl;
+
+        //----------------- train our random Forest -------------
+        NICE::Config conf("config.conf");
+        // NOTE(review): pRandForest is never deleted in this function; presumably
+        // ownership passes to pCodebookRandomForest below - confirm against
+        // CodebookRandomForest's destructor.
+        OBJREC::FPCRandomForests *pRandForest = new OBJREC::FPCRandomForests(&conf,"RandomForest");
+        pRandForest->train(fp, examples);
+
+        //----------------- create codebook ERC clusterer -------------
+        int nMaxDepth        = conf.gI("CodebookRandomForest", "maxDepthTree",10);
+        int nMaxCodebookSize = conf.gI("CodebookRandomForest", "maxCodebookSize",100);
+
+        std::cerr << "maxDepthTree " << nMaxDepth << std::endl;
+
+        OBJREC::CodebookRandomForest *pCodebookRandomForest = new OBJREC::CodebookRandomForest(pRandForest, nMaxDepth, nMaxCodebookSize);
+
+        //----------------- quantize samples into histogram -------------
+        size_t iNumCodewords        = pCodebookRandomForest->getCodebookSize();
+        NICE::Vector histogram(iNumCodewords, 0.0f);
+
+        int t_iCodebookEntry; double t_fWeight; double t_fDistance;
+
+        for (size_t i = 0; i < examples.size(); i++ )
+        {
+            Example &t_Ex = examples[i].second;
+            pCodebookRandomForest->voteVQ( *t_Ex.vec, histogram, t_iCodebookEntry, t_fWeight, t_fDistance );
+            std::cerr << i << ": " << "CBEntry " << t_iCodebookEntry << " Weight: " << t_fWeight << " Distance: " << t_fDistance << std::endl;
+        }
+        std::cerr << "histogram: " << histogram << std::endl;
+
+        // test of store and restore
+        std::string t_sDestinationSave = "codebookRF.save.txt";
+        std::ofstream ofs;
+        ofs.open (t_sDestinationSave.c_str(), std::ofstream::out);
+        // without this check a non-writable directory would only show up later
+        // as a confusing restore mismatch
+        CPPUNIT_ASSERT( ofs.is_open() );
+        pCodebookRandomForest->store( ofs );
+        ofs.close();
+        // restore
+        OBJREC::CodebookRandomForest *pTestCRF = new OBJREC::CodebookRandomForest(-1, -1);
+        std::ifstream ifs2;
+        ifs2.open (t_sDestinationSave.c_str() );
+        CPPUNIT_ASSERT( ifs2.is_open() );
+        pTestCRF->restore( ifs2 );
+        ifs2.close();
+        CPPUNIT_ASSERT_EQUAL(iNumCodewords,     pTestCRF->getCodebookSize() );
+        CPPUNIT_ASSERT_EQUAL(nMaxDepth,         pTestCRF->getMaxDepth() );
+        CPPUNIT_ASSERT_EQUAL(nMaxCodebookSize,  pTestCRF->getRestrictedCodebookSize() );
+
+        NICE::Vector histogramCompare(iNumCodewords, 0.0f);
+
+        for (size_t i = 0; i < examples.size(); i++ )
+        {
+            Example &t_Ex = examples[i].second;
+            pTestCRF->voteVQ( *t_Ex.vec, histogramCompare, t_iCodebookEntry, t_fWeight, t_fDistance );
+        }
+        std::cerr << "histogram of restored CodebookRandomForest: " << histogramCompare << std::endl;
+        std::cerr << "comparing histograms...";
+        for (size_t i = 0; i < iNumCodewords; i++ )
+        {
+            CPPUNIT_ASSERT_DOUBLES_EQUAL(histogram[i], histogramCompare[i], 1e-5 );
+        }
+        std::cerr << "equal..." << std::endl;
+
+        // clean up
+        delete pTestCRF;
+        delete pCodebookRandomForest;
+
+        examples.clean();
+        delete pVecFeature;
+
+        if (verboseStartEnd)
+            std::cerr << "================== TestCodebookRandomForest::TestCodebookRandomForest done ===================== " << std::endl;
+    }
+    catch(const std::exception &e)
+    {
+        std::cerr << "exception occured: " << e.what() << std::endl;
+        // Re-throw: swallowing the exception here would also swallow CppUnit
+        // assertion failures and make the test pass unconditionally.
+        throw;
+    }
+}
+
+
+#endif

+ 26 - 0
features/simplefeatures/tests/TestCodebookRandomForest.h

@@ -0,0 +1,26 @@
+// Include guard named after this header. The previous guard name referred to a
+// different header (TESTVECTORFEATURE) and began with an underscore followed by
+// an uppercase letter, which is a reserved identifier in C++.
+#ifndef TESTCODEBOOKRANDOMFOREST_H
+#define TESTCODEBOOKRANDOMFOREST_H
+
+#include <cppunit/extensions/HelperMacros.h>
+
+/**
+ * CppUnit test fixture for CodebookRandomForest.
+ *
+ * Registers a single test case, testCodebookRandomForest().
+ */
+class TestCodebookRandomForest : public CppUnit::TestFixture {
+
+    CPPUNIT_TEST_SUITE( TestCodebookRandomForest );
+
+    CPPUNIT_TEST(testCodebookRandomForest);
+
+    CPPUNIT_TEST_SUITE_END();
+
+ public:
+    /** Called by CppUnit before each test case. */
+    void setUp();
+    /** Called by CppUnit after each test case. */
+    void tearDown();
+
+    /** The single test case of this fixture; defined in TestCodebookRandomForest.cpp. */
+    void testCodebookRandomForest();
+};
+
+#endif // TESTCODEBOOKRANDOMFOREST_H

+ 21 - 0
features/simplefeatures/tests/config.conf

@@ -0,0 +1,21 @@
+[RandomForest]
+number_of_trees = 3
+features_per_tree = 1.0
+samples_per_tree  = 0.5
+builder = random
+builder_section = DTBRandom
+minimum_error_reduction = .001
+minimum_improvement = .01
+enable_out_of_bag_estimates = false
+
+[DTBRandom]
+random_split_tests = 50
+random_features = 6
+max_depth = 4
+min_examples = 50
+save_indices = false
+start_random_generator = true
+
+[CodebookRandomForest]
+maxDepthTree = 500
+maxCodebookSize = 20

+ 12 - 0
features/simplefeatures/tests/libdepend.inc

@@ -0,0 +1,12 @@
+$(call PKG_DEPEND_INT,core/basics)
+$(call PKG_DEPEND_INT,core/algebra)
+$(call PKG_DEPEND_INT,vislearning/math)
+$(call PKG_DEPEND_INT,vislearning/baselib)
+$(call PKG_DEPEND_INT,vislearning/cbaselib)
+$(call PKG_DEPEND_INT,vislearning/classifier)
+$(call PKG_DEPEND_INT,vislearning/features)
+$(call PKG_DEPEND_INT,vislearning/matlabAccessHighLevel)
+$(call PKG_DEPEND_EXT,MATIO)
+$(call PKG_DEPEND_EXT,HDF5)
+
+

A különbségek nem kerülnek megjelenítésre, a fájl túl nagy
+ 1502 - 0
features/simplefeatures/tests/toyExampleLargeLargeScale.data


Nem az összes módosított fájl került megjelenítésre, mert túl sok fájl változott