123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- /**
- * @file eccv2012-synthetic.cpp
- * @brief ECCV 2012 Experiment with synthetic histograms to show the possibility of feature relevance selection
- * @author Alexander Freytag
- * @date 17-02-2012 (dd-mm-yyyy)
- */
- // STL includes
- #include <vector>
- #include <fstream>
- #include <iostream>
- #include <sstream>
- #include <limits>
- // NICE-core includes
- #include <core/basics/vectorio.h>
- #include <core/basics/Config.h>
- #include <core/basics/numerictools.h>
- #include <core/basics/Timer.h>
- //
- #include <core/image/Histogram.h>
- //
- #include <core/vector/VectorT.h>
- // NICE-vislearning includes
- #include <vislearning/cbaselib/ClassificationResults.h>
- // gp-hik-core includes
- #include <gp-hik-core/FastMinKernel.h>
- #include <gp-hik-core/FMKGPHyperparameterOptimization.h>
- //
- #include <gp-hik-core/parameterizedFunctions/PFAbsExp.h>
- #include <gp-hik-core/parameterizedFunctions/PFExp.h>
- #include <gp-hik-core/parameterizedFunctions/PFWeightedDim.h>
- //
- #include <gp-hik-core/tools.h>
- using namespace std;
- using namespace NICE;
- using namespace OBJREC;
- void sampleDataOneExample(std::vector<double> & trainData, const int & classNr)
- {
- double sum(0.0);
- double h1,h2,h3,h4,h5,h6,h7,h8;
- if (classNr == 1)
- {
- while (true)
- {
- h1 = fabs(randGaussDouble(0.03)); sum += h1;
- h2 = randDouble(0.25); sum += h2;
- h3 = fabs(randGaussDouble(0.07)); sum += h3;
- h4 = fabs(randGaussDouble(0.05)); sum += h4;
- h5 = randDouble(0.25); sum += h5;
- h6 = randDouble(0.25); sum += h6;
- h7 = randDouble(0.25); sum += h7;
- if (sum <= 1.0) // if sum is smaller than 1.0, everything is ok
- break;
- sum = 0.0;
- }
- h8 = 1.0-sum;
- }
- else
- {
- while (true)
- {
- h1 = randDouble(0.25); sum += h1;
- h2 = fabs(randGaussDouble(0.07)); sum += h2;
- h3 = fabs(randGaussDouble(0.12)); sum += h3;
- h4 = fabs(randGaussDouble(0.05)); sum += h4;
- h5 = randDouble(0.25); sum += h5;
- h6 = randDouble(0.25); sum += h6;
- h7 = randDouble(0.25); sum += h7;
- if (sum <= 1.0) // if sum is smaller than 1.0, everything is ok
- break;
- sum = 0.0;
- }
- h8 = 1.0-sum;
- }
- trainData.push_back(h1);
- trainData.push_back(h2);
- trainData.push_back(h3);
- trainData.push_back(h4);
- trainData.push_back(h5);
- trainData.push_back(h6);
- trainData.push_back(h7);
- trainData.push_back(h8);
- }
- void sampleData(std::vector< std::vector<double> > & trainData, NICE::Vector & y, const int & nrExamplesPerClass)
- {
- // initRand();
- trainData.clear();
- y.resize(2*nrExamplesPerClass);
- for (int i = 0; i < nrExamplesPerClass; i++)
- {
- //sample positive example
- y[2*i] = 1;
- std::vector<double> trainDataOneExample;
- sampleDataOneExample(trainDataOneExample, 1);
- trainData.push_back(trainDataOneExample);
-
- //sample negative example
- trainDataOneExample.clear();
- y[2*i+1] = -1;
- sampleDataOneExample(trainDataOneExample, -1);
- trainData.push_back(trainDataOneExample);
- }
- }
- void evaluateRandomDistribution(const std::vector< std::vector<double> > & trainData, const NICE::Vector & y, std::vector<NICE::Histogram> & histograms)
- {
- histograms.resize(16); // 8 dimensions in this synthetic example for two classes
-
- //init
- for (int i = 0; i < 16; i++)
- {
- histograms[i] = NICE::Histogram ( 0.0, 0.25, 10 ); // min, max, numberBins
- }
-
- histograms[0] = NICE::Histogram ( 0.0, 0.25, 10 );
- histograms[3] = NICE::Histogram ( 0.0, 0.25, 10 );
-
- histograms[9] = NICE::Histogram ( 0.0, 0.25, 10 );
- histograms[11] = NICE::Histogram ( 0.0, 0.25, 10 );
-
- histograms[7] = NICE::Histogram ( 0.0, 1.0, 10 );
- histograms[15] = NICE::Histogram ( 0.0, 1.0, 10 );
-
- for (int i = 0; i < 16; i++)
- {
- histograms[i].set(0);
- }
-
- //start
-
- int clAdd(0);
- for (int i = 0; i < trainData.size(); i++)
- {
- // std::cerr << i << " / " << trainData.size() << std::endl;
-
- //evaluation for the first class
- if (y[i] == 1)
- {
- histograms[0].increaseBin((int)floor(trainData[i][0]*40));
- histograms[1].increaseBin((int)floor(trainData[i][1]*40));
- histograms[2].increaseBin((int)floor(trainData[i][2]*40));
- histograms[3].increaseBin((int)floor(trainData[i][3]*40));
- histograms[4].increaseBin((int)floor(trainData[i][4]*40));
- histograms[5].increaseBin((int)floor(trainData[i][5]*40));
- histograms[6].increaseBin((int)floor(trainData[i][6]*40));
- histograms[7].increaseBin((int)floor(trainData[i][7]*10));
- }
- else //evaluation for the second class
- {
- histograms[8].increaseBin((int)floor(trainData[i][0]*40));
- histograms[9].increaseBin((int)floor(trainData[i][1]*40));
- histograms[10].increaseBin((int)floor(trainData[i][2]*40));
- histograms[11].increaseBin((int)floor(trainData[i][3]*40));
- histograms[12].increaseBin((int)floor(trainData[i][4]*40));
- histograms[13].increaseBin((int)floor(trainData[i][5]*40));
- histograms[14].increaseBin((int)floor(trainData[i][6]*40));
- histograms[15].increaseBin((int)floor(trainData[i][7]*10));
- }
- }
- }
- /**
-
- ECCV 2012 Experiment with synthetic data
-
- */
/**
 * @brief ECCV 2012 experiment on synthetic histograms: trains a plain GP-HIK
 *        baseline and a GP-HIK variant with optimized per-dimension relevance
 *        weights, then reports average recognition rates (ARR) and AUC values
 *        (mean and std-dev over nrRuns runs, per training-set size).
 */
int main (int argc, char **argv)
{
  // report uncaught exceptions verbosely instead of a bare abort (GNU extension)
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  initRand();

  Config conf ( argc, argv );
  // the baseline shares the configuration but never optimizes hyperparameters
  Config confBaseline ( conf );
  confBaseline.sS("HIKGP", "optimization_method", "none");

  string pf_baseline_s = conf.gS("main", "transformBaseline", "absexp");
  string pf_featRel_s = conf.gS("main", "transformFeatRel", "weightedDim");
  int nrRuns = conf.gI("main", "nrRuns", 1);
  int testSize = conf.gI("main", "testSize", 150); // per category

  bool printRandomDistribution = conf.gB("main", "printRandomDistribution", false);

  // NOTE(review): numeric_limits<double>::min() is the smallest POSITIVE double,
  // not the most negative value -- confirm this is the intended lower bound
  confBaseline.sD( "FMKGPHyperparameterOptimization", "parameter_upper_bound", numeric_limits<double>::max( ) );
  confBaseline.sD( "FMKGPHyperparameterOptimization", "parameter_lower_bound", numeric_limits<double>::min( ) );

  if ( pf_baseline_s == "absexp" )
    confBaseline.sS( "FMKGPHyperparameterOptimization", "transform", "absexp" );
  else if ( pf_baseline_s == "exp" )
    confBaseline.sS( "FMKGPHyperparameterOptimization", "transform", "exp" );
  else
    fthrow(Exception, "Parameterized function type " << pf_baseline_s << " not yet implemented");

  // feature-relevance setup: one learnable weight per dimension.
  // NOTE(review): the transform is hard-coded to "weightedDim" here, so the
  // config value read into pf_featRel_s above is only echoed below -- confirm
  conf.sD( "FMKGPHyperparameterOptimization", "parameter_upper_bound", numeric_limits<double>::max( ) );
  conf.sD( "FMKGPHyperparameterOptimization", "parameter_lower_bound", numeric_limits<double>::min( ) );
  conf.sS( "FMKGPHyperparameterOptimization", "transform", "weightedDim" );
  int dim ( 8 ); // dimensionality of the synthetic histograms
  conf.sI( "FMKGPHyperparameterOptimization", "pf_dim", dim );

  std::cerr << "Transformation type baseline: " << pf_baseline_s << std::endl;
  std::cerr << "Transformation type FeatRel: " << pf_featRel_s << std::endl;

  std::vector<int> trainSizes; // per category

  // trainSizes.push_back(5);
  // trainSizes.push_back(10);
  // trainSizes.push_back(15);
  // trainSizes.push_back(20);
  // trainSizes.push_back(50);
  // trainSizes.push_back(75);
  // trainSizes.push_back(100);
  trainSizes.push_back(500);

  // one outer entry per training size, each holding the per-run results
  std::vector<std::vector<double> > ARRs_baseline;
  std::vector<std::vector<double> > ARRs_featRel;

  std::vector<std::vector<double> > AUCs_baseline;
  std::vector<std::vector<double> > AUCs_featRel;

  for (std::vector<int>::const_iterator trainSize = trainSizes.begin(); trainSize != trainSizes.end(); trainSize++)
  {
    std::cerr << "trainSize: " << *trainSize << std::endl;
    // NOTE(review): the four running averages below are accumulated but never
    // printed or returned -- the final evaluation recomputes means from the
    // *_SingleSize vectors instead; consider removing them
    double AARRBaseline(0.0); // averaged average recognition rate :)
    double AARRFeatRel(0.0); // averaged average recognition rate :)

    double AAUCBaseline(0.0); // averaged area under curve :)
    double AAUCFeatRel(0.0); // averaged area under curve :)

    std::vector<double> ARRs_baseline_SingleSize;
    std::vector<double> ARRs_featRel_SingleSize;

    std::vector<double> AUCs_baseline_SingleSize;
    std::vector<double> AUCs_featRel_SingleSize;

    for (int run = 0; run < nrRuns; run++)
    {
      std::cerr << "run: " << run << std::endl;
      //----------------- TRAINING -------------------------
      //sample the training data
      std::vector< std::vector<double> > trainData;
      NICE::Vector yTrain;
      sampleData(trainData,yTrain, *trainSize);

      // optional debug mode: dump the per-class feature distributions to files
      // and exit without training or testing anything
      if (printRandomDistribution)
      {
        // upper border of each of the 16 histograms produced by
        // evaluateRandomDistribution (dimension 7 of each class spans [0,1])
        std::vector<double> borders;
        borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(1.0);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(1.0);
        std::cerr << "print distribution of features " << std::endl;
        std::vector<NICE::Histogram> histograms;

        evaluateRandomDistribution(trainData, yTrain, histograms);
        for (int i = 0; i < histograms.size(); i++)
        {
          int sum (0);
          // NOTE(review): hard-coded user-specific output path -- should come
          // from the config instead
          std::string fn = "/home/luetz/code/fast-hik/nice/fast-hik/hist";
          std::stringstream s1;
          s1 << i/8; // class index (0 or 1)
          fn += s1.str();
          fn += "-";
          std::stringstream s2;
          s2 << i%8; // feature dimension (0..7)
          fn += s2.str();
          std::cerr << "filename: "<< fn.c_str() << std::endl;

          std::fstream outfile;
          outfile.open( fn.c_str(), ios::out );
          if (outfile.is_open())
          {
            for (int k = 0; k < histograms[i].bins(); k++)
            {
              // gnuplot-style output: left bin border, then bin count
              outfile << borders[i]*k/ (double)histograms[i].bins() << " " << histograms[i][k] << std::endl;
              sum += histograms[i][k];
            }
            outfile.close();
          }
          else{
            std::cerr << "error while opening file " << fn << std::endl;
          }
        }
        std::cerr << "ending the function, we only printed the distributions" << std::endl;
        return 0;
      }

      std::vector<double> meanValues;
      calculateMeanPerDimension(trainData, meanValues);

      // FastMinKernel expects the transposed (dimension-major) layout
      transposeVectorOfVectors ( trainData );

      //baseline without feature relevance
      double noise = 0.1;
      // NOTE(review): fmkBaseline / fmkFeatRel are never deleted in this scope --
      // presumably FMKGPHyperparameterOptimization takes ownership; confirm,
      // otherwise this leaks once per run
      FastMinKernel *fmkBaseline = new FastMinKernel ( trainData, noise );

      FMKGPHyperparameterOptimization hyperBaseline ( &confBaseline, fmkBaseline );
      hyperBaseline.optimize ( yTrain );

      //with optimization of feature relevance (= optimization of weights for each dimension)
      FastMinKernel *fmkFeatRel = new FastMinKernel ( trainData, noise, dim );
      // std::cerr << "print Parameter of pfWeightedDim" << std::endl;
      // std::cerr << pfFeatRel->parameters() << std::endl;
      // std::cerr << "print Matrix after transformation" << std::endl;
      // pfFeatRel->applyFunctionToFeatureMatrix(fmkFeatRel->featureMatrix());
      // fmkFeatRel->featureMatrix().print();

      FMKGPHyperparameterOptimization hyperFeatRel ( &conf, fmkFeatRel, "FMKGPHyperparameterOptimization" /*config section*/ );
      hyperFeatRel.optimize ( yTrain );
      std::cerr << "meanValues: ";
      for (std::vector<double>::const_iterator meanIt = meanValues.begin(); meanIt != meanValues.end(); meanIt++)
      {
        std::cerr << *meanIt << " ";
      }
      std::cerr << std::endl << std::endl;

      //----------------- TESTING -------------------------
      //sample the test data (fresh draw, independent of the training set)
      std::vector< std::vector<double> > testData;
      NICE::Vector yTest;
      sampleData(testData,yTest, testSize);

      // std::cerr << "Printing testData: " << std::endl;
      // printMatrix<double>(testData);
      // std::cerr << yTest << std::endl;

      Timer t;

      // confusion matrices: row = ground-truth class, column = estimated class
      Matrix confusionBaseline ( 2, 2, 0.0 );
      Matrix confusionFeatRel ( 2, 2, 0.0 );

      ClassificationResults resultsBaseline;
      ClassificationResults resultsFeatRel;

      for ( uint i = 0 ; i < testData.size(); i++ )
      {
        const Vector xstar(testData[i]);
        // the following is just to be sure that we
        // do not count the time necessary for conversion
        SparseVector xstar_sparse ( xstar ); //default tolerance is 10e-10
        int classno_groundtruth = yTest[i];
        // map label -1 onto class index 0 for the confusion matrices
        //dirty :(
        if ((classno_groundtruth) < 0)
          classno_groundtruth = 0;

        SparseVector scoresBaseline;
        t.start();
        uint classno_estimated_baseline = hyperBaseline.classify ( xstar_sparse, scoresBaseline );
        t.stop();
        scoresBaseline.store(cerr);
        cerr << "baseline [" << i << " / " << testData.size() << "] " << classno_estimated_baseline << " " << classno_groundtruth << " time: " << t.getLast() << endl;
        confusionBaseline( classno_groundtruth, classno_estimated_baseline ) += 1;

        // building the result
        ClassificationResult rBaseline ( classno_estimated_baseline, scoresBaseline );
        // set ground truth label
        rBaseline.classno_groundtruth = classno_groundtruth;
        resultsBaseline.push_back ( rBaseline );

        SparseVector scoresFeatRel;
        t.start();
        uint classno_estimated_featRel = hyperFeatRel.classify ( xstar_sparse, scoresFeatRel );
        t.stop();
        scoresFeatRel.store(cerr);
        cerr << "FeatRel [" << i << " / " << testData.size() << "] " << classno_estimated_featRel << " " << classno_groundtruth << " time: " << t.getLast() << endl;
        confusionFeatRel( classno_groundtruth, classno_estimated_featRel ) += 1;

        // building the result
        ClassificationResult rFeatRel ( classno_estimated_featRel, scoresFeatRel );
        // set ground truth label
        rFeatRel.classno_groundtruth = classno_groundtruth;
        resultsFeatRel.push_back ( rFeatRel );
      }
      // after row-normalization, trace/rows is the average recognition rate
      confusionBaseline.normalizeRowsL1();
      confusionFeatRel.normalizeRowsL1();
      // --------------- ARR evaluation --------------------
      cerr << confusionBaseline << endl;
      cerr << "average recognition rate baseline: " << confusionBaseline.trace()/confusionBaseline.rows() << endl;

      cerr << confusionFeatRel << endl;
      cerr << "average recognition rate featRel: " << confusionFeatRel.trace()/confusionFeatRel.rows() << endl;

      AARRBaseline += (confusionBaseline.trace()/confusionBaseline.rows()) / nrRuns;
      ARRs_baseline_SingleSize.push_back(confusionBaseline.trace()/confusionBaseline.rows());

      AARRFeatRel += (confusionFeatRel.trace()/confusionFeatRel.rows()) / nrRuns;
      ARRs_featRel_SingleSize.push_back(confusionFeatRel.trace()/confusionFeatRel.rows());
      // --------------- AUC evaluation --------------------
      double perfvalueBaseline = resultsBaseline.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
      cerr << "AUC Baseline: " << perfvalueBaseline << endl;
      double perfvalueFeatRel = resultsFeatRel.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
      cerr << "AUC FeatRel: " << perfvalueFeatRel << endl;

      AAUCBaseline += perfvalueBaseline / nrRuns;
      AUCs_baseline_SingleSize.push_back(perfvalueBaseline);

      AAUCFeatRel += perfvalueFeatRel / nrRuns;
      AUCs_featRel_SingleSize.push_back(perfvalueFeatRel);
    }

    ARRs_baseline.push_back(ARRs_baseline_SingleSize);
    ARRs_featRel.push_back(ARRs_featRel_SingleSize);
    AUCs_baseline.push_back(AUCs_baseline_SingleSize);
    AUCs_featRel.push_back(AUCs_featRel_SingleSize);
  }
  // final summary: mean and standard deviation over all runs, per training size
  std::cerr << "================ EVALUATION ARR======================== " << std::endl;
  std::cerr << "trainsize << meanBaseline << stdDevBaseline << meanFeatRel << stdDevFeatRel " << std::endl;
  for (uint trainSizeIdx = 0; trainSizeIdx < trainSizes.size(); trainSizeIdx++)
  {
    double meanBaseline( calculating_mean(ARRs_baseline[trainSizeIdx]) );
    double meanFeatRel( calculating_mean(ARRs_featRel[trainSizeIdx]) );

    double stdDevBaseline(calculating_std_dev(ARRs_baseline[trainSizeIdx], meanBaseline));
    double stdDevFeatRel(calculating_std_dev(ARRs_featRel[trainSizeIdx], meanFeatRel));

    std::cerr << trainSizes[trainSizeIdx] << " " << meanBaseline << " " << stdDevBaseline << " " << meanFeatRel << " " << stdDevFeatRel << std::endl;
  }

  std::cerr << std::endl << std::endl << "================ EVALUATION AUC======================== " << std::endl;
  std::cerr << "trainsize << meanBaseline << stdDevBaseline << meanFeatRel << stdDevFeatRel " << std::endl;
  for (uint trainSizeIdx = 0; trainSizeIdx < trainSizes.size(); trainSizeIdx++)
  {
    double meanBaseline( calculating_mean(AUCs_baseline[trainSizeIdx]) );
    double meanFeatRel( calculating_mean(AUCs_featRel[trainSizeIdx]) );

    double stdDevBaseline(calculating_std_dev(AUCs_baseline[trainSizeIdx], meanBaseline));
    double stdDevFeatRel(calculating_std_dev(AUCs_featRel[trainSizeIdx], meanFeatRel));

    std::cerr << trainSizes[trainSizeIdx] << " " << meanBaseline << " " << stdDevBaseline << " " << meanFeatRel << " " << stdDevFeatRel << std::endl;
  }
  return 0;
}
|