123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921 |
- /**
- * @file ActiveLearningCheckerBoard.cpp
- * @brief Incrementally train the GP HIK classifier using the predictive variance and its approximations to select new samples, perform binary tests. We do not use the fast-hik implementations but perform the computations manually
- * @author Alexander Freytag
- * @date 11-06-2012
- */
- #include <vector>
- #include <iostream>
- #include <stdlib.h>
- #include <time.h>
- #include <set>
- #include <core/basics/Config.h>
- #include <core/basics/StringTools.h>
- #include <core/basics/Timer.h>
- #include <core/image/ImageT.h>
- #include <core/image/ColorImageT.h>
- #include <core/image/CircleT.h>
- #include <core/image/LineT.h>
- // QT Interface for image display
- // We only use the simple function showImage in this example, but there is far more
- // to explore in this area.
- #include <core/imagedisplay/ImageDisplay.h>
- #include "core/algebra/CholeskyRobust.h"
- #include "core/vector/Algorithms.h"
- #include <core/vector/SparseVectorT.h>
- #include <core/vector/VectorT.h>
- //----------
- #include "vislearning/baselib/ProgressBar.h"
- #include <vislearning/baselib/Globals.h>
- #include <vislearning/classifier/kernelclassifier/KCGPRegOneVsAll.h>
- #include <vislearning/classifier/fpclassifier/gphik/FPCGPHIK.h>
- #include "vislearning/cbaselib/MultiDataset.h"
- #include <vislearning/cbaselib/LabeledSet.h>
- #include "vislearning/cbaselib/ClassificationResults.h"
- #include <vislearning/math/kernels/KernelData.h>
- //----------
- #include "gp-hik-exp/progs/datatools.h"
- //----------
- using namespace std;
- using namespace NICE;
- using namespace OBJREC;
- enum QueryStrategy{
- RANDOM = 0,
- GPMEAN,
- GPPREDVAR,
- GPHEURISTIC,
- GPHEURISTICPLUS,
- GPBALANCE
- };
-
- std::string convertInt(int number)
- {
- stringstream ss;//create a stringstream
- ss << number;//add number to the stream
- return ss.str();//return a string with the contents of the stream
- }
- void sampleFromCheckerboard( const int & nrOfSectorsProDim, const int & sizeOfSector, const int & examplesPerSector, std::vector<NICE::Vector> & examples, NICE::Vector & labels)
- {
- int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
- //set the labels
- labels.resize( nrOfSectorsTotal * examplesPerSector );
- for ( int ex = 0; ex < examplesPerSector; ex++)
- {
- for ( int i = 0; i < nrOfSectorsProDim; i++)
- {
- for ( int j = 0; j < nrOfSectorsProDim; j++)
- {
- labels[ (i*nrOfSectorsProDim+j)*examplesPerSector + ex ] = ( i + j ) % 2;
- }
- }
- }
- for ( int i = 0; i < nrOfSectorsProDim; i++)
- {
- for ( int j = 0; j < nrOfSectorsProDim; j++)
- {
- for ( int ex = 0; ex < examplesPerSector; ex++)
- {
- NICE::Vector example( 3 );
- double xi ( rand() % sizeOfSector + i * sizeOfSector ) ;
- double yi ( rand() % sizeOfSector + j * sizeOfSector );
- //compute normalized histograms
- example[0] = xi / (nrOfSectorsTotal*sizeOfSector);
- example[1] = yi / (nrOfSectorsTotal*sizeOfSector);
- example[2] = 1.0 - example[0] - example[1];
- examples.push_back( example );
- }
- }
- }
- }
- void paintImageBorders( NICE::ColorImage & img, const int & nrOfSectorsProDim, const int & sizeOfSector )
- {
- std::cerr << "img.width(): " << img.width() << " img.height(): " << img.height() << std::endl;
- std::cerr << "nrOfSectorsProDim*sizeOfSector-1: " << nrOfSectorsProDim*sizeOfSector-1 << std::endl;
-
- NICE::Line l1 ( NICE::Coord( 0, 0 ) , NICE::Coord ( 0, nrOfSectorsProDim*sizeOfSector-1) );
- NICE::Line l2 ( NICE::Coord( 0, nrOfSectorsProDim*sizeOfSector-1 ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector-1, nrOfSectorsProDim*sizeOfSector-1) );
- NICE::Line l3 ( NICE::Coord( nrOfSectorsProDim*sizeOfSector-1, nrOfSectorsProDim*sizeOfSector-1 ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector-1, 0) );
- NICE::Line l4 ( NICE::Coord( nrOfSectorsProDim*sizeOfSector-1, 0 ) , NICE::Coord ( 0, 0 ) );
-
- l1.draw( img, Color ( 0, 0, 0 ) );
- l2.draw( img, Color ( 0, 0, 0 ) );
- l3.draw( img, Color ( 0, 0, 0 ) );
- l4.draw( img, Color ( 0, 0, 0 ) );
- }
- void paintSectorsInImage( NICE::ColorImage & img, const int & nrOfSectorsProDim, const int & sizeOfSector )
- {
- for ( int i = 1; i < nrOfSectorsProDim; i++ )
- {
- NICE::Line lHor ( NICE::Coord( 0, i*sizeOfSector ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector, i*sizeOfSector) );
- NICE::Line lVer ( NICE::Coord( i*sizeOfSector, 0 ) , NICE::Coord ( i*sizeOfSector, nrOfSectorsProDim*sizeOfSector) );
- lHor.draw( img, Color ( 0, 0, 0 ) );
- lVer.draw( img, Color ( 0, 0, 0 ) );
- }
- }
- void paintLabeledExamples( NICE::ColorImage & img, const NICE::Vector & y, const Examples & examples, const int & nrOfSectorsProDim, const int & sizeOfSector, const int & diameter )
- {
- int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
- for ( uint lE = 0; lE < examples.size(); lE++)
- {
- // if ( y[lE] != 1)
- // {
- // NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] *nrOfSectorsTotal *sizeOfSector) ,
- // (int) (( (* examples[lE].second.svec) [1]) * nrOfSectorsTotal *sizeOfSector) ), diameter );
- // circ.draw ( img, Color ( 255, 0, 0 ) );
- // }
- // else
- // {
- // NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] * nrOfSectorsTotal *sizeOfSector) ,
- // (int) ( (* examples[lE].second.svec) [1] * nrOfSectorsTotal *sizeOfSector) ), diameter );
- // circ.draw ( img, Color ( 0, 0, 255 ) );
- // }
- int thickness (2);
- for ( int i = 0; i < thickness; i++)
- {
- NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] * nrOfSectorsTotal *sizeOfSector) ,
- (int) ( (* examples[lE].second.svec) [1] * nrOfSectorsTotal *sizeOfSector) ), diameter-i );
- circ.draw ( img, Color ( 0, 0, 0 ) ); //old: ( 0, 255, 0 )
- }
- }
- }
- void paintUnlabeledExamples( NICE::ColorImage & img, const vector< NICE::Vector > & trainDataOrig, const NICE::Vector & y, const std::vector<int> & unlabeledExamples, const int & nrOfSectorsProDim, const int & sizeOfSector, const int & diameter )
- {
- int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
- for ( uint uE = 0; uE < unlabeledExamples.size(); uE++)
- {
- if ( y[ unlabeledExamples[uE] ] == 0)
- {
- NICE::Circle circ ( NICE::Coord( (int) (trainDataOrig[ unlabeledExamples[uE] ] [0] * nrOfSectorsTotal *sizeOfSector),
- (int) (trainDataOrig[ unlabeledExamples[uE] ] [1] * nrOfSectorsTotal *sizeOfSector) ) , diameter );
- circ.draw ( img, Color ( 255, 0, 0 ) );
- }
- else
- {
- NICE::Circle circ ( NICE::Coord( (int) (trainDataOrig[ unlabeledExamples[uE] ] [0] * nrOfSectorsTotal *sizeOfSector) ,
- (int) (trainDataOrig[ unlabeledExamples[uE] ] [1] * nrOfSectorsTotal *sizeOfSector) ) , diameter );
- circ.draw ( img, Color ( 0, 0, 255 ) );
- }
- }
- }
- void paintClassificationResult( NICE::ColorImage & img, const NICE::Vector& xstar, const int & diameter, const ClassificationResult & result, const int & nrOfSectorsProDim, const int & sizeOfSector )
- {
- int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
- NICE::Circle circ ( NICE::Coord( (int) ( xstar[0] * nrOfSectorsTotal *sizeOfSector) ,
- (int) ( xstar[1] * nrOfSectorsTotal *sizeOfSector) ), diameter );
- if (result.classno == 1) // classified as negative
- {
- circ.draw ( img, Color ( 0, 0, 255 ) );
- }
- else // classified as positive
- {
- circ.draw ( img, Color ( 255, 0, 0 ) );
- }
- }
- void paintQueriedExamples( NICE::ColorImage & img, const NICE::Vector& xstar, const int & diameter, const int & nrOfSectorsProDim, const int & sizeOfSector )
- {
- int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
- int thickness (2);
- for ( int i = 0; i < thickness; i++)
- {
- NICE::Circle circ ( NICE::Coord( (int) ( xstar[0] * nrOfSectorsTotal *sizeOfSector) ,
- (int) ( xstar[1] * nrOfSectorsTotal *sizeOfSector) ), diameter-i );
- circ.draw ( img, Color ( 0, 0, 0 ) ); //old: ( 0, 255, 0 )
- }
- }
- /**
- Computes from randomly or deterministically choosen trainimages kernelmatrizes and evaluates their performance, using ROI-optimization
- */
- int main ( int argc, char **argv )
- {
- std::cout.precision ( 10 );
- std::cerr.precision ( 10 );
- NICE::Config conf ( argc, argv );
- int trainExPerClass = conf.gI ( "main", "trainExPerClass", 10 );
- int incrementalAddSize = conf.gI("main", "incrementalAddSize", 1);
- int nrOfIncrements = conf.gI("main", "nrOfIncrements", 9);
-
- int num_runs = conf.gI ( "main", "num_runs", 10 );
- bool do_classification = conf.gB ( "main", "do_classification", true );
-
- double noise = conf.gD("GPHIKClassifier", "noise", 0.01);
- double squaredNoise = pow( noise, 2);
-
- int sizeOfSector = conf.gI( "main", "sizeOfSector", 250 );
- int nrOfSectorsProDim = conf.gI( "main", "nrOfSectorsProDim", 2 );
- int examplesPerSector = conf.gI( "main", "examplesPerSector", 5 );
- int examplesPerSectorTest = conf.gI( "main", "examplesPerSectorTest", 50 );
-
- bool visualizationOfResults = conf.gB( "main", "visualizationOfResults", true );
- bool paintSectorBorders = conf.gB( "main", "paintSectorBorders" , true );
- bool saveImages = conf.gB( "main", "saveImages", false );
- std::string destinationForImages = conf.gS( "main", "destinationForImages", "" );
-
- string queryStrategyString = conf.gS( "main", "queryStrategy", "random");
- QueryStrategy queryStrategy;
- if (queryStrategyString.compare("gpMean") == 0)
- {
- queryStrategy = GPMEAN;
- }
- else if (queryStrategyString.compare("gpPredVar") == 0)
- {
- queryStrategy = GPPREDVAR;
- }
- else if (queryStrategyString.compare("gpHeuristic") == 0)
- {
- queryStrategy = GPHEURISTIC;
- }
- else if (queryStrategyString.compare("gpHeuristicPlus") == 0)
- {
- queryStrategy = GPHEURISTICPLUS;
- }
- else if (queryStrategyString.compare("gpBalance") == 0)
- {
- queryStrategy = GPBALANCE;
- }
- else
- {
- queryStrategy = RANDOM;
- }
-
-
- bool verbose = conf.gB ( "main", "verbose", false );
- /* initialize random seed: */
- // srand ( time ( NULL ) ); //with 0 for reproductive results
- // srand ( 0 ); //with 0 for reproductive results
-
- // =========================== INIT ===========================
-
- std::vector<std::vector<double> > recognitions_rates(nrOfIncrements+1);
- std::vector<std::vector<double> > AUC_scores(nrOfIncrements+1);
- std::vector<std::vector<float> > classification_times(nrOfIncrements+1);
- std::vector<std::vector<float> > IL_training_times(nrOfIncrements);
-
- for ( int run = 0; run < num_runs; run++ )
- {
- std::cerr << "run: " << run << std::endl;
- srand ( run * 100000 ); //with 0 for reproductive results
-
- // read training set
- vector< NICE::Vector > trainDataOrig;
- Vector y;
- sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSector, trainDataOrig, y );
-
- // ------------------ TESTING
- std::vector<NICE::Vector> testData;
- Vector yTest;
- sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSectorTest, testData, yTest );
- if ( verbose )
- {
- for (uint i = 0; i < trainDataOrig.size(); i++ )
- {
- std::cerr << i << " : " << trainDataOrig[i] << std::endl;
- }
-
- std::cerr << "resulting binary label vector:" << y << std::endl;
- }
-
- std::set<int> classesAvailable;
- classesAvailable.insert( 0 ); //we have a single negative class
- classesAvailable.insert( 1 ); //and we have a single positive class
-
- std::map<int,int> nrExamplesPerClassInDataset; //simply count how many examples for every class are available
- std::map<int,std::vector<int> > examplesPerClassInDataset; //as well as their corresponding indices in the dataset
-
- //initialize this storage
- for (std::set<int>::const_iterator it = classesAvailable.begin(); it != classesAvailable.end(); it++)
- {
- nrExamplesPerClassInDataset.insert(std::pair<int,int>(*it,0));
- examplesPerClassInDataset.insert(std::pair<int,std::vector<int> >(*it,std::vector<int>(0)));
- }
- //store the indices of the examples
- for ( uint i = 0; i < y.size(); i++ )
- {
- (examplesPerClassInDataset.find( y[i] )->second).push_back(i);
- }
-
- //and count how many examples are in every class
- for (std::map<int,std::vector<int> >::const_iterator it = examplesPerClassInDataset.begin(); it != examplesPerClassInDataset.end(); it++)
- {
- nrExamplesPerClassInDataset.find(it->first)->second = it->second.size();
- }
-
- //simple output to tell how many examples we have for every class
- for ( std::map<int,int>::const_iterator it = nrExamplesPerClassInDataset.begin(); it != nrExamplesPerClassInDataset.end(); it++)
- {
- cerr << it->first << ": " << it->second << endl;
- }
-
- Examples examples;
-
- //count how many examples of every class we have while actively selecting new examples
- //NOTE works only if we have subsequent class numbers
- NICE::Vector pickedExamplesPerClass( classesAvailable.size(), trainExPerClass);
-
- std::map<int,std::vector<int> > examplesPerClassInDatasetTmp (examplesPerClassInDataset);
-
- //chose examples for every class used for training
- //we will always use the first examples from each class, since the dataset comes already randomly ordered
- for (std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++)
- {
- std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt);
- if ( verbose )
- std::cerr << "pick training examples for class " << *clIt << std::endl;
-
- for (int i = 0; i < trainExPerClass; i++)
- {
- if ( verbose )
- std::cerr << "i: " << i << std::endl;
- int exampleIndex ( rand() % ( exIt->second.size() ) );
- if ( verbose )
- std::cerr << "pick example " << exIt->second[exampleIndex] << " - " << y[exIt->second[exampleIndex] ] << std::endl;
-
- Example example;
- NICE::Vector & xTrain = trainDataOrig[exIt->second[exampleIndex]];
- example.svec = new SparseVector(xTrain);
- //let's take this example and its corresponding label (which should be *clIt)
- examples.push_back ( pair<int, Example> ( y[exIt->second[exampleIndex] ], example ) );
- //
- exIt->second.erase(exIt->second.begin()+exampleIndex);
- }
- }
-
- for (uint i = 0; i < examples.size(); i++ )
- {
- std::cerr << i << " : ";
- examples[i].second.svec->store(std::cerr);
- }
-
-
- //which examples are left to be actively chosen lateron?
- std::vector<int> unlabeledExamples( y.size() - trainExPerClass*classesAvailable.size() );
- int exCnt( 0 );
- for (std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++ )
- {
- std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt);
- //list all examples of this specific class
- for (std::vector<int>::const_iterator it = exIt->second.begin(); it != exIt->second.end(); it++)
- {
- unlabeledExamples[exCnt] = *it;
- exCnt++;
- }
- }
-
- //Fast-HIK
- FPCGPHIK * classifier = new FPCGPHIK( &conf );
-
- time_t prep_start_time = clock();
- FeaturePool fp; // will be ignored
- classifier->train ( fp, examples );
- float time_preparation = ( float ) ( clock() - prep_start_time ) ;
- std::cerr << "Time for training: " << time_preparation / CLOCKS_PER_SEC << std::endl;
-
- //this is only needed for the visualization
- NICE::Vector yBinGP ( examples.size(), -1 );
- for ( uint i = 0; i < examples.size(); i++ )
- {
- if ( examples[i].first == 1)
- yBinGP[i] = 1;
- }
- std::cerr << "yBinGP: " << yBinGP << std::endl;
-
- int nrOfClassesUsed = classesAvailable.size();
- if ( visualizationOfResults )
- {
- NICE::ColorImage img ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
- img.set( 255, 255, 255 );
-
- if ( paintSectorBorders )
- paintSectorsInImage( img, nrOfSectorsProDim, sizeOfSector );
- paintImageBorders( img, nrOfSectorsProDim, sizeOfSector );
- //paint the example that we can query
- paintLabeledExamples( img, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
- //and those that we already know
- paintUnlabeledExamples( img, trainDataOrig, y, unlabeledExamples, nrOfSectorsProDim, sizeOfSector, 2 );
-
- if ( saveImages )
- {
- img.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_000_initialBoard.ppm" );
- }
- else
- showImage(img, "Initial Checkerboard");
- }
-
- /* // ------------------ TESTING
- std::vector<NICE::Vector> testData;
- Vector yTest;
- sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSectorTest, testData, yTest ); */
-
- NICE::Matrix confusionMatrix ( 2, 2 );
- confusionMatrix.set ( 0.0 );
-
- time_t start_time = clock();
- std::vector<int> chosen_examples_per_class ( nrOfClassesUsed );
-
- std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;
- if ( do_classification )
- {
- NICE::ColorImage imgTest;
- if ( visualizationOfResults )
- {
- imgTest.resize ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
- imgTest.set( 255, 255, 255 );
-
- if ( paintSectorBorders )
- paintSectorsInImage( imgTest, nrOfSectorsProDim, sizeOfSector );
- paintImageBorders( imgTest, nrOfSectorsProDim, sizeOfSector );
- //again paint our labeled training images used so far
- paintLabeledExamples( imgTest, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
- }
-
- ClassificationResults results;
- ClassificationResult result;
- for ( uint i = 0 ; i < testData.size(); i++ )
- {
- const Vector & xstar = testData[i];
- SparseVector xstar_sparse ( xstar );
- Example example;
- example.svec = &xstar_sparse;
- result = classifier->classify( example );
-
- if ( visualizationOfResults )
- paintClassificationResult( imgTest, xstar, 2, result, nrOfSectorsProDim, sizeOfSector );
-
- result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;
- confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
- results.push_back( result );
- }
-
- if ( visualizationOfResults )
- {
- if ( saveImages )
- {
- imgTest.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(0)+"ClassifResult.ppm" );
- }
- else
- showImage(imgTest, "Classification Result");
- }
- float time_classification = ( float ) ( clock() - start_time ) ;
- if ( verbose )
- cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << endl;
- ( classification_times[0] ).push_back ( time_classification / CLOCKS_PER_SEC );
-
- confusionMatrix.normalizeRowsL1();
- std::cerr << confusionMatrix;
- double avg_recognition_rate = 0.0;
- for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
- {
- avg_recognition_rate += confusionMatrix ( i, i );
- }
- avg_recognition_rate /= confusionMatrix.rows();
- std::cerr << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << examples.size() << " training examples used" << std::endl;
- recognitions_rates[0].push_back ( avg_recognition_rate*100 );
- std::cerr << "number of classified examples: " << results.size() << std::endl;
- std::cerr << "perform auc evaluation "<< std::endl;
- double aucScore = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
-
- std::cerr << " run: " << run << " AUC-score: " << aucScore << " % -- " << examples.size() << " training examples used" << std::endl << std::endl;
- AUC_scores[0].push_back ( aucScore*100 );
- }
- //Now start the Incremental-Learning-Part
-
- for (int incrementationStep = 0; incrementationStep < nrOfIncrements; incrementationStep++)
- {
- //simply count how many possible example we have
- int nrOfPossibleExamples( unlabeledExamples.size() );
-
- //chose examples for every class used for training
- Examples newExamples;
-
- NICE::ColorImage imgAL;
-
- if ( visualizationOfResults )
- {
- imgAL.resize ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
- imgAL.set( 255, 255, 255 );
-
- if ( paintSectorBorders )
- paintSectorsInImage( imgAL, nrOfSectorsProDim, sizeOfSector );
- paintImageBorders( imgAL, nrOfSectorsProDim, sizeOfSector );
- //again paint our labeled training images used so far
- paintLabeledExamples( imgAL, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
- //and paint the unlabeled examples that are available to query
- paintUnlabeledExamples( imgAL, trainDataOrig, y, unlabeledExamples, nrOfSectorsProDim, sizeOfSector, 2 );
- }
-
- if (queryStrategy == RANDOM)
- {
- if ( verbose )
- std::cerr << "print chosen examples: " << std::endl;
- for (int i = 0; i < incrementalAddSize; i++)
- {
- int exampleIndex ( rand() % ( unlabeledExamples.size() ) );
-
- Example newExample;
- NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exampleIndex] ];
- newExample.svec = new SparseVector( xTrain );
- int label( y[ unlabeledExamples[exampleIndex] ] );
- //store this example for the visualization
- examples.push_back ( pair<int, Example> ( label, newExample ) );
- //and store it to add it to the classifier
- newExamples.push_back ( pair<int, Example> ( label, newExample ) );
- unlabeledExamples.erase( unlabeledExamples.begin()+exampleIndex );
- if ( verbose )
- std::cerr << exampleIndex+1 << " / " << incrementalAddSize << std::endl;
-
- pickedExamplesPerClass[label]++;
- yBinGP.append(label);
-
- if ( visualizationOfResults )
- paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector );
- }
- }// end computation for RANDOM
- else if ( (queryStrategy == GPMEAN) || (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTIC) || (queryStrategy == GPHEURISTICPLUS) || GPBALANCE)
- {
- //compute uncertainty values for all examples according to the query strategy
- std::vector<std::pair<int,double> > scores;
- scores.clear();
- time_t unc_pred_start_time = clock();
- for (uint exIndex = 0; exIndex < unlabeledExamples.size(); exIndex++)
- {
- NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exIndex] ];
- SparseVector xTrainSparse ( xTrain );
- Example example;
- example.svec = &xTrainSparse;
- if (queryStrategy == GPMEAN)
- {
- //compute the resulting score
- ClassificationResult r = classifier->classify( example );
- //we only have two classes with "inverse" outputs
- scores.push_back( std::pair<int,double> ( exIndex, fabs(r.scores[0]) ) );
- }
- else if (queryStrategy == GPPREDVAR)
- {
- double uncertainty;
- //use the pred variance computation specified in the config file
- classifier->predictUncertainty( example, uncertainty );
- //take the maximum of the scores for the predictive variance
- scores.push_back( std::pair<int,double> ( exIndex, uncertainty) );
- }
- else if (queryStrategy == GPHEURISTIC)
- {
- double uncertainty;
- //use the pred variance computation specified in the config file
- classifier->predictUncertainty( example, uncertainty );
- //compute the mean values for every class
- ClassificationResult r = classifier->classify( example );
- //take the minimum of the scores for the heuristic measure
- scores.push_back( std::pair<int,double> ( exIndex, fabs(r.scores[0]) / sqrt( squaredNoise + uncertainty )) );
- }
- else if (queryStrategy == GPHEURISTICPLUS)
- {
- double uncertainty;
- //use the pred variance computation specified in the config file
- classifier->predictUncertainty( example, uncertainty );
- //compute the mean values for every class
- ClassificationResult r = classifier->classify( example );
- //take the minimum of the scores for the heuristic measure
- scores.push_back( std::pair<int,double> ( exIndex, fabs(r.scores[0]) + sqrt( squaredNoise + uncertainty )) );
- }
- else if (queryStrategy == GPBALANCE)
- {
- double uncertainty;
- //use the pred variance computation specified in the config file
- classifier->predictUncertainty( example, uncertainty );
- //compute the mean values for every class
- ClassificationResult r = classifier->classify( example );
- double scorePositive (fabs (r.scores[0] - 1.0 ));
- double scoreNegative (fabs (r.scores[0] + 1.0 ));
- double score = scorePositive < scoreNegative ? scorePositive : scoreNegative;
- //take the minimum of the scores for the heuristic measure
- scores.push_back( std::pair<int,double> ( exIndex, score / ( squaredNoise + uncertainty )) );
- }
- }
- float time_score_computation = ( float ) ( clock() - unc_pred_start_time ) ;
-
- //pick the ones with best score
- //we could speed this up using a more sophisticated search method
-
- if ( (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTICPLUS) )//take the maximum of the scores for the predictive variance or the new weight
- {
- std::set<int> chosenExamplesForThisRun;
- chosenExamplesForThisRun.clear();
- for (int i = 0; i < incrementalAddSize; i++)
- {
- std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
- std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();
-
- for (std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt !=scores.end(); jIt++)
- {
- if (jIt->second > bestExample->second)
- bestExample = jIt;
- if (jIt->second < worstExample->second)
- worstExample = jIt;
- }
- if ( verbose )
- std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;
-
- Example newExample;
- NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
- newExample.svec = new SparseVector( xTrain );
- //actually this is the ACTIVE LEARNING step (query a label)
- int label( y[ unlabeledExamples[bestExample->first] ] );
- //store this example for the visualization
- examples.push_back ( pair<int, Example> ( label, newExample ) );
- //and store it to add it to the classifier
- newExamples.push_back ( pair<int, Example> ( label, newExample ) );
- //remember the index, to safely remove this example afterwards from unlabeledExamples
- chosenExamplesForThisRun.insert(bestExample->first);
- scores.erase(bestExample);
- pickedExamplesPerClass[label]++;
- yBinGP.append(label);
-
- if ( visualizationOfResults )
- paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector );
- }
-
- //delete the queried examples from the set of unlabeled ones
- //do this in an decreasing order in terms of indices to ensure valid access
- for (std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++)
- {
- unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
- }
- }
- else //take the minimum of the scores for the heuristic, heuristicPlus and the gp mean (minimum margin)
- {
- std::set<int> chosenExamplesForThisRun;
- chosenExamplesForThisRun.clear();
- for (int i = 0; i < incrementalAddSize; i++)
- {
- std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
- std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();
-
- for (std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt !=scores.end(); jIt++)
- {
- if (jIt->second < bestExample->second)
- bestExample = jIt;
- if (jIt->second > worstExample->second)
- worstExample = jIt;
- }
- if ( verbose )
- std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;
- Example newExample;
- NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
- newExample.svec = new SparseVector( xTrain );
- //actually this is the ACTIVE LEARNING step (query a label)
- int label( y[ unlabeledExamples[bestExample->first] ] );
- //store this example for the visualization
- examples.push_back ( pair<int, Example> ( label, newExample ) );
- //and store it to add it to the classifier
- newExamples.push_back ( pair<int, Example> ( label, newExample ) );
- //remember the index, to safely remove this example afterwards from unlabeledExamples
- chosenExamplesForThisRun.insert(bestExample->first);
- scores.erase(bestExample);
- pickedExamplesPerClass[label]++;
- yBinGP.append(label);
-
- if ( visualizationOfResults )
- paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector );
- }
-
- //delete the queried example from the set of unlabeled ones
- //do this in an decreasing order in terms of indices to ensure valid access
- for (std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++)
- {
- unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
- }
- }
-
- std::cerr << "Time used to compute query-scores for " << nrOfPossibleExamples << " examples: " << time_score_computation / CLOCKS_PER_SEC << " [s]" << std::endl;
- } // end computation for GPMEAN, GPPREDVAR, GPHEURISTIC, GPHEURISTICPLUS
-
- if ( visualizationOfResults )
- {
- if ( saveImages )
- {
- imgAL.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(incrementationStep+1)+"_queries.ppm" );
- }
- else
- showImage(imgAL, "Old and new queried example");
- }
-
- std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;
- //incremental learning
- classifier->addMultipleExamples( newExamples );
-
- //do the classification for evaluating the benefit of new examples
- if ( do_classification )
- {
-
- NICE::ColorImage imgTest;
- if ( visualizationOfResults )
- {
- imgTest.resize( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
- imgTest.set( 255, 255, 255 );
-
- if ( paintSectorBorders )
- paintSectorsInImage( imgTest, nrOfSectorsProDim, sizeOfSector );
- paintImageBorders( imgTest, nrOfSectorsProDim, sizeOfSector );
- //again paint our labeled training images used so far
- paintLabeledExamples( imgTest, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
- }
-
- time_t start_time = clock();
- ClassificationResults results;
- confusionMatrix.set( 0.0 );
- ClassificationResult result;
- for ( uint i = 0 ; i < testData.size(); i++ )
- {
- const Vector & xstar = testData[i];
- SparseVector xstar_sparse ( xstar );
-
- Example example;
- example.svec = &xstar_sparse;
- result = classifier->classify( example );
-
- if ( visualizationOfResults )
- paintClassificationResult( imgTest, xstar, 2, result, nrOfSectorsProDim, sizeOfSector );
-
- result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;
- results.push_back( result );
- confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
- }
- float time_classification = ( float ) ( clock() - start_time ) ;
- if ( verbose )
- std::cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << std::endl;
- ( classification_times[incrementationStep+1] ).push_back ( time_classification / CLOCKS_PER_SEC );
-
- confusionMatrix.normalizeRowsL1();
- std::cerr << confusionMatrix;
- double avg_recognition_rate ( 0.0 );
- for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
- {
- avg_recognition_rate += confusionMatrix ( i, i );
- }
- avg_recognition_rate /= confusionMatrix.rows();
-
- std::cerr << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl;
- recognitions_rates[incrementationStep+1].push_back ( avg_recognition_rate*100 );
-
- double score = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
- std::cerr << " run: " << run << " AUC-score: " << score << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl << std::endl;
- AUC_scores[incrementationStep+1].push_back ( score*100 );
-
- if ( visualizationOfResults )
- {
- if ( saveImages )
- {
- imgTest.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(incrementationStep+1)+"ClassifResult.ppm" );
- }
- else
- showImage(imgTest, "Classification result after inc step " + convertInt(incrementationStep+1));
- }
- } //classification after IL adding */
- } //IL adding of different classes
- std::cerr << "Final statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;
-
- //don't waste memory!
- for ( uint tmp = 0; tmp < examples.size(); tmp++ )
- {
- delete examples[tmp].second.svec;
- examples[tmp].second.svec = NULL;
- }
- }//runs
-
- // ================= EVALUATION =========================
-
- int nrOfClassesUsed ( 2 ); //binary setting
- if ( do_classification )
- {
- std::cerr << "========================" << std::endl;
- std::cerr << "recognition_rates" << std::endl;
- for ( std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
- {
- for ( std::vector<double> ::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
- {
- std::cerr << *jt << " ";
- }
- std::cerr << std::endl;
- }
- std::vector<double> mean_recs;
- std::vector<double> std_dev_recs;
- for (std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
- {
- double mean_rec ( 0.0 );
- for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
- {
- mean_rec += *itRun;
- }
- mean_rec /= it->size();
- mean_recs.push_back ( mean_rec );
- double std_dev_rec ( 0.0 );
- for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
- {
- std_dev_rec += pow ( *itRun - mean_rec, 2 );
- }
- std_dev_rec /= it->size();
- std_dev_rec = sqrt ( std_dev_rec );
- std_dev_recs.push_back ( std_dev_rec );
- }
- int datasize ( nrOfClassesUsed*trainExPerClass );
- for ( uint i = 0; i < recognitions_rates.size(); i++)
- {
- std::cerr << "size: " << datasize << " meanRR: " << mean_recs[i] << " stdDevRR: " << std_dev_recs[i] << std::endl;
- datasize += incrementalAddSize ;
- }
-
- std::cerr << "========================" << std::endl;
- std::cerr << "AUC_scores" << std::endl;
- for ( std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
- {
- for ( std::vector<double> ::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
- {
- std::cerr << *jt << " ";
- }
- std::cerr << std::endl;
- }
- std::vector<double> mean_aucs;
- std::vector<double> std_dev_aucs;
- for (std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
- {
- double mean_auc ( 0.0 );
- for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
- {
- mean_auc += *itRun;
- }
- mean_auc /= it->size();
- mean_aucs.push_back ( mean_auc );
- double std_dev_auc ( 0.0 );
- for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
- {
- std_dev_auc += pow ( *itRun - mean_auc, 2 );
- }
- std_dev_auc /= it->size();
- std_dev_auc = sqrt ( std_dev_auc );
- std_dev_aucs.push_back ( std_dev_auc );
- }
- datasize = nrOfClassesUsed*trainExPerClass;
- for ( uint i = 0; i < recognitions_rates.size(); i++)
- {
- std::cerr << "size: " << datasize << " meanAUC: " << mean_aucs[i] << " stdDevAUC: " << std_dev_aucs[i] << std::endl;
- datasize += incrementalAddSize ;
- }
- }
- else
- {
- std::cerr << "========================" << std::endl;
- std::cerr << "No classification done therefor no classification times available." << std::endl;
- }
- return 0;
- }
|