/** * @file ActiveLearningCheckerBoard.cpp * @brief Incrementally train the GP HIK classifier using the predictive variance and its approximations to select new samples, perform binary tests. We do not use the fast-hik implementations but perform the computations manually * @author Alexander Freytag * @date 11-06-2012 */ #include #include #include #include #include #include #include #include #include #include #include #include // QT Interface for image display // We only use the simple function showImage in this example, but there is far more // to explore in this area. #include #include "core/algebra/CholeskyRobust.h" #include "core/vector/Algorithms.h" #include #include //---------- #include "vislearning/baselib/ProgressBar.h" #include #include #include #include "vislearning/cbaselib/MultiDataset.h" #include #include "vislearning/cbaselib/ClassificationResults.h" #include //---------- #include "gp-hik-exp/progs/datatools.h" //---------- using namespace std; using namespace NICE; using namespace OBJREC; enum QueryStrategy{ RANDOM = 0, GPMEAN, GPPREDVAR, GPHEURISTIC, GPHEURISTICPLUS, GPBALANCE }; std::string convertInt(int number) { stringstream ss;//create a stringstream ss << number;//add number to the stream return ss.str();//return a string with the contents of the stream } void sampleFromCheckerboard( const int & nrOfSectorsProDim, const int & sizeOfSector, const int & examplesPerSector, std::vector & examples, NICE::Vector & labels) { int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim ); //set the labels labels.resize( nrOfSectorsTotal * examplesPerSector ); for ( int ex = 0; ex < examplesPerSector; ex++) { for ( int i = 0; i < nrOfSectorsProDim; i++) { for ( int j = 0; j < nrOfSectorsProDim; j++) { labels[ (i*nrOfSectorsProDim+j)*examplesPerSector + ex ] = ( i + j ) % 2; } } } for ( int i = 0; i < nrOfSectorsProDim; i++) { for ( int j = 0; j < nrOfSectorsProDim; j++) { for ( int ex = 0; ex < examplesPerSector; ex++) { NICE::Vector example( 3 ); double xi ( rand() % sizeOfSector + i * sizeOfSector ) ; double yi ( rand() % sizeOfSector + j * sizeOfSector ); //compute normalized histograms example[0] = xi / (nrOfSectorsTotal*sizeOfSector); example[1] = yi / (nrOfSectorsTotal*sizeOfSector); example[2] = 1.0 - example[0] - example[1]; examples.push_back( example ); } } } } void paintImageBorders( NICE::ColorImage & img, const int & nrOfSectorsProDim, const int & sizeOfSector ) { std::cerr << "img.width(): " << img.width() << " img.height(): " << img.height() << std::endl; std::cerr << "nrOfSectorsProDim*sizeOfSector-1: " << nrOfSectorsProDim*sizeOfSector-1 << std::endl; NICE::Line l1 ( NICE::Coord( 0, 0 ) , NICE::Coord ( 0, nrOfSectorsProDim*sizeOfSector-1) ); NICE::Line l2 ( NICE::Coord( 0, nrOfSectorsProDim*sizeOfSector-1 ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector-1, nrOfSectorsProDim*sizeOfSector-1) ); NICE::Line l3 ( NICE::Coord( nrOfSectorsProDim*sizeOfSector-1, nrOfSectorsProDim*sizeOfSector-1 ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector-1, 0) ); NICE::Line l4 ( NICE::Coord( nrOfSectorsProDim*sizeOfSector-1, 0 ) , NICE::Coord ( 0, 0 ) ); l1.draw( img, Color ( 0, 0, 0 ) ); l2.draw( img, Color ( 0, 0, 0 ) ); l3.draw( img, Color ( 0, 0, 0 ) ); l4.draw( img, Color ( 0, 0, 0 ) ); } void paintSectorsInImage( NICE::ColorImage & img, const int & nrOfSectorsProDim, const int & sizeOfSector ) { for ( int i = 1; i < nrOfSectorsProDim; i++ ) { NICE::Line lHor ( NICE::Coord( 0, i*sizeOfSector ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector, i*sizeOfSector) ); NICE::Line lVer ( NICE::Coord( i*sizeOfSector, 0 ) , NICE::Coord ( i*sizeOfSector, nrOfSectorsProDim*sizeOfSector) ); lHor.draw( img, Color ( 0, 0, 0 ) ); lVer.draw( img, Color ( 0, 0, 0 ) ); } } void paintLabeledExamples( NICE::ColorImage & img, const NICE::Vector & y, const Examples & examples, const int & nrOfSectorsProDim, const int & sizeOfSector, const int & diameter ) { int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim ); for ( uint lE = 0; lE < examples.size(); lE++) { // if ( y[lE] != 1) // { // NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] *nrOfSectorsTotal *sizeOfSector) , // (int) (( (* examples[lE].second.svec) [1]) * nrOfSectorsTotal *sizeOfSector) ), diameter ); // circ.draw ( img, Color ( 255, 0, 0 ) ); // } // else // { // NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] * nrOfSectorsTotal *sizeOfSector) , // (int) ( (* examples[lE].second.svec) [1] * nrOfSectorsTotal *sizeOfSector) ), diameter ); // circ.draw ( img, Color ( 0, 0, 255 ) ); // } int thickness (2); for ( int i = 0; i < thickness; i++) { NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] * nrOfSectorsTotal *sizeOfSector) , (int) ( (* examples[lE].second.svec) [1] * nrOfSectorsTotal *sizeOfSector) ), diameter-i ); circ.draw ( img, Color ( 0, 0, 0 ) ); //old: ( 0, 255, 0 ) } } } void paintUnlabeledExamples( NICE::ColorImage & img, const vector< NICE::Vector > & trainDataOrig, const NICE::Vector & y, const std::vector & unlabeledExamples, const int & nrOfSectorsProDim, const int & sizeOfSector, const int & diameter ) { int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim ); for ( uint uE = 0; uE < unlabeledExamples.size(); uE++) { if ( y[ unlabeledExamples[uE] ] == 0) { NICE::Circle circ ( NICE::Coord( (int) (trainDataOrig[ unlabeledExamples[uE] ] [0] * nrOfSectorsTotal *sizeOfSector), (int) (trainDataOrig[ unlabeledExamples[uE] ] [1] * nrOfSectorsTotal *sizeOfSector) ) , diameter ); circ.draw ( img, Color ( 255, 0, 0 ) ); } else { NICE::Circle circ ( NICE::Coord( (int) (trainDataOrig[ unlabeledExamples[uE] ] [0] * nrOfSectorsTotal *sizeOfSector) , (int) (trainDataOrig[ unlabeledExamples[uE] ] [1] * nrOfSectorsTotal *sizeOfSector) ) , diameter ); circ.draw ( img, Color ( 0, 0, 255 ) ); } } } void paintClassificationResult( NICE::ColorImage & img, const NICE::Vector& xstar, const int & diameter, const ClassificationResult & result, const int & nrOfSectorsProDim, const int & sizeOfSector ) { int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim ); NICE::Circle circ ( NICE::Coord( (int) ( xstar[0] * nrOfSectorsTotal *sizeOfSector) , (int) ( xstar[1] * nrOfSectorsTotal *sizeOfSector) ), diameter ); if (result.classno == 1) // classified as negative { circ.draw ( img, Color ( 0, 0, 255 ) ); } else // classified as positive { circ.draw ( img, Color ( 255, 0, 0 ) ); } } void paintQueriedExamples( NICE::ColorImage & img, const NICE::Vector& xstar, const int & diameter, const int & nrOfSectorsProDim, const int & sizeOfSector ) { int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim ); int thickness (2); for ( int i = 0; i < thickness; i++) { NICE::Circle circ ( NICE::Coord( (int) ( xstar[0] * nrOfSectorsTotal *sizeOfSector) , (int) ( xstar[1] * nrOfSectorsTotal *sizeOfSector) ), diameter-i ); circ.draw ( img, Color ( 0, 0, 0 ) ); //old: ( 0, 255, 0 ) } } /** Computes from randomly or deterministically choosen trainimages kernelmatrizes and evaluates their performance, using ROI-optimization */ int main ( int argc, char **argv ) { std::cout.precision ( 10 ); std::cerr.precision ( 10 ); NICE::Config conf ( argc, argv ); int trainExPerClass = conf.gI ( "main", "trainExPerClass", 10 ); int incrementalAddSize = conf.gI("main", "incrementalAddSize", 1); int nrOfIncrements = conf.gI("main", "nrOfIncrements", 9); int num_runs = conf.gI ( "main", "num_runs", 10 ); bool do_classification = conf.gB ( "main", "do_classification", true ); double noise = conf.gD("GPHIKClassifier", "noise", 0.01); double squaredNoise = pow( noise, 2); int sizeOfSector = conf.gI( "main", "sizeOfSector", 250 ); int nrOfSectorsProDim = conf.gI( "main", "nrOfSectorsProDim", 2 ); int examplesPerSector = conf.gI( "main", "examplesPerSector", 5 ); int examplesPerSectorTest = conf.gI( "main", "examplesPerSectorTest", 50 ); bool visualizationOfResults = conf.gB( "main", "visualizationOfResults", true ); bool paintSectorBorders = conf.gB( "main", "paintSectorBorders" , true ); bool saveImages = conf.gB( "main", "saveImages", false ); std::string destinationForImages = conf.gS( "main", "destinationForImages", "" ); string queryStrategyString = conf.gS( "main", "queryStrategy", "random"); QueryStrategy queryStrategy; if (queryStrategyString.compare("gpMean") == 0) { queryStrategy = GPMEAN; } else if (queryStrategyString.compare("gpPredVar") == 0) { queryStrategy = GPPREDVAR; } else if (queryStrategyString.compare("gpHeuristic") == 0) { queryStrategy = GPHEURISTIC; } else if (queryStrategyString.compare("gpHeuristicPlus") == 0) { queryStrategy = GPHEURISTICPLUS; } else if (queryStrategyString.compare("gpBalance") == 0) { queryStrategy = GPBALANCE; } else { queryStrategy = RANDOM; } bool verbose = conf.gB ( "main", "verbose", false ); /* initialize random seed: */ // srand ( time ( NULL ) ); //with 0 for reproductive results // srand ( 0 ); //with 0 for reproductive results // =========================== INIT =========================== std::vector > recognitions_rates(nrOfIncrements+1); std::vector > AUC_scores(nrOfIncrements+1); std::vector > classification_times(nrOfIncrements+1); std::vector > IL_training_times(nrOfIncrements); for ( int run = 0; run < num_runs; run++ ) { std::cerr << "run: " << run << std::endl; srand ( run * 100000 ); //with 0 for reproductive results // read training set vector< NICE::Vector > trainDataOrig; Vector y; sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSector, trainDataOrig, y ); // ------------------ TESTING std::vector testData; Vector yTest; sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSectorTest, testData, yTest ); if ( verbose ) { for (uint i = 0; i < trainDataOrig.size(); i++ ) { std::cerr << i << " : " << trainDataOrig[i] << std::endl; } std::cerr << "resulting binary label vector:" << y << std::endl; } std::set classesAvailable; classesAvailable.insert( 0 ); //we have a single negative class classesAvailable.insert( 1 ); //and we have a single positive class std::map nrExamplesPerClassInDataset; //simply count how many examples for every class are available std::map > examplesPerClassInDataset; //as well as their corresponding indices in the dataset //initialize this storage for (std::set::const_iterator it = classesAvailable.begin(); it != classesAvailable.end(); it++) { nrExamplesPerClassInDataset.insert(std::pair(*it,0)); examplesPerClassInDataset.insert(std::pair >(*it,std::vector(0))); } //store the indices of the examples for ( uint i = 0; i < y.size(); i++ ) { (examplesPerClassInDataset.find( y[i] )->second).push_back(i); } //and count how many examples are in every class for (std::map >::const_iterator it = examplesPerClassInDataset.begin(); it != examplesPerClassInDataset.end(); it++) { nrExamplesPerClassInDataset.find(it->first)->second = it->second.size(); } //simple output to tell how many examples we have for every class for ( std::map::const_iterator it = nrExamplesPerClassInDataset.begin(); it != nrExamplesPerClassInDataset.end(); it++) { cerr << it->first << ": " << it->second << endl; } Examples examples; //count how many examples of every class we have while actively selecting new examples //NOTE works only if we have subsequent class numbers NICE::Vector pickedExamplesPerClass( classesAvailable.size(), trainExPerClass); std::map > examplesPerClassInDatasetTmp (examplesPerClassInDataset); //chose examples for every class used for training //we will always use the first examples from each class, since the dataset comes already randomly ordered for (std::set::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++) { std::map >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt); if ( verbose ) std::cerr << "pick training examples for class " << *clIt << std::endl; for (int i = 0; i < trainExPerClass; i++) { if ( verbose ) std::cerr << "i: " << i << std::endl; int exampleIndex ( rand() % ( exIt->second.size() ) ); if ( verbose ) std::cerr << "pick example " << exIt->second[exampleIndex] << " - " << y[exIt->second[exampleIndex] ] << std::endl; Example example; NICE::Vector & xTrain = trainDataOrig[exIt->second[exampleIndex]]; example.svec = new SparseVector(xTrain); //let's take this example and its corresponding label (which should be *clIt) examples.push_back ( pair ( y[exIt->second[exampleIndex] ], example ) ); // exIt->second.erase(exIt->second.begin()+exampleIndex); } } for (uint i = 0; i < examples.size(); i++ ) { std::cerr << i << " : "; examples[i].second.svec->store(std::cerr); } //which examples are left to be actively chosen lateron? std::vector unlabeledExamples( y.size() - trainExPerClass*classesAvailable.size() ); int exCnt( 0 ); for (std::set::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++ ) { std::map >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt); //list all examples of this specific class for (std::vector::const_iterator it = exIt->second.begin(); it != exIt->second.end(); it++) { unlabeledExamples[exCnt] = *it; exCnt++; } } //Fast-HIK FPCGPHIK * classifier = new FPCGPHIK( &conf ); time_t prep_start_time = clock(); FeaturePool fp; // will be ignored classifier->train ( fp, examples ); float time_preparation = ( float ) ( clock() - prep_start_time ) ; std::cerr << "Time for training: " << time_preparation / CLOCKS_PER_SEC << std::endl; //this is only needed for the visualization NICE::Vector yBinGP ( examples.size(), -1 ); for ( uint i = 0; i < examples.size(); i++ ) { if ( examples[i].first == 1) yBinGP[i] = 1; } std::cerr << "yBinGP: " << yBinGP << std::endl; int nrOfClassesUsed = classesAvailable.size(); if ( visualizationOfResults ) { NICE::ColorImage img ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector ); img.set( 255, 255, 255 ); if ( paintSectorBorders ) paintSectorsInImage( img, nrOfSectorsProDim, sizeOfSector ); paintImageBorders( img, nrOfSectorsProDim, sizeOfSector ); //paint the example that we can query paintLabeledExamples( img, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 ); //and those that we already know paintUnlabeledExamples( img, trainDataOrig, y, unlabeledExamples, nrOfSectorsProDim, sizeOfSector, 2 ); if ( saveImages ) { img.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_000_initialBoard.ppm" ); } else showImage(img, "Initial Checkerboard"); } /* // ------------------ TESTING std::vector testData; Vector yTest; sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSectorTest, testData, yTest ); */ NICE::Matrix confusionMatrix ( 2, 2 ); confusionMatrix.set ( 0.0 ); time_t start_time = clock(); std::vector chosen_examples_per_class ( nrOfClassesUsed ); std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl; if ( do_classification ) { NICE::ColorImage imgTest; if ( visualizationOfResults ) { imgTest.resize ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector ); imgTest.set( 255, 255, 255 ); if ( paintSectorBorders ) paintSectorsInImage( imgTest, nrOfSectorsProDim, sizeOfSector ); paintImageBorders( imgTest, nrOfSectorsProDim, sizeOfSector ); //again paint our labeled training images used so far paintLabeledExamples( imgTest, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 ); } ClassificationResults results; ClassificationResult result; for ( uint i = 0 ; i < testData.size(); i++ ) { const Vector & xstar = testData[i]; SparseVector xstar_sparse ( xstar ); Example example; example.svec = &xstar_sparse; result = classifier->classify( example ); if ( visualizationOfResults ) paintClassificationResult( imgTest, xstar, 2, result, nrOfSectorsProDim, sizeOfSector ); result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0; confusionMatrix ( result.classno_groundtruth , result.classno ) ++; results.push_back( result ); } if ( visualizationOfResults ) { if ( saveImages ) { imgTest.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(0)+"ClassifResult.ppm" ); } else showImage(imgTest, "Classification Result"); } float time_classification = ( float ) ( clock() - start_time ) ; if ( verbose ) cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << endl; ( classification_times[0] ).push_back ( time_classification / CLOCKS_PER_SEC ); confusionMatrix.normalizeRowsL1(); std::cerr << confusionMatrix; double avg_recognition_rate = 0.0; for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ ) { avg_recognition_rate += confusionMatrix ( i, i ); } avg_recognition_rate /= confusionMatrix.rows(); std::cerr << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << examples.size() << " training examples used" << std::endl; recognitions_rates[0].push_back ( avg_recognition_rate*100 ); std::cerr << "number of classified examples: " << results.size() << std::endl; std::cerr << "perform auc evaluation "<< std::endl; double aucScore = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC ); std::cerr << " run: " << run << " AUC-score: " << aucScore << " % -- " << examples.size() << " training examples used" << std::endl << std::endl; AUC_scores[0].push_back ( aucScore*100 ); } //Now start the Incremental-Learning-Part for (int incrementationStep = 0; incrementationStep < nrOfIncrements; incrementationStep++) { //simply count how many possible example we have int nrOfPossibleExamples( unlabeledExamples.size() ); //chose examples for every class used for training Examples newExamples; NICE::ColorImage imgAL; if ( visualizationOfResults ) { imgAL.resize ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector ); imgAL.set( 255, 255, 255 ); if ( paintSectorBorders ) paintSectorsInImage( imgAL, nrOfSectorsProDim, sizeOfSector ); paintImageBorders( imgAL, nrOfSectorsProDim, sizeOfSector ); //again paint our labeled training images used so far paintLabeledExamples( imgAL, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 ); //and paint the unlabeled examples that are available to query paintUnlabeledExamples( imgAL, trainDataOrig, y, unlabeledExamples, nrOfSectorsProDim, sizeOfSector, 2 ); } if (queryStrategy == RANDOM) { if ( verbose ) std::cerr << "print chosen examples: " << std::endl; for (int i = 0; i < incrementalAddSize; i++) { int exampleIndex ( rand() % ( unlabeledExamples.size() ) ); Example newExample; NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exampleIndex] ]; newExample.svec = new SparseVector( xTrain ); int label( y[ unlabeledExamples[exampleIndex] ] ); //store this example for the visualization examples.push_back ( pair ( label, newExample ) ); //and store it to add it to the classifier newExamples.push_back ( pair ( label, newExample ) ); unlabeledExamples.erase( unlabeledExamples.begin()+exampleIndex ); if ( verbose ) std::cerr << exampleIndex+1 << " / " << incrementalAddSize << std::endl; pickedExamplesPerClass[label]++; yBinGP.append(label); if ( visualizationOfResults ) paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector ); } }// end computation for RANDOM else if ( (queryStrategy == GPMEAN) || (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTIC) || (queryStrategy == GPHEURISTICPLUS) || GPBALANCE) { //compute uncertainty values for all examples according to the query strategy std::vector > scores; scores.clear(); time_t unc_pred_start_time = clock(); for (uint exIndex = 0; exIndex < unlabeledExamples.size(); exIndex++) { NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exIndex] ]; SparseVector xTrainSparse ( xTrain ); Example example; example.svec = &xTrainSparse; if (queryStrategy == GPMEAN) { //compute the resulting score ClassificationResult r = classifier->classify( example ); //we only have two classes with "inverse" outputs scores.push_back( std::pair ( exIndex, fabs(r.scores[0]) ) ); } else if (queryStrategy == GPPREDVAR) { double uncertainty; //use the pred variance computation specified in the config file classifier->predictUncertainty( example, uncertainty ); //take the maximum of the scores for the predictive variance scores.push_back( std::pair ( exIndex, uncertainty) ); } else if (queryStrategy == GPHEURISTIC) { double uncertainty; //use the pred variance computation specified in the config file classifier->predictUncertainty( example, uncertainty ); //compute the mean values for every class ClassificationResult r = classifier->classify( example ); //take the minimum of the scores for the heuristic measure scores.push_back( std::pair ( exIndex, fabs(r.scores[0]) / sqrt( squaredNoise + uncertainty )) ); } else if (queryStrategy == GPHEURISTICPLUS) { double uncertainty; //use the pred variance computation specified in the config file classifier->predictUncertainty( example, uncertainty ); //compute the mean values for every class ClassificationResult r = classifier->classify( example ); //take the minimum of the scores for the heuristic measure scores.push_back( std::pair ( exIndex, fabs(r.scores[0]) + sqrt( squaredNoise + uncertainty )) ); } else if (queryStrategy == GPBALANCE) { double uncertainty; //use the pred variance computation specified in the config file classifier->predictUncertainty( example, uncertainty ); //compute the mean values for every class ClassificationResult r = classifier->classify( example ); double scorePositive (fabs (r.scores[0] - 1.0 )); double scoreNegative (fabs (r.scores[0] + 1.0 )); double score = scorePositive < scoreNegative ? scorePositive : scoreNegative; //take the minimum of the scores for the heuristic measure scores.push_back( std::pair ( exIndex, score / ( squaredNoise + uncertainty )) ); } } float time_score_computation = ( float ) ( clock() - unc_pred_start_time ) ; //pick the ones with best score //we could speed this up using a more sophisticated search method if ( (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTICPLUS) )//take the maximum of the scores for the predictive variance or the new weight { std::set chosenExamplesForThisRun; chosenExamplesForThisRun.clear(); for (int i = 0; i < incrementalAddSize; i++) { std::vector >::iterator bestExample = scores.begin(); std::vector >::iterator worstExample = scores.begin(); for (std::vector >::iterator jIt = scores.begin(); jIt !=scores.end(); jIt++) { if (jIt->second > bestExample->second) bestExample = jIt; if (jIt->second < worstExample->second) worstExample = jIt; } if ( verbose ) std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl; Example newExample; NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ]; newExample.svec = new SparseVector( xTrain ); //actually this is the ACTIVE LEARNING step (query a label) int label( y[ unlabeledExamples[bestExample->first] ] ); //store this example for the visualization examples.push_back ( pair ( label, newExample ) ); //and store it to add it to the classifier newExamples.push_back ( pair ( label, newExample ) ); //remember the index, to safely remove this example afterwards from unlabeledExamples chosenExamplesForThisRun.insert(bestExample->first); scores.erase(bestExample); pickedExamplesPerClass[label]++; yBinGP.append(label); if ( visualizationOfResults ) paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector ); } //delete the queried examples from the set of unlabeled ones //do this in an decreasing order in terms of indices to ensure valid access for (std::set::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++) { unlabeledExamples.erase( unlabeledExamples.begin()+(*it) ); } } else //take the minimum of the scores for the heuristic, heuristicPlus and the gp mean (minimum margin) { std::set chosenExamplesForThisRun; chosenExamplesForThisRun.clear(); for (int i = 0; i < incrementalAddSize; i++) { std::vector >::iterator bestExample = scores.begin(); std::vector >::iterator worstExample = scores.begin(); for (std::vector >::iterator jIt = scores.begin(); jIt !=scores.end(); jIt++) { if (jIt->second < bestExample->second) bestExample = jIt; if (jIt->second > worstExample->second) worstExample = jIt; } if ( verbose ) std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl; Example newExample; NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ]; newExample.svec = new SparseVector( xTrain ); //actually this is the ACTIVE LEARNING step (query a label) int label( y[ unlabeledExamples[bestExample->first] ] ); //store this example for the visualization examples.push_back ( pair ( label, newExample ) ); //and store it to add it to the classifier newExamples.push_back ( pair ( label, newExample ) ); //remember the index, to safely remove this example afterwards from unlabeledExamples chosenExamplesForThisRun.insert(bestExample->first); scores.erase(bestExample); pickedExamplesPerClass[label]++; yBinGP.append(label); if ( visualizationOfResults ) paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector ); } //delete the queried example from the set of unlabeled ones //do this in an decreasing order in terms of indices to ensure valid access for (std::set::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++) { unlabeledExamples.erase( unlabeledExamples.begin()+(*it) ); } } std::cerr << "Time used to compute query-scores for " << nrOfPossibleExamples << " examples: " << time_score_computation / CLOCKS_PER_SEC << " [s]" << std::endl; } // end computation for GPMEAN, GPPREDVAR, GPHEURISTIC, GPHEURISTICPLUS if ( visualizationOfResults ) { if ( saveImages ) { imgAL.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(incrementationStep+1)+"_queries.ppm" ); } else showImage(imgAL, "Old and new queried example"); } std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl; //incremental learning classifier->addMultipleExamples( newExamples ); //do the classification for evaluating the benefit of new examples if ( do_classification ) { NICE::ColorImage imgTest; if ( visualizationOfResults ) { imgTest.resize( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector ); imgTest.set( 255, 255, 255 ); if ( paintSectorBorders ) paintSectorsInImage( imgTest, nrOfSectorsProDim, sizeOfSector ); paintImageBorders( imgTest, nrOfSectorsProDim, sizeOfSector ); //again paint our labeled training images used so far paintLabeledExamples( imgTest, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 ); } time_t start_time = clock(); ClassificationResults results; confusionMatrix.set( 0.0 ); ClassificationResult result; for ( uint i = 0 ; i < testData.size(); i++ ) { const Vector & xstar = testData[i]; SparseVector xstar_sparse ( xstar ); Example example; example.svec = &xstar_sparse; result = classifier->classify( example ); if ( visualizationOfResults ) paintClassificationResult( imgTest, xstar, 2, result, nrOfSectorsProDim, sizeOfSector ); result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0; results.push_back( result ); confusionMatrix ( result.classno_groundtruth , result.classno ) ++; } float time_classification = ( float ) ( clock() - start_time ) ; if ( verbose ) std::cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << std::endl; ( classification_times[incrementationStep+1] ).push_back ( time_classification / CLOCKS_PER_SEC ); confusionMatrix.normalizeRowsL1(); std::cerr << confusionMatrix; double avg_recognition_rate ( 0.0 ); for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ ) { avg_recognition_rate += confusionMatrix ( i, i ); } avg_recognition_rate /= confusionMatrix.rows(); std::cerr << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl; recognitions_rates[incrementationStep+1].push_back ( avg_recognition_rate*100 ); double score = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC ); std::cerr << " run: " << run << " AUC-score: " << score << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl << std::endl; AUC_scores[incrementationStep+1].push_back ( score*100 ); if ( visualizationOfResults ) { if ( saveImages ) { imgTest.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(incrementationStep+1)+"ClassifResult.ppm" ); } else showImage(imgTest, "Classification result after inc step " + convertInt(incrementationStep+1)); } } //classification after IL adding */ } //IL adding of different classes std::cerr << "Final statistic about picked examples per class: " << pickedExamplesPerClass << std::endl; //don't waste memory! for ( uint tmp = 0; tmp < examples.size(); tmp++ ) { delete examples[tmp].second.svec; examples[tmp].second.svec = NULL; } }//runs // ================= EVALUATION ========================= int nrOfClassesUsed ( 2 ); //binary setting if ( do_classification ) { std::cerr << "========================" << std::endl; std::cerr << "recognition_rates" << std::endl; for ( std::vector >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ ) { for ( std::vector ::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ ) { std::cerr << *jt << " "; } std::cerr << std::endl; } std::vector mean_recs; std::vector std_dev_recs; for (std::vector >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ ) { double mean_rec ( 0.0 ); for ( std::vector::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ ) { mean_rec += *itRun; } mean_rec /= it->size(); mean_recs.push_back ( mean_rec ); double std_dev_rec ( 0.0 ); for ( std::vector::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ ) { std_dev_rec += pow ( *itRun - mean_rec, 2 ); } std_dev_rec /= it->size(); std_dev_rec = sqrt ( std_dev_rec ); std_dev_recs.push_back ( std_dev_rec ); } int datasize ( nrOfClassesUsed*trainExPerClass ); for ( uint i = 0; i < recognitions_rates.size(); i++) { std::cerr << "size: " << datasize << " meanRR: " << mean_recs[i] << " stdDevRR: " << std_dev_recs[i] << std::endl; datasize += incrementalAddSize ; } std::cerr << "========================" << std::endl; std::cerr << "AUC_scores" << std::endl; for ( std::vector >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ ) { for ( std::vector ::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ ) { std::cerr << *jt << " "; } std::cerr << std::endl; } std::vector mean_aucs; std::vector std_dev_aucs; for (std::vector >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ ) { double mean_auc ( 0.0 ); for ( std::vector::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ ) { mean_auc += *itRun; } mean_auc /= it->size(); mean_aucs.push_back ( mean_auc ); double std_dev_auc ( 0.0 ); for ( std::vector::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ ) { std_dev_auc += pow ( *itRun - mean_auc, 2 ); } std_dev_auc /= it->size(); std_dev_auc = sqrt ( std_dev_auc ); std_dev_aucs.push_back ( std_dev_auc ); } datasize = nrOfClassesUsed*trainExPerClass; for ( uint i = 0; i < recognitions_rates.size(); i++) { std::cerr << "size: " << datasize << " meanAUC: " << mean_aucs[i] << " stdDevAUC: " << std_dev_aucs[i] << std::endl; datasize += incrementalAddSize ; } } else { std::cerr << "========================" << std::endl; std::cerr << "No classification done therefor no classification times available." << std::endl; } return 0; }