/**
* @file IL_AL_Binary_GPBaseline.cpp
* @brief Incrementally train the GP-HIK classifier, using the predictive variance and its approximations to select new samples, and perform binary tests. We do not use the fast-hik implementations but perform the computations explicitly.
* @author Alexander Freytag
* @date 11-06-2012
*/

// NOTE: the original include targets were lost during extraction; the headers below are a
// plausible reconstruction based on the classes and functions used in this program.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <utility>
#include <cmath>
#include <ctime>
#include <cstdlib>

//----------

#include <core/basics/Config.h>
#include <core/basics/Timer.h>
#include <core/vector/VectorT.h>
#include <core/vector/SparseVectorT.h>
#include <core/vector/MatrixT.h>
#include <core/vector/VVector.h>
#include <core/algebra/CholeskyRobust.h>
#include <vislearning/cbaselib/MultiDataset.h>
#include <vislearning/cbaselib/Example.h>
#include <vislearning/cbaselib/ClassificationResults.h>

//----------

#include "gp-hik-exp/progs/datatools.h"

//

using namespace std;
using namespace NICE;
using namespace OBJREC;

enum verbose_level {NONE = 0, LOW = 1, MEDIUM = 2, EVERYTHING = 3};
enum QueryStrategy{
    RANDOM = 0,
    GPMEAN,
    GPPREDVAR,
    GPHEURISTIC
  };

std::string convertInt(int number)
{
  stringstream ss;//create a stringstream
  ss << number;//add number to the stream
  return ss.str();//return a string with the contents of the stream
}

// histogram intersection (minimum) kernel between two sparse vectors,
// e.g., for a = (0.2, 0.5, 0.0) and b = (0.1, 0.3, 0.4) the result is 0.1 + 0.3 + 0.0 = 0.4
double measureMinimumDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b)
{
  double sum(0.0);

  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      sum += std::min( aIt->second, bIt->second );
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      //minimum is zero
      aIt++;
    }
    else
    {
      //minimum is zero
      bIt++;
    }
  }

  //we do not have to walk through the remaining entries of the second iterator, since the other vector is zero in the corresponding dimensions anyway
  return sum;
}

/**
    Computes kernel matrices from randomly or deterministically chosen training images and evaluates their performance, using ROI optimization
*/
int main ( int argc, char **argv )
{
  std::cout.precision ( 10 );
  std::cerr.precision ( 10 );

  NICE::Config conf ( argc, argv );
  int trainExPerClass = conf.gI ( "GP_IL", "trainExPerClass", 10 );
  int incrementalAddSize = conf.gI("GP_IL", "incrementalAddSize", 1);
  int nrOfIncrements = conf.gI("GP_IL", "nrOfIncrements", 9);
  int num_runs = conf.gI ( "GP_IL", "num_runs", 10 );
  bool do_classification = conf.gB ( "GP_IL", "do_classification", true );

  double noise = conf.gD("GPHIKClassifier", "noise", 0.01);
  double squaredNoise = pow( noise, 2);

  int minClass = conf.gI( "main", "minClass", 0);
  int maxClass = conf.gI( "main", "maxClass", 15);

  string queryStrategyString = conf.gS( "main", "queryStrategy", "random");
  QueryStrategy queryStrategy;
  if (queryStrategyString.compare("gpMean") == 0)
  {
    queryStrategy = GPMEAN;
  }
  else if (queryStrategyString.compare("gpPredVar") == 0)
  {
    queryStrategy = GPPREDVAR;
  }
  else if (queryStrategyString.compare("gpHeuristic") == 0)
  {
    queryStrategy = GPHEURISTIC;
  }
  else
  {
    queryStrategy = RANDOM;
  }

  int verbose_int = conf.gI ( "GP_IL", "verbose", 0 );
  verbose_level verbose ( NONE );
  switch ( verbose_int )
  {
    case 0:
      verbose = NONE;
      break;
    case 1:
      verbose = LOW;
      break;
    case 2:
      verbose = MEDIUM;
      break;
    case 3:
      verbose = EVERYTHING;
      break;
  }

  std::string locationOfPermutations = conf.gS( "main", "locationOfPermutations", "/home/luetz/data/images/caltech-101/" );
  std::string classselection_train = conf.gS( "main", "classselection_train", "*" );
  std::string classselection_test = conf.gS( "main", "classselection_test", "*" );
  std::string examples_train = conf.gS( "main", "examples_train", "seq * 100" );
  std::string examples_test = conf.gS( "main", "examples_test", "seq * 50" );

  /* initialize random seed: */
  srand ( time ( NULL ) );
  // srand ( 0 ); //with 0 for reproducible results
  for (int currentClass = minClass; currentClass <= maxClass; currentClass++)
  {
    std::cerr << "start binary experiments for class " << currentClass << std::endl;

    // =========================== INIT ===========================

    std::vector<std::vector<double> > recognitions_rates(nrOfIncrements+1);
    std::vector<std::vector<double> > AUC_scores(nrOfIncrements+1);
    std::vector<std::vector<float> > classification_times(nrOfIncrements+1);
    std::vector<std::vector<float> > IL_training_times(nrOfIncrements);

    for ( int run = 0; run < num_runs; run++ )
    {
      std::cerr << "run: " << run << std::endl;

      NICE::Config confCurrentRun ( conf );
      confCurrentRun.sS( "train"+convertInt(run), "dataset", locationOfPermutations+"run"+convertInt(run)+".train" );
      confCurrentRun.sS( "train"+convertInt(run), "classselection_train", classselection_train );
      confCurrentRun.sS( "train"+convertInt(run), "examples_train", examples_train );
      confCurrentRun.sS( "test"+convertInt(run), "dataset", locationOfPermutations+"run"+convertInt(run)+".test" );
      confCurrentRun.sS( "test"+convertInt(run), "classselection_test", classselection_test );
      confCurrentRun.sS( "test"+convertInt(run), "examples_test", examples_test );

      //15-scenes settings
      std::string ext = confCurrentRun.gS("main", "ext", ".txt");
      std::cerr << "Using cache extension: " << ext << std::endl;

      OBJREC::MultiDataset md ( &confCurrentRun );
      std::cerr << "now read the dataset" << std::endl;

      // read training set
      vector< NICE::Vector > trainDataOrig;
      Vector y;
      string trainRun ( "train" + convertInt( run ) );
      std::cerr << "look for " << trainRun << std::endl;
      const LabeledSet *train = md[ trainRun ]; //previously, we only selected "train", now we select the permutation for this run

      //we just store the filenames to have a look which image we picked in every step
      std::vector<std::string> filenamesTraining;
      readData< std::vector< NICE::Vector >, NICE::Vector > ( confCurrentRun, *train, trainDataOrig, y, filenamesTraining, ext );

      std::cerr << "label vector after reading: " << y << std::endl;

      bool firstPositivePrinted( false );
      //ensure the binary setting
      for ( uint i = 0; i < y.size(); i++ )
      {
        if ( y[i] == currentClass)
        {
          if ( !firstPositivePrinted )
          {
            std::cerr << "first positive example: " << filenamesTraining[i] << std::endl;
            firstPositivePrinted = true;
          }
          y[i] = 1;
        }
        else
          y[i] = 0;//-1;
      }

      std::cerr << "resulting binary label vector:" << y << std::endl;

      std::set<int> classesAvailable;
      classesAvailable.insert( 0 ); //we have a single negative class
      classesAvailable.insert( 1 ); //and we have a single positive class

      std::map<int,int> nrExamplesPerClassInDataset; //simply count how many examples for every class are available
      std::map<int,std::vector<int> > examplesPerClassInDataset; //as well as their corresponding indices in the dataset

      //initialize this storage
      for (std::set<int>::const_iterator it = classesAvailable.begin(); it != classesAvailable.end(); it++)
      {
        nrExamplesPerClassInDataset.insert(std::pair<int,int>(*it,0));
        examplesPerClassInDataset.insert(std::pair<int,std::vector<int> >(*it,std::vector<int>(0)));
      }

      //store the indices of the examples
      for ( uint i = 0; i < y.size(); i++ )
      {
        (examplesPerClassInDataset.find( y[i] )->second).push_back(i);
      }

      //and count how many examples are in every class
      for (std::map<int,std::vector<int> >::const_iterator it = examplesPerClassInDataset.begin(); it != examplesPerClassInDataset.end(); it++)
      {
        nrExamplesPerClassInDataset.find(it->first)->second = it->second.size();
      }

      //simple output to tell how many examples we have for every class
      for ( std::map<int,int>::const_iterator it = nrExamplesPerClassInDataset.begin(); it != nrExamplesPerClassInDataset.end(); it++)
      {
        cerr << it->first << ": " << it->second << endl;
      }

      Examples examples;
      //count how many examples of every class we have while actively selecting new examples
      //NOTE: works only if we have consecutive class numbers
      NICE::Vector pickedExamplesPerClass( classesAvailable.size(), trainExPerClass);

      std::map<int,std::vector<int> > examplesPerClassInDatasetTmp (examplesPerClassInDataset);

      //choose examples for every class used for training
      //we will always use the first examples from each class, since the dataset comes already randomly ordered
      for (std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++)
      {
        std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt);
        std::cerr << "pick training examples for class " << *clIt << std::endl;

        for (int i = 0; i < trainExPerClass; i++)
        {
          std::cerr << "i: " << i << std::endl;
          int exampleIndex ( 0 ); //old: rand() % ( exIt->second.size() )
          std::cerr << "pick example " << exIt->second[exampleIndex] << " - " << y[exIt->second[exampleIndex] ] << " -- " << filenamesTraining[exIt->second[exampleIndex]] << std::endl;

          Example example;
          NICE::Vector & xTrain = trainDataOrig[exIt->second[exampleIndex]];
          example.svec = new SparseVector(xTrain);
          //let's take this example and its corresponding label (which should be *clIt)
          examples.push_back ( pair<int, Example> ( y[exIt->second[exampleIndex] ], example ) );
          //remove the picked example from the pool of not-yet-used ones
          exIt->second.erase(exIt->second.begin()+exampleIndex);
        }
      }

      std::vector<std::string> filenamesUnlabeled;
      filenamesUnlabeled.clear();

      //which examples are left to be actively chosen later on?
      std::vector<int> unlabeledExamples( y.size() - trainExPerClass*classesAvailable.size() );
      int exCnt( 0 );
      for (std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++ )
      {
        std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt);
        //list all examples of this specific class
        for (std::vector<int>::const_iterator it = exIt->second.begin(); it != exIt->second.end(); it++)
        {
          unlabeledExamples[exCnt] = *it;
          exCnt++;
          filenamesUnlabeled.push_back( filenamesTraining[*it] );
        }
      }

      //brute-force GP regression training
      Timer t;
      t.start();
      NICE::Matrix kernelMatrix (examples.size(), examples.size(), 0.0);

      //and set zero to minus one for the internal GP computations for the expected mean
      NICE::Vector yBinGP ( examples.size(), -1 );

      //now compute the kernel scores for every element
      double kernelScore(0.0);
      for ( uint i = 0; i < examples.size(); i++ )
      {
        for ( uint j = i; j < examples.size(); j++ )
        {
          kernelScore = measureMinimumDistance(* examples[i].second.svec, * examples[j].second.svec);
          kernelMatrix(i,j) = kernelScore;
          if (i != j)
            kernelMatrix(j,i) = kernelScore;
        }
        if ( examples[i].first == 1)
          yBinGP[i] = 1;
      }

      //adding some noise, if necessary
      if ( squaredNoise != 0.0 )
      {
        kernelMatrix.addIdentity( squaredNoise ); //use the squared noise here as well, consistent with the later updates
      }
      else
      {
        //zero was already set
      }
      std::cerr << "noise: " << noise << std::endl;
      std::cerr << "kernelMatrix: " << kernelMatrix << std::endl;

      //compute its inverse
      //noise is already added :)
      CholeskyRobust cr ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);

      NICE::Matrix choleskyMatrix ( examples.size(), examples.size(), 0.0 );
      cr.robustChol ( kernelMatrix, choleskyMatrix );

      NICE::Vector GPrightPart ( examples.size() );
      choleskySolveLargeScale ( choleskyMatrix, yBinGP, GPrightPart );

      std::cerr << "choleskyMatrix: " << choleskyMatrix << std::endl;

      t.stop();
      cerr << "Time used for initial training: " << t.getLast() << endl;

      int nrOfClassesUsed = classesAvailable.size();

      // ------------------ TESTING
      string testRun ( "test" + convertInt( run ) );
      const LabeledSet *test = md[ testRun ]; //previously, we only selected "test", now we select the permutation for this run
      VVector testData;
      Vector yTest;
      readData< VVector, Vector > ( confCurrentRun, *test, testData, yTest, ext );

      NICE::Matrix confusionMatrix ( 2, 2 );
      confusionMatrix.set ( 0.0 );

      time_t start_time = clock();

      std::vector<int> chosen_examples_per_class ( nrOfClassesUsed );

      std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

      if ( do_classification )
      {
        ClassificationResults results;
        for ( uint i = 0 ; i < testData.size(); i++ )
        {
          const Vector & xstar = testData[i];
          SparseVector xstar_sparse ( xstar );

          //compute similarities
          NICE::Vector kernelVector ( examples.size(), 0.0 );
          for ( uint j = 0; j < examples.size(); j++ )
          {
            kernelVector[j] = measureMinimumDistance( * examples[j].second.svec, xstar_sparse );
          }

          //compute the resulting score
          double score = kernelVector.scalarProduct( GPrightPart );

          //this is the standard score-object needed for the evaluation
          FullVector scores ( 2 );
          scores[0] = -1.0*score;
          scores[1] = score;

          ClassificationResult result ( scores.maxElement(), scores );
          result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;
          result.classno = ( score >= 0.0 ) ? 1 : 0;

          confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
          results.push_back( result );
        }

        float time_classification = ( float ) ( clock() - start_time ) ;
        if ( verbose >= LOW )
          cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << endl;
        ( classification_times[0] ).push_back ( time_classification / CLOCKS_PER_SEC );

        confusionMatrix.normalizeRowsL1();
        std::cerr << confusionMatrix;
        double avg_recognition_rate = 0.0;
        for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
        {
          avg_recognition_rate += confusionMatrix ( i, i );
        }
        avg_recognition_rate /= confusionMatrix.rows();
        std::cerr << "class: " << currentClass << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << examples.size() << " training examples used" << std::endl;

        recognitions_rates[0].push_back ( avg_recognition_rate*100 );

        std::cerr << "number of classified examples: " << results.size() << std::endl;
        std::cerr << "perform auc evaluation "<< std::endl;
        double aucScore = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );

        std::cerr << "class: " << currentClass << " run: " << run << " AUC-score: " << aucScore << " % -- " << examples.size() << " training examples used" << std::endl << std::endl;
        AUC_scores[0].push_back ( aucScore*100 );
      }

      //now start the Incremental-Learning part
      for (int incrementationStep = 0; incrementationStep < nrOfIncrements; incrementationStep++)
      {
        //simply count how many possible examples we have
        int nrOfPossibleExamples( unlabeledExamples.size() );

        if (queryStrategy == RANDOM)
        {
          std::cerr << "print chosen examples: " << std::endl;
          for (int i = 0; i < incrementalAddSize; i++)
          {
            int exampleIndex ( rand() % ( unlabeledExamples.size() ) );

            Example newExample;
            NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exampleIndex] ];
            newExample.svec = new SparseVector( xTrain );
            int label( y[ unlabeledExamples[exampleIndex] ] );
            examples.push_back ( pair<int, Example> ( label, newExample ) );
            unlabeledExamples.erase( unlabeledExamples.begin()+exampleIndex );
            std::cerr << exampleIndex+1 << " / " << incrementalAddSize << " : " << filenamesUnlabeled[ exampleIndex ] << std::endl;
            filenamesUnlabeled.erase( filenamesUnlabeled.begin()+exampleIndex );
            pickedExamplesPerClass[label]++;
          }
        }// end computation for RANDOM
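        // The three GP-based query strategies below score each unlabeled example x* with
        // standard GP-regression quantities, computed explicitly from the Cholesky factor:
        //   posterior mean:        mu*  = k_*^T (K + sigma^2 I)^{-1} y   (GPrightPart already stores (K + sigma^2 I)^{-1} y)
        //   predictive variance:   var* = k(x*,x*) - k_*^T (K + sigma^2 I)^{-1} k_*
        //   uncertainty heuristic: |mu*| / sqrt(sigma^2 + var*)
        // GPMEAN and GPHEURISTIC query the example with the smallest score (closest to the
        // decision boundary), whereas GPPREDVAR queries the example with the largest predictive variance.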
        else if ( (queryStrategy == GPMEAN) || (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTIC) )
        {
          //compute uncertainty values for all examples according to the query strategy
          std::vector<std::pair<int,double> > scores;
          scores.clear();

          time_t unc_pred_start_time = clock();

          for (uint exIndex = 0; exIndex < unlabeledExamples.size(); exIndex++)
          {
            NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exIndex] ];
            SparseVector xTrainSparse ( xTrain );

            //compute similarities
            NICE::Vector kernelVector ( examples.size(), 0.0);
            for ( uint j = 0; j < examples.size(); j++ )
            {
              kernelVector[j] = measureMinimumDistance( * examples[j].second.svec, xTrainSparse );
            }

            if (queryStrategy == GPMEAN)
            {
              //compute the resulting score
              double score = kernelVector.scalarProduct( GPrightPart );
              scores.push_back( std::pair<int,double> ( exIndex, fabs(score) ) );
            }
            else if (queryStrategy == GPPREDVAR)
            {
              double kernelSelf ( measureMinimumDistance( xTrainSparse, xTrainSparse) );
              NICE::Vector rightPart (examples.size());
              choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
              double uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
              scores.push_back( std::pair<int,double> ( exIndex, uncertainty) );
            }
            else if (queryStrategy == GPHEURISTIC)
            {
              double kernelSelf ( measureMinimumDistance( xTrainSparse, xTrainSparse) );
              NICE::Vector rightPart (examples.size());
              choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
              //uncertainty
              double uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
              //mean
              double score = kernelVector.scalarProduct( GPrightPart );
              //compute the resulting score
              scores.push_back( std::pair<int,double> ( exIndex, fabs(score) / sqrt( squaredNoise + uncertainty ) ) );
            }
          }
          float time_score_computation = ( float ) ( clock() - unc_pred_start_time ) ;

          //pick the ones with the best score
          //we could speed this up using a more sophisticated search method

          if (queryStrategy == GPPREDVAR) //take the maximum of the scores for the predictive variance
          {
            std::set<int> chosenExamplesForThisRun;
            chosenExamplesForThisRun.clear();
            for (int i = 0; i < incrementalAddSize; i++)
            {
              std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
              std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();

              for (std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt !=scores.end(); jIt++)
              {
                if (jIt->second > bestExample->second)
                  bestExample = jIt;
                if (jIt->second < worstExample->second)
                  worstExample = jIt;
              }
              std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;

              Example newExample;
              NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
              newExample.svec = new SparseVector( xTrain );

              //actually this is the ACTIVE LEARNING step (query a label)
              int label( y[ unlabeledExamples[bestExample->first] ] );
              examples.push_back ( pair<int, Example> ( label, newExample ) );

              //remember the index, to safely remove this example afterwards from unlabeledExamples
              chosenExamplesForThisRun.insert(bestExample->first);
              scores.erase(bestExample);
              pickedExamplesPerClass[label]++;
            }

            // std::cerr << "print chosen examples: " << std::endl;
            /*
            int tmpCnt(0);
            for (std::set<int>::const_iterator it = chosenExamplesForThisRun.begin(); it != chosenExamplesForThisRun.end(); it++, tmpCnt++)
            {
              std::cerr << tmpCnt+1 << " / " << incrementalAddSize << " : " << filenamesUnlabeled[ *it ] << std::endl;
            }
            */

            //delete the queried examples from the set of unlabeled ones
            //do this in decreasing order of the indices to ensure valid access
            for (std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++)
            {
              unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
            }
          }
          else //take the minimum of the scores for the heuristic and the gp mean (minimum margin)
          {
            std::set<int> chosenExamplesForThisRun;
            chosenExamplesForThisRun.clear();
            for (int i = 0; i < incrementalAddSize; i++)
            {
              std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
              std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();

              for (std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt !=scores.end(); jIt++)
              {
                if (jIt->second < bestExample->second)
                  bestExample = jIt;
                if (jIt->second > worstExample->second)
                  worstExample = jIt;
              }
              std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;

              Example newExample;
              NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
              newExample.svec = new SparseVector( xTrain );

              //actually this is the ACTIVE LEARNING step (query a label)
              int label( y[ unlabeledExamples[bestExample->first] ] );
              examples.push_back ( pair<int, Example> ( label, newExample ) );

              //remember the index, to safely remove this example afterwards from unlabeledExamples
              chosenExamplesForThisRun.insert(bestExample->first);
              scores.erase(bestExample);
              pickedExamplesPerClass[label]++;
            }

            //delete the queried examples from the set of unlabeled ones
            //do this in decreasing order of the indices to ensure valid access
            for (std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++)
            {
              unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
            }
          }

          std::cerr << "Time used to compute query-scores for " << nrOfPossibleExamples << " examples: " << time_score_computation / CLOCKS_PER_SEC << " [s]" << std::endl;
        } // end computation for GPMEAN, GPPREDVAR, or GPHEURISTIC

        std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

        //again: brute-force GP regression training
        Timer t;
        t.start();
        NICE::Matrix kernelMatrix (examples.size(), examples.size(), 0.0);

        //and set zero to minus one for the internal GP computations for the expected mean
        NICE::Vector yBinGP ( examples.size(), -1 );

        //now compute the kernel scores for every element
        double kernelScore(0.0);
        for ( uint i = 0; i < examples.size(); i++ )
        {
          for ( uint j = i; j < examples.size(); j++ )
          {
            kernelScore = measureMinimumDistance(* examples[i].second.svec, * examples[j].second.svec);
            kernelMatrix(i,j) = kernelScore;
            if (i != j)
              kernelMatrix(j,i) = kernelScore;
          }
          if ( examples[i].first == 1)
            yBinGP[i] = 1;
        }

        //adding some noise, if necessary
        if ( squaredNoise != 0.0 )
        {
          kernelMatrix.addIdentity( squaredNoise );
        }
        else
        {
          //zero was already set
        }

        //compute its inverse
        //noise is already added :)

        //update the Cholesky decomposition
        choleskyMatrix.resize ( examples.size(), examples.size() );
        choleskyMatrix.set( 0.0 );
        cr.robustChol ( kernelMatrix, choleskyMatrix );

        //and update the right part needed for the posterior mean
        GPrightPart.resize ( examples.size() );
        GPrightPart.set( 0.0 );
        choleskySolveLargeScale ( choleskyMatrix, yBinGP, GPrightPart );

        t.stop();
        std::cerr << "Time for IL-adding of " << incrementalAddSize << " examples to the previous " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*incrementationStep << " training-examples: " << t.getLast() << " [s]" << std::endl;
        IL_training_times[incrementationStep].push_back( t.getLast() );

        //do the classification for evaluating the benefit of new examples
        if ( do_classification )
        {
          time_t start_time = clock();
          ClassificationResults results;
          confusionMatrix.set( 0.0 );

          for ( uint i = 0 ; i < testData.size(); i++ )
          {
            const Vector & xstar = testData[i];
            SparseVector xstar_sparse ( xstar );

            //compute similarities
            NICE::Vector kernelVector ( examples.size(), 0.0 );
            for ( uint j = 0; j < examples.size(); j++ )
            {
              kernelVector[j] = measureMinimumDistance( * examples[j].second.svec, xstar_sparse );
            }

            //compute the resulting score
            double score = kernelVector.scalarProduct( GPrightPart );

            //this is the standard score-object needed for the evaluation
            FullVector scores ( 2 );
            scores[0] = -1.0*score;
            scores[1] = score;

            ClassificationResult result ( scores.maxElement(), scores );
            result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;
            result.classno = ( score >= 0.0 ) ? 1 : 0;

            results.push_back( result );
            confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
          }

          float time_classification = ( float ) ( clock() - start_time ) ;
          if ( verbose >= LOW )
            std::cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << std::endl;
          ( classification_times[incrementationStep+1] ).push_back ( time_classification / CLOCKS_PER_SEC );

          confusionMatrix.normalizeRowsL1();
          std::cerr << confusionMatrix;
          double avg_recognition_rate ( 0.0 );
          for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
          {
            avg_recognition_rate += confusionMatrix ( i, i );
          }
          avg_recognition_rate /= confusionMatrix.rows();

          std::cerr << "class: " << currentClass << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl;

          recognitions_rates[incrementationStep+1].push_back ( avg_recognition_rate*100 );

          double score = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
          std::cerr << "class: " << currentClass << " run: " << run << " AUC-score: " << score << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl << std::endl;
          AUC_scores[incrementationStep+1].push_back ( score*100 );
        } //classification after IL adding
      } //IL adding of new examples

      std::cerr << "Final statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;
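      // Note on the run above: as announced in the file header, this baseline rebuilds the full
      // kernel matrix and recomputes its Cholesky factorization from scratch after every query
      // round (cubic cost in the current number of training examples) instead of using the
      // fast-hik implementations or an incremental Cholesky update. With the default settings
      // (2 classes x trainExPerClass = 10, incrementalAddSize = 1, nrOfIncrements = 9), the
      // training set therefore grows from 20 to 29 examples over the increments.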
      //don't waste memory!
      for ( uint tmp = 0; tmp < examples.size(); tmp++ )
      {
        delete examples[tmp].second.svec;
        examples[tmp].second.svec = NULL;
      }
    }//runs

    // ================= EVALUATION =========================

    int nrOfClassesUsed ( 2 ); //binary setting

    if ( do_classification )
    {
      std::cerr << "========================" << std::endl;
      std::cerr << " final evaluation for class: " << currentClass << std::endl;
      std::cerr << "content of classification_times: " << std::endl;
      for ( std::vector<std::vector<float> >::const_iterator it = classification_times.begin(); it != classification_times.end(); it++ )
      {
        for ( std::vector<float>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
        {
          std::cerr << *jt << " ";
        }
        std::cerr << std::endl;
      }

      std::vector<float> mean_classification_times;
      std::vector<float> std_dev_classification_times;
      for ( std::vector<std::vector<float> >::const_iterator it = classification_times.begin(); it != classification_times.end(); it++ )
      {
        float mean_classification_time ( 0.0 );
        for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          mean_classification_time += *itRun;
        }
        mean_classification_time /= it->size();
        mean_classification_times.push_back ( mean_classification_time );

        double std_dev_classification_time ( 0.0 );
        for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          std_dev_classification_time += pow ( *itRun - mean_classification_time, 2 );
        }
        std_dev_classification_time /= it->size();
        std_dev_classification_time = sqrt ( std_dev_classification_time );
        std_dev_classification_times.push_back ( std_dev_classification_time );
      }

      int datasize ( nrOfClassesUsed*trainExPerClass );
      for ( uint i = 0; i < mean_classification_times.size(); i++)
      {
        std::cerr << "size: " << datasize << " mean classification time: " << mean_classification_times[i] << " std_dev classification time: " << std_dev_classification_times[i] << std::endl;
        datasize += incrementalAddSize ;
      }
    }
    else
    {
      std::cerr << "========================" << std::endl;
      std::cerr << "No classification done, therefore no classification times available." << std::endl;
    }
    std::cerr << "========================" << std::endl;
    std::cerr << "content of IL_training_times for class : "<< currentClass << std::endl;
    for ( std::vector<std::vector<float> >::const_iterator it = IL_training_times.begin(); it != IL_training_times.end(); it++ )
    {
      for ( std::vector<float>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
      {
        std::cerr << *jt << " ";
      }
      std::cerr << std::endl;
    }

    std::vector<float> mean_IL_training_times;
    std::vector<float> std_dev_IL_training_times;
    for ( std::vector<std::vector<float> >::const_iterator it = IL_training_times.begin(); it != IL_training_times.end(); it++ )
    {
      float mean_IL_training_time ( 0.0 );
      for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        mean_IL_training_time += *itRun;
      }
      mean_IL_training_time /= it->size();
      mean_IL_training_times.push_back ( mean_IL_training_time );

      double std_dev_IL_training_time ( 0.0 );
      for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        std_dev_IL_training_time += pow ( *itRun - mean_IL_training_time, 2 );
      }
      std_dev_IL_training_time /= it->size();
      std_dev_IL_training_time = sqrt ( std_dev_IL_training_time );
      std_dev_IL_training_times.push_back ( std_dev_IL_training_time );
    }

    int datasize ( nrOfClassesUsed*trainExPerClass );
    for ( uint i = 0; i < mean_IL_training_times.size(); i++)
    {
      cerr << "size: " << datasize << " and adding " << incrementalAddSize << " mean IL_training time: " << mean_IL_training_times[i] << " std_dev IL_training time: " << std_dev_IL_training_times[i] << endl;
      datasize += incrementalAddSize ;
    }

    if ( do_classification )
    {
      std::cerr << "========================" << std::endl;
      std::cerr << "content of recognition_rates for class : "<< currentClass << std::endl;
      for ( std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
      {
        for ( std::vector<double>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
        {
          std::cerr << *jt << " ";
        }
        std::cerr << std::endl;
      }

      std::cerr << "calculating final recognition_rates for class : "<< currentClass << std::endl;
      std::vector<double> mean_recs;
      std::vector<double> std_dev_recs;
      for (std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
      {
        double mean_rec ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          mean_rec += *itRun;
        }
        mean_rec /= it->size();
        mean_recs.push_back ( mean_rec );

        double std_dev_rec ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          std_dev_rec += pow ( *itRun - mean_rec, 2 );
        }
        std_dev_rec /= it->size();
        std_dev_rec = sqrt ( std_dev_rec );
        std_dev_recs.push_back ( std_dev_rec );
      }

      int datasize ( nrOfClassesUsed*trainExPerClass );
      for ( uint i = 0; i < recognitions_rates.size(); i++)
      {
        std::cerr << "size: " << datasize << " mean_IL: " << mean_recs[i] << " std_dev_IL: " << std_dev_recs[i] << std::endl;
        datasize += incrementalAddSize ;
      }

      std::cerr << "========================" << std::endl;
      std::cerr << "content of AUC_scores for class : "<< currentClass << std::endl;
      for ( std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
      {
        for ( std::vector<double>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
        {
          std::cerr << *jt << " ";
        }
        std::cerr << std::endl;
      }

      std::cerr << "calculating final AUC_scores for class : "<< currentClass << std::endl;
      std::vector<double> mean_aucs;
      std::vector<double> std_dev_aucs;
      for (std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
      {
        double mean_auc ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          mean_auc += *itRun;
        }
        mean_auc /= it->size();
        mean_aucs.push_back ( mean_auc );

        double std_dev_auc ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          std_dev_auc += pow ( *itRun - mean_auc, 2 );
        }
        std_dev_auc /= it->size();
        std_dev_auc = sqrt ( std_dev_auc );
        std_dev_aucs.push_back ( std_dev_auc );
      }

      datasize = nrOfClassesUsed*trainExPerClass;
      for ( uint i = 0; i < recognitions_rates.size(); i++)
      {
        std::cerr << "size: " << datasize << " mean_IL: " << mean_aucs[i] << " std_dev_IL: " << std_dev_aucs[i] << std::endl;
        datasize += incrementalAddSize ;
      }
    }
    else
    {
      std::cerr << "========================" << std::endl;
      std::cerr << "No classification done, therefore no AUC scores available." << std::endl;
    }
  } //for int currentClass...

  return 0;
}
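/*
  Example configuration (a sketch only: the values are hypothetical, and the INI-style layout
  assumes the usual NICE::Config file format; the section and key names are exactly the ones
  queried via conf.gI/gB/gD/gS above):

  [GP_IL]
  trainExPerClass = 10
  incrementalAddSize = 1
  nrOfIncrements = 9
  num_runs = 10
  do_classification = true
  verbose = 1

  [GPHIKClassifier]
  noise = 0.01

  [main]
  minClass = 0
  maxClass = 15
  queryStrategy = gpHeuristic
  locationOfPermutations = /home/luetz/data/images/caltech-101/
  classselection_train = *
  classselection_test = *
  examples_train = seq * 100
  examples_test = seq * 50
  ext = .txt

  queryStrategy accepts one of: random, gpMean, gpPredVar, gpHeuristic.
*/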