// Continues accumulating the results into the results vector for savePosteriors.
// computeResults must be called first!
void MDDAGClassifier::continueComputingResults(InputData* pData, vector<BaseLearner*>& weakHypotheses,
                                               vector< ExampleResults* >& results,
                                               int fromIteration, int toIteration)
{
    assert( !weakHypotheses.empty() );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    // skip the weak hypotheses before fromIteration
    for (whyIt = weakHypotheses.begin(), t = 0;
         whyIt != weakHypotheses.end() && t < fromIteration; ++whyIt, ++t) {}

    // for every iteration: fromIteration..toIteration
    for (; whyIt != weakHypotheses.end() && t < toIteration; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }
    }
}
void DataReader::calculateHypothesesMatrix()
{
    cout << "[+] Calculate weak hyp matrix..." << endl;

    const int numExamples = _pCurrentData->getNumExamples();
    const int numClasses = _pCurrentData->getNumClasses();

    hypermat& allOutputs = _weakHypothesesMatrices[_pCurrentData];
    allOutputs.resize(numExamples);

    cout << "Memory allocation for " << numExamples << " examples, "
         << _numIterations << " classifiers, and "
         << numClasses << " classes..." << flush;

    for (int i = 0; i < numExamples; ++i)
    {
        allOutputs[i].resize(_numIterations);
        for (int j = 0; j < _numIterations; ++j)
            allOutputs[i][j].resize(numClasses, 0.);
    }
    cout << "Done." << endl;

    // const int step = (_totalNumIterations) < 50 ? 1 : (_totalNumIterations) / 50;
    // cout << "Computing the weak hyp outputs: 0%." << flush;
    cout << "Computing the weak hyp outputs... " << flush;

    int t = 0;
    for (int wHypInd = 0; wHypInd < _numIterations; ++wHypInd)
    {
        // if ((t + 1) % 1000 == 0)
        //     cout << "." << flush;
        //
        // if ((t + 1) % step == 0)
        // {
        //     float progress = static_cast<float>(t) / (float)(_totalNumIterations) * 100.0;
        //     cout << "." << setprecision(2) << progress << "%." << flush;
        // }

        vector<BaseLearner*>::iterator whypIt;
        for (whypIt = _weakHypotheses[wHypInd].begin();
             whypIt != _weakHypotheses[wHypInd].end(); ++whypIt)
        {
            // AbstainableLearner* currWeakHyp = dynamic_cast<AbstainableLearner*>(*whypIt);
            BaseLearner* currWeakHyp = *whypIt;
            AlphaReal alpha = currWeakHyp->getAlpha();

            for (int i = 0; i < numExamples; ++i)
            {
                for (int l = 0; l < numClasses; ++l)
                    allOutputs[i][wHypInd][l] += alpha * currWeakHyp->classify(_pCurrentData, i, l);
            }
            ++t;
        }
    }
    cout << "Done." << endl;
}
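// Layout note for the matrix computed above: one cell per (example,
// iteration, class), i.e.
//
//   allOutputs[i][t][l] = \sum_{h in iteration t} \alpha_h \, h(x_i, l),
//
// so the strong-classifier vote for class l after T iterations is
// recovered by summing allOutputs[i][0..T-1][l]. This is exactly what
// DataReader::classifyKthWeakLearner does incrementally when
// _isDataStorageMatrix is set.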
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------

bool AdaBoostMDPClassifier::classifyTestMDP( int i )
{
    const int numClasses = _pData->getNumClasses();

    ExampleResults* tmpResult = new ExampleResults( i, numClasses );
    vector<AlphaReal>& currVotesVector = tmpResult->getVotesVector();

    // accumulate the votes of the weak hypotheses selected by the policy
    for (int j = 0; j < (int)_weakHypotheses.size(); ++j)
    {
        if (_history[j])
        {
            BaseLearner* currWeakHyp = _weakHypotheses[j];
            AlphaReal alpha = currWeakHyp->getAlpha();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(_pTestData, i, l);
        }
    }

    vector<Label>::const_iterator lIt;
    const vector<Label>& labels = _pTestData->getLabels(i);

    // the vote of the winning negative class
    AlphaReal maxNegClass = -numeric_limits<AlphaReal>::max();
    // the vote of the winning positive class
    AlphaReal minPosClass = numeric_limits<AlphaReal>::max();

    for ( lIt = labels.begin(); lIt != labels.end(); ++lIt )
    {
        // get the negative winner class
        if ( lIt->y < 0 && currVotesVector[lIt->idx] > maxNegClass )
            maxNegClass = currVotesVector[lIt->idx];
        // get the positive winner class
        if ( lIt->y > 0 && currVotesVector[lIt->idx] < minPosClass )
            minPosClass = currVotesVector[lIt->idx];
    }

    // if the vote for the worst positive label is lower than the
    // vote for the highest negative label -> error
    bool correct = (minPosClass > maxNegClass);

    // free the temporary result
    delete tmpResult;

    return correct;
}
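// The decision rule implemented above (and reused in
// DataReader::getAdaboostPerfOnCurrentDataset and
// DataReader::getAccuracyOnCurrentDataSet below) is the standard
// multi-label margin test: with accumulated votes
//
//   f_l(x) = \sum_t \alpha_t h_t(x, l),
//
// the example counts as correct iff
//
//   \min_{l : y_l > 0} f_l(x) > \max_{l : y_l < 0} f_l(x),
//
// i.e. every positive label outscores every negative label.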
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------

DataReader::DataReader(const nor_utils::Args& args, int verbose)
    : _verbose(verbose), _args(args)
{
    string mdpTrainFileName = _args.getValue<string>("traintestmdp", 0);
    string testFileName     = _args.getValue<string>("traintestmdp", 1);
    string shypFileName     = _args.getValue<string>("traintestmdp", 3);
    _numIterations          = _args.getValue<int>("traintestmdp", 2);
    string tmpFname         = _args.getValue<string>("traintestmdp", 4);

    if (_verbose > 0)
        cout << "Loading arff data for MDP learning..." << flush;

    // load the arff
    loadInputData(mdpTrainFileName, testFileName, shypFileName);

    if (_verbose > 0)
        cout << "Done." << endl << flush;

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // loads them
    us.loadHypotheses(shypFileName, _weakHypotheses, _pTrainData);

    if (_numIterations < (int)_weakHypotheses.size())
        _weakHypotheses.resize(_numIterations);

    if (_verbose > 0)
        cout << "Done." << endl << flush;

    assert( _weakHypotheses.size() >= _numIterations );

    // calculate the sum of alphas
    vector<BaseLearner*>::iterator it;
    _sumAlphas = 0.0;
    for (it = _weakHypotheses.begin(); it != _weakHypotheses.end(); ++it)
    {
        BaseLearner* currBLearner = *it;
        _sumAlphas += currBLearner->getAlpha();
    }
}
vector<AlphaReal> DataReader::getWhypClassification( const int wHypInd, const int instance )
{
    const int numClasses = _pCurrentData->getNumClasses();
    vector<AlphaReal> scoreVector(numClasses);

    vector<BaseLearner*>::iterator whypIt;
    for (whypIt = _weakHypotheses[wHypInd].begin();
         whypIt != _weakHypotheses[wHypInd].end(); ++whypIt)
    {
        BaseLearner* currWeakHyp = *whypIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        for (int l = 0; l < numClasses; ++l)
            scoreVector[l] += alpha * currWeakHyp->classify(_pCurrentData, instance, l);
    }

    return scoreVector;
}
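// A minimal usage sketch for getWhypClassification. The helper below is
// hypothetical (not part of the original sources): given a DataReader with
// a current dataset set up and a valid iteration/instance pair, the
// predicted label is simply the argmax of the returned weighted-vote
// vector. Assumes <algorithm> is included.
static int predictWithSingleIteration(DataReader& reader, int wHypInd, int instance)
{
    vector<AlphaReal> scores = reader.getWhypClassification(wHypInd, instance);

    // predicted label = class with the largest accumulated vote
    return static_cast<int>(std::max_element(scores.begin(), scores.end()) - scores.begin());
}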
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------

vector<int> DataReader::classifyKthWeakLearner( const int wHypInd, const int instance, ExampleResults* exampleResult )
{
    if (_verbose > 3)
    {
        //cout << "Classifying: " << wHypInd << endl;
    }

    if ( wHypInd >= _numIterations )
    {
        assert(false);
    }

    const int numClasses = _pCurrentData->getNumClasses();

    // a reference for clarity and speed
    vector<AlphaReal>& currVotesVector = exampleResult->getVotesVector();

    vector<int> ternaryPhis(numClasses);
    AlphaReal alpha;

    // for every class
    if (_isDataStorageMatrix)
    {
        for (int l = 0; l < numClasses; ++l)
        {
            currVotesVector[l] += (*_pCurrentMatrix)[instance][wHypInd][l];
            ternaryPhis[l] = (currVotesVector[l] > 0) ? 1 : ((currVotesVector[l] < 0) ? -1 : 0);
        }
    }
    else
    {
        vector<BaseLearner*>::iterator whypIt;
        for (whypIt = _weakHypotheses[wHypInd].begin();
             whypIt != _weakHypotheses[wHypInd].end(); ++whypIt)
        {
            BaseLearner* currWeakHyp = *whypIt;
            alpha = currWeakHyp->getAlpha();

            for (int l = 0; l < numClasses; ++l)
            {
                int vote = currWeakHyp->classify(_pCurrentData, instance, l);
                currVotesVector[l] += alpha * vote;
                ternaryPhis[l] = (currVotesVector[l] > 0) ? 1 : ((currVotesVector[l] < 0) ? -1 : 0);
            }
        }
    }

    return ternaryPhis;
}
void SoftCascadeLearner::computePosteriors(InputData* pData, vector<BaseLearner*>& weakHypotheses,
                                           vector<AlphaReal>& oPosteriors, int positiveLabelIndex)
{
    const int numExamples = pData->getNumExamples();

    oPosteriors.resize(numExamples);
    fill(oPosteriors.begin(), oPosteriors.end(), 0.);

    vector<BaseLearner*>::iterator whyIt = weakHypotheses.begin();
    for (; whyIt != weakHypotheses.end(); ++whyIt)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        for (int i = 0; i < numExamples; ++i)
        {
            AlphaReal alphaH = alpha * currWeakHyp->classify(pData, i, positiveLabelIndex);
            oPosteriors[i] += alphaH;
        }
    }
}
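// The score accumulated by computePosteriors is the (unnormalized) strong
// classifier output for the positive class after the T rounds seen so far:
//
//   f(x_i) = \sum_{t=1}^{T} \alpha_t h_t(x_i, l^+),
//
// where l^+ is positiveLabelIndex. The soft cascade later compares this
// score against the per-stage rejection thresholds (see
// findBestRejectionThreshold and filterDataset in run() below).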
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------

double DataReader::classifyKthWeakLearner( const int wHypInd, const int instance, ExampleResults* exampleResult )
{
    if (_verbose > 3)
    {
        //cout << "Classifying: " << wHypInd << endl;
    }

    if ( wHypInd >= _numIterations )
        return -1.0; // indicating error

    const int numClasses = _pCurrentData->getNumClasses();

    BaseLearner* currWeakHyp = _weakHypotheses[wHypInd];
    AlphaReal alpha = currWeakHyp->getAlpha();

    // a reference for clarity and speed
    vector<AlphaReal>& currVotesVector = exampleResult->getVotesVector();

    // for every class
    for (int l = 0; l < numClasses; ++l)
        currVotesVector[l] += alpha * currWeakHyp->classify(_pCurrentData, instance, l);

    return alpha;
}
// Returns the results into the results vector
void MDDAGClassifier::computeResults(InputData* pData, vector<BaseLearner*>& weakHypotheses,
                                     vector< ExampleResults* >& results, int numIterations)
{
    assert( !weakHypotheses.empty() );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;
    if ( !_outputInfoFile.empty() )
    {
        if ( _args.getNumValues("outputinfo") > 1 )
            pOutInfo = new OutputInfo(_args);
        else
            pOutInfo = new OutputInfo(_outputInfoFile, "e01hamauc", false);
    }

    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    if ( pOutInfo )
    {
        pOutInfo->initialize( pData );
        pOutInfo->outputHeader(pData->getClassMap(),
                               true,  // output iterations
                               false, // output time
                               true   // endline
                               );
    }

    // for every iteration: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
         whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            // pOutInfo->outputError(pData, currWeakHyp);
            // pOutInfo->outTPRFPR(pData);
            // pOutInfo->outputBalancedError(pData, currWeakHyp);
            // if ( ( t % 1 ) == 0 )
            //     pOutInfo->outputROC(pData);

            pOutInfo->outputCustom(pData, currWeakHyp);

            // Margins and edge require an update of the weights,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);
            pOutInfo->endLine();
        }
    }

    if (pOutInfo)
        delete pOutInfo;
}
void MDDAGClassifier::saveLikelihoods(const string& dataFileName, const string& shypFileName,
                                      const string& outFileName, int numIterations)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());
    string exampleName;

    if (_verbose > 0)
        cout << "Output likelihoods..." << flush;

    // get the results
    /////////////////////////////////////////////////////////////////////
    // computeResults( pData, weakHypotheses, results, numIterations );
    assert( !weakHypotheses.empty() );

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;
    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile, "err");

    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // sum of votes for the classes
    vector< AlphaReal > votesForExamples( numClasses );
    vector< AlphaReal > expVotesForExamples( numClasses );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    if ( pOutInfo )
        pOutInfo->initialize( pData );

    // for every iteration: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
         whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            pOutInfo->outputCustom(pData, currWeakHyp);

            // Margins and edge require an update of the weights,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);
            pOutInfo->endLine();
        }

        // calculate likelihoods from votes
        fill( votesForExamples.begin(), votesForExamples.end(), 0.0 );
        AlphaReal lLambda = 0.0;

        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            AlphaReal sumExp = 0.0;
            // for every class
            for (int l = 0; l < numClasses; ++l)
            {
                expVotesForExamples[l] = exp( currVotesVector[l] );
                sumExp += expVotesForExamples[l];
            }

            if ( sumExp > numeric_limits<AlphaReal>::epsilon() )
            {
                for (int l = 0; l < numClasses; ++l)
                    expVotesForExamples[l] /= sumExp;
            }

            Example ex = pData->getExample( results[i]->getIdx() );
            vector<Label> labs = ex.getLabels();
            AlphaReal m = numeric_limits<AlphaReal>::infinity();

            for (int l = 0; l < numClasses; ++l)
            {
                if ( labs[l].y > 0 )
                {
                    if ( expVotesForExamples[l] > numeric_limits<AlphaReal>::epsilon() )
                    {
                        AlphaReal logVal = log( expVotesForExamples[l] );

                        if ( logVal != m )
                            lLambda += ( ( 1.0 / (AlphaReal)numExamples ) * logVal );
                    }
                }
            }
        }

        outFile << t << "\t" << lLambda;
        outFile << '\n';
        outFile.flush();
    }

    if (pOutInfo)
        delete pOutInfo;

    // computeResults( pData, weakHypotheses, results, numIterations );
    ///////////////////////////////////////////////////////////////////////////////////

    /*
     for (int i = 0; i < numExamples; ++i)
     {
         // output the name if it exists, otherwise the number
         // of the example
         exampleName = pData->getExampleName(i);
         if ( !exampleName.empty() )
             outFile << exampleName << ',';

         // output the posteriors
         outFile << results[i]->getVotesVector()[0];
         for (int l = 1; l < numClasses; ++l)
             outFile << ',' << results[i]->getVotesVector()[l];
         outFile << '\n';
     }
     */

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
    {
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
    }

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}
void FilterBoostLearner::run(const nor_utils::Args& args)
{
    // load the arguments
    this->getArgs(args);

    time_t startTime, currentTime;
    time(&startTime);

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
    // initialize learning options; normally it's done in the strong loop
    // also, here we do it for Product learners, so input data can be created
    pWeakHypothesisSource->initLearningOptions(args);

    BaseLearner* pConstantWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

    // get the training input data, and load it
    InputData* pTrainingData = pWeakHypothesisSource->createInputData();
    pTrainingData->initOptions(args);
    pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

    const int numClasses = pTrainingData->getNumClasses();
    const int numExamples = pTrainingData->getNumExamples();

    // initialize the margins variable
    _margins.resize( numExamples );
    for( int i = 0; i < numExamples; i++ )
    {
        _margins[i].resize( numClasses );
        fill( _margins[i].begin(), _margins[i].end(), 0.0 );
    }

    // get the testing input data, and load it
    InputData* pTestData = NULL;
    if ( !_testFileName.empty() )
    {
        pTestData = pWeakHypothesisSource->createInputData();
        pTestData->initOptions(args);
        pTestData->load(_testFileName, IT_TEST, _verbose);
    }

    // The output information object
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        // Baseline: constant classifier - goes into 0th iteration
        BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);
        AlphaReal constantEnergy = pConstantWeakHypothesis->run();

        pOutInfo = new OutputInfo(args);
        pOutInfo->initialize(pTrainingData);

        updateMargins( pTrainingData, pConstantWeakHypothesis );

        if (pTestData)
            pOutInfo->initialize(pTestData);
        pOutInfo->outputHeader(pTrainingData->getClassMap() );

        pOutInfo->outputIteration(-1);
        pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

        if (pTestData)
        {
            pOutInfo->separator();
            pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);
        }

        pOutInfo->outputCurrentTime();
        pOutInfo->endLine();
        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
    }

    // reload the previously found weak learners if -resume is set.
    // otherwise just return 0
    int startingIteration = resumeWeakLearners(pTrainingData);

    Serialization ss(_shypFileName, _isShypCompressed );
    ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

    // perform the resuming if necessary. If not it will just return
    resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;

    ///////////////////////////////////////////////////////////////////////
    // Starting the AdaBoost main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = startingIteration; t < _numIterations; ++t)
    {
        if (_verbose > 1)
            cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

        // create the weak learner
        BaseLearner* pWeakHypothesis;
        BaseLearner* pConstantWeakHypothesis;
        pWeakHypothesis = pWeakHypothesisSource->create();
        pWeakHypothesis->initLearningOptions(args);
        //pTrainingData->clearIndexSet();
        pWeakHypothesis->setTrainingData(pTrainingData);

        AlphaReal edge, energy = 0.0;

        // create the constant learner
        pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);

        AlphaReal constantEdge = -numeric_limits<AlphaReal>::max();

        int currentNumberOfUsedData = static_cast<int>(_Cn * log(t + 3.0));

        if ( _onlineWeakLearning )
        {
            // check whether the weak learner is a StochasticLearner
            try
            {
                StochasticLearner* pStochasticLearner =
                    dynamic_cast<StochasticLearner*>(pWeakHypothesis);
                StochasticLearner* pStochasticConstantWeakHypothesis =
                    dynamic_cast<StochasticLearner*>(pConstantWeakHypothesis);

                pStochasticLearner->initLearning();
                pStochasticConstantWeakHypothesis->initLearning();

                if (_verbose > 1)
                    cout << "Number of random instances: \t" << currentNumberOfUsedData << endl;

                // set the weights
                setWeightToMargins(pTrainingData);

                // learning
                for (int i = 0; i < currentNumberOfUsedData; ++i)
                {
                    int randomIndex = (rand() % pTrainingData->getNumExamples());
                    //int randomIndex = getRandomIndex();
                    pStochasticLearner->update(randomIndex);
                    pStochasticConstantWeakHypothesis->update(randomIndex);
                }
                pStochasticLearner->finishLearning();
                pStochasticConstantWeakHypothesis->finishLearning();
            }
            catch (bad_cast& e)
            {
                cerr << "The weak learner must be a StochasticLearner!" << endl;
                exit(-1);
            }
        }
        else
        {
            filter( pTrainingData, currentNumberOfUsedData );
            if ( pTrainingData->getNumExamples() < 2 )
            {
                filter( pTrainingData, currentNumberOfUsedData, false );
            }

            if (_verbose > 1)
            {
                cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
            }

            energy = pWeakHypothesis->run();
            pConstantWeakHypothesis->run();
        }

        // estimate the edge
        filter( pTrainingData, currentNumberOfUsedData, false );
        edge = pWeakHypothesis->getEdge(true) / 2.0;
        constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;

        if ( constantEdge > edge )
        {
            delete pWeakHypothesis;
            pWeakHypothesis = pConstantWeakHypothesis;
            edge = constantEdge;
        }
        else
        {
            delete pConstantWeakHypothesis;
        }

        // calculate alpha
        AlphaReal alpha = 0.5 * log( ( 1 + edge ) / ( 1 - edge ) );
        pWeakHypothesis->setAlpha( alpha );
        _sumAlpha += alpha;

        if (_verbose > 1)
            cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

        // Output the step-by-step information
        pTrainingData->clearIndexSet();
        printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

        // Updates the weights and returns the edge
        //AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

        if (_verbose > 1)
        {
            cout << setprecision(5)
                 << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                 << "--> Edge  = " << edge << endl
                 << "--> Energy  = " << energy << endl
            //   << "--> ConstantEnergy  = " << constantEnergy << endl
            //   << "--> difference  = " << (energy - constantEnergy) << endl
            ;
        }

        // update the margins
        //saveMargins();
        updateMargins( pTrainingData, pWeakHypothesis );

        // append the current weak learner to the strong hypothesis file,
        // that is, serialize it.
        ss.appendHypothesis(t, pWeakHypothesis);

        // Add it to the internal list of weak hypotheses
        _foundHypotheses.push_back(pWeakHypothesis);

        // check if the time limit has been reached
        if (_maxTime > 0)
        {
            time( &currentTime );
            float diff = difftime(currentTime, startTime); // difftime is in seconds
            diff /= 60; // = minutes

            if (diff > _maxTime)
            {
                if (_verbose > 0)
                    cout << "Time limit of " << _maxTime << " minutes has been reached!" << endl;
                break;
            }
        } // check for maxtime
    } // loop on iterations
    /////////////////////////////////////////////////////////

    // write the footer of the strong hypothesis file
    ss.writeFooter();

    // Free the two input data objects
    if (pTrainingData)
        delete pTrainingData;
    if (pTestData)
        delete pTestData;

    if (pOutInfo)
        delete pOutInfo;

    if (_verbose > 0)
        cout << "Learning completed." << endl;
}
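// With the estimated edge gamma (the getEdge()/2 values above), the vote
// weight computed in the loop is the usual confidence-rated AdaBoost rule:
//
//   \alpha_t = \frac{1}{2} \ln \frac{1 + \gamma_t}{1 - \gamma_t},
//
// which is zero for a chance-level learner (gamma_t = 0) and grows as the
// edge on the filtered sample improves.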
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------

double DataReader::getAdaboostPerfOnCurrentDataset()
{
    const int numClasses = _pCurrentData->getNumClasses();
    const int numExamples = _pCurrentData->getNumExamples();
    int correct = 0;
    int incorrect = 0;
    double err = 0.0;

    vector<double>& iterationWiseError = _iterationWiseError[_pCurrentData];
    iterationWiseError.resize(_weakHypotheses.size(), 0.);

    vector<ExampleResults*> examplesResults(numExamples);
    for (int i = 0; i < numExamples; ++i)
        examplesResults[i] = new ExampleResults(i, numClasses);

    for( int j = 0; j < (int)_weakHypotheses.size(); ++j )
    {
        correct = 0;
        incorrect = 0;

        for( int i = 0; i < numExamples; ++i )
        {
            ExampleResults*& tmpResult = examplesResults[i];
            vector<AlphaReal>& currVotesVector = tmpResult->getVotesVector();

            if (_isDataStorageMatrix)
            {
                for (int l = 0; l < numClasses; ++l)
                    currVotesVector[l] += (*_pCurrentMatrix)[i][j][l];
            }
            else
            {
                vector<BaseLearner*>::iterator whypIt;
                for (whypIt = _weakHypotheses[j].begin();
                     whypIt != _weakHypotheses[j].end(); ++whypIt)
                {
                    BaseLearner* currWeakHyp = *whypIt;
                    AlphaReal alpha = currWeakHyp->getAlpha();

                    // for every class
                    for (int l = 0; l < numClasses; ++l)
                        currVotesVector[l] += alpha * currWeakHyp->classify(_pCurrentData, i, l);
                }
            }

            vector<Label>::const_iterator lIt;
            const vector<Label>& labels = _pCurrentData->getLabels(i);

            // the vote of the winning negative class
            AlphaReal maxNegClass = -numeric_limits<AlphaReal>::max();
            // the vote of the winning positive class
            AlphaReal minPosClass = numeric_limits<AlphaReal>::max();

            for ( lIt = labels.begin(); lIt != labels.end(); ++lIt )
            {
                // get the negative winner class
                if ( lIt->y < 0 && currVotesVector[lIt->idx] > maxNegClass )
                    maxNegClass = currVotesVector[lIt->idx];
                // get the positive winner class
                if ( lIt->y > 0 && currVotesVector[lIt->idx] < minPosClass )
                    minPosClass = currVotesVector[lIt->idx];
            }

            // if the vote for the worst positive label is lower than the
            // vote for the highest negative label -> error
            if (minPosClass <= maxNegClass)
                incorrect++;
            else
                correct++;
        }

        err = ((double) incorrect / ((double) numExamples)); // * 100.0;
        iterationWiseError[j] = err;
    }

    // cout << endl;
    // int i = 0;
    // for (const auto & myTmpKey : _iterationWiseError[_pCurrentData]) {
    //     cout << myTmpKey << " ";
    //     ++i;
    //     if (i > 50) break;
    // }
    // cout << endl;

    for (int i = 0; i < numExamples; ++i)
        delete examplesResults[i];

    // double acc = ((double) correct / ((double) numExamples)) * 100.0;
    return err;
}
void SoftCascadeLearner::run(const nor_utils::Args& args)
{
    // load the arguments
    this->getArgs(args);

    // print cascade properties
    if (_verbose > 0)
    {
        cout << "[+] Softcascade parameters :" << endl
             << "\t --> target detection rate = " << _targetDetectionRate << endl
             << "\t --> alpha (exp param) = " << _alphaExponentialParameter << endl
             << "\t --> bootstrap rate = " << _bootstrapRate << endl
             << endl;
    }

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
    // initialize learning options; normally it's done in the strong loop
    // also, here we do it for Product learners, so input data can be created
    pWeakHypothesisSource->initLearningOptions(args);

    // get the training input data, and load it
    InputData* pTrainingData = pWeakHypothesisSource->createInputData();
    pTrainingData->initOptions(args);
    pTrainingData->load(_trainFileName, IT_TRAIN, 5);

    InputData* pBootstrapData = NULL;
    if (!_bootstrapFileName.empty())
    {
        pBootstrapData = pWeakHypothesisSource->createInputData();
        pBootstrapData->initOptions(args);
        pBootstrapData->load(_bootstrapFileName, IT_TRAIN, 5);
    }

    // get the testing input data, and load it
    InputData* pTestData = NULL;
    if ( !_testFileName.empty() )
    {
        pTestData = pWeakHypothesisSource->createInputData();
        pTestData->initOptions(args);
        pTestData->load(_testFileName, IT_TEST, 5);
    }

    Serialization ss(_shypFileName, false );
    ss.writeHeader(_baseLearnerName);

    // outputHeader();
    // The output information object
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        pOutInfo = new OutputInfo(args, true);
        pOutInfo->setOutputList("sca", &args);

        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
        pOutInfo->outputHeader(pTrainingData->getClassMap(), true, true, false);
        pOutInfo->outputUserHeader("thresh");
        pOutInfo->headerEndLine();
    }

    // ofstream trainPosteriorsFile;
    // ofstream testPosteriorsFile;

    const NameMap& namemap = pTrainingData->getClassMap();
    _positiveLabelIndex = namemap.getIdxFromName(_positiveLabelName);

    // FIXME: output posteriors
    // OutputInfo* pTrainPosteriorsOut = NULL;
    // OutputInfo* pTestPosteriorsOut = NULL;
    // if (! _trainPosteriorsFileName.empty()) {
    //     pTrainPosteriorsOut = new OutputInfo(_trainPosteriorsFileName, "pos", true);
    //     pTrainPosteriorsOut->initialize(pTrainingData);
    //     dynamic_cast<PosteriorsOutput*>( pTrainPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );
    // }
    // if (! _testPosteriorsFileName.empty() && !_testFileName.empty() ) {
    //     pTestPosteriorsOut = new OutputInfo(_testPosteriorsFileName, "pos", true);
    //     pTestPosteriorsOut->initialize(pTestData);
    //     dynamic_cast<PosteriorsOutput*>( pTestPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );
    // }

    const int numExamples = pTrainingData->getNumExamples();
    vector<BaseLearner*> inWeakHypotheses;

    if (_fullRun)
    {
        // TODO : the full training is implemented, but testing is still needed
        AdaBoostMHLearner* sHypothesis = new AdaBoostMHLearner();
        sHypothesis->run(args, pTrainingData, _baseLearnerName, _numIterations, inWeakHypotheses);
        delete sHypothesis;
    }
    else
    {
        cout << "[+] Loading uncalibrated shyp file... ";

        // read the shyp file of the trained classifier
        UnSerialization us;
        us.loadHypotheses(_unCalibratedShypFileName, inWeakHypotheses, pTrainingData);

        if (_inShypLimit > 0 && _inShypLimit < (int)inWeakHypotheses.size())
        {
            inWeakHypotheses.resize(_inShypLimit);
        }
        if (_numIterations > (int)inWeakHypotheses.size())
        {
            _numIterations = inWeakHypotheses.size();
        }

        cout << "weak hypotheses loaded, " << inWeakHypotheses.size() << " retained.\n";
    }

    // some initializations
    _foundHypotheses.resize(0);
    double faceRejectionFraction = 0.;
    double estimatedExecutionTime = 0.;
    vector<double> rejectionDistributionVector;

    _rejectionThresholds.resize(0);

    set<int> trainingIndices;
    for (int i = 0; i < numExamples; i++)
    {
        trainingIndices.insert(pTrainingData->getRawIndex(i));
    }

    // init v_t (see the paper)
    initializeRejectionDistributionVector(_numIterations, rejectionDistributionVector);

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;

    ///////////////////////////////////////////////////////////////////////
    // Starting the SoftCascade main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = 0; t < _numIterations; ++t)
    {
        if (_verbose > 0)
            cout << "--------------[ iteration " << (t+1) << " ]--------------" << endl;

        faceRejectionFraction += rejectionDistributionVector[t];

        cout << "[+] Face rejection tolerated : " << faceRejectionFraction
             << " | v[t] = " << rejectionDistributionVector[t] << endl;

        int numberOfNegatives = pTrainingData->getNumExamplesPerClass(1 - _positiveLabelIndex);

        // select the candidate weak hypothesis maximizing the separation span
        int selectedIndex = 0;
        AlphaReal bestGap = 0;
        vector<AlphaReal> posteriors;
        computePosteriors(pTrainingData, _foundHypotheses, posteriors, _positiveLabelIndex);

        vector<BaseLearner*>::iterator whyIt;
        int i;
        for (i = 0, whyIt = inWeakHypotheses.begin(); whyIt != inWeakHypotheses.end(); ++whyIt, ++i)
        {
            vector<AlphaReal> temporaryPosteriors = posteriors;
            vector<BaseLearner*> temporaryWeakHyp = _foundHypotheses;
            temporaryWeakHyp.push_back(*whyIt);
            updatePosteriors(pTrainingData, *whyIt, temporaryPosteriors, _positiveLabelIndex);

            AlphaReal gap = computeSeparationSpan(pTrainingData, temporaryPosteriors, _positiveLabelIndex);

            if (gap > bestGap)
            {
                bestGap = gap;
                selectedIndex = i;
            }
        }

        BaseLearner* selectedWeakHypothesis = inWeakHypotheses[selectedIndex];

        cout << "[+] Rank of the selected weak hypothesis : " << selectedIndex << endl
             << "\t ---> edge gap = " << bestGap << endl
             << "\t ---> alpha = " << selectedWeakHypothesis->getAlpha() << endl;

        // update the stages
        _foundHypotheses.push_back(selectedWeakHypothesis);
        updatePosteriors(pTrainingData, selectedWeakHypothesis, posteriors, _positiveLabelIndex);

        double missesFraction;
        AlphaReal r = findBestRejectionThreshold(pTrainingData, posteriors, faceRejectionFraction, missesFraction);
        _rejectionThresholds.push_back(r);

        // update the output info object
        dynamic_cast<SoftCascadeOutput*>( pOutInfo->getOutputInfoObject("sca") )->appendRejectionThreshold(r);

        cout << "[+] Rejection threshold = " << r << endl;

        // some updates
        ss.appendHypothesisWithThreshold(t, selectedWeakHypothesis, r);
        faceRejectionFraction -= missesFraction;

        inWeakHypotheses.erase(inWeakHypotheses.begin() + selectedIndex);

        double whypCost = 1; // just in case there are different costs for each whyp
        estimatedExecutionTime += whypCost * numberOfNegatives;

        // output perf in file
        vector< vector< AlphaReal> > scores(0);
        _output << t + 1 << setw(_sepWidth + 1) << r << setw(_sepWidth);

        // update OutputInfo with the new whyp
        // updateOutputInfo(pOutInfo, pTrainingData, selectedWeakHypothesis);
        // if (pTestData) {
        //     updateOutputInfo(pOutInfo, pTestData, selectedWeakHypothesis);
        // }

        // output the iteration results
        printOutputInfo(pOutInfo, t, pTrainingData, pTestData, selectedWeakHypothesis, r);

        // if (pTrainPosteriorsOut) {
        //     pTrainPosteriorsOut->setTable(pTrainingData, pOutInfo->getTable(pTrainingData));
        //     pTrainPosteriorsOut->outputCustom(pTrainingData);
        // }
        //
        // if (pTestPosteriorsOut) {
        //     pTestPosteriorsOut->setTable(pTestData, pOutInfo->getTable(pTestData));
        //     pTestPosteriorsOut->outputCustom(pTestData);
        // }

        int leftNegatives = filterDataset(pTrainingData, posteriors, r, trainingIndices);
        if (leftNegatives == 0)
        {
            cout << endl << "[+] No more negatives.\n";
            break;
        }

        if (_bootstrapRate != 0)
        {
            bootstrapTrainingSet(pTrainingData, pBootstrapData, trainingIndices);
        }
    } // loop on iterations
    /////////////////////////////////////////////////////////

    // write the footer of the strong hypothesis file
    ss.writeFooter();

    // Free the input data objects
    if (pTrainingData)
        delete pTrainingData;
    if (pBootstrapData)
        delete pBootstrapData;
    if (pTestData)
        delete pTestData;

    if (_verbose > 0)
        cout << "Learning completed." << endl;
}
void FilterBoostLearner::run(const nor_utils::Args& args)
{
    // load the arguments
    this->getArgs(args);

    time_t startTime, currentTime;
    time(&startTime);

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
    // initialize learning options; normally it's done in the strong loop
    // also, here we do it for Product learners, so input data can be created
    pWeakHypothesisSource->initLearningOptions(args);

    BaseLearner* pConstantWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

    // get the training input data, and load it
    InputData* pTrainingData = pWeakHypothesisSource->createInputData();
    pTrainingData->initOptions(args);
    pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

    const int numClasses = pTrainingData->getNumClasses();
    const int numExamples = pTrainingData->getNumExamples();

    // initialize the margins variable
    _margins.resize( numExamples );
    for( int i = 0; i < numExamples; i++ )
    {
        _margins[i].resize( numClasses );
        fill( _margins[i].begin(), _margins[i].end(), 0.0 );
    }

    // get the testing input data, and load it
    InputData* pTestData = NULL;
    if ( !_testFileName.empty() )
    {
        pTestData = pWeakHypothesisSource->createInputData();
        pTestData->initOptions(args);
        pTestData->load(_testFileName, IT_TEST, _verbose);
    }

    // The output information object
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        // Baseline: constant classifier - goes into 0th iteration
        BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);
        float constantEnergy = pConstantWeakHypothesis->run();

        pOutInfo = new OutputInfo(_outputInfoFile);
        pOutInfo->initialize(pTrainingData);

        updateMargins( pTrainingData, pConstantWeakHypothesis );

        if (pTestData)
            pOutInfo->initialize(pTestData);
        pOutInfo->outputHeader();

        pOutInfo->outputIteration(-1);
        pOutInfo->outputError(pTrainingData, pConstantWeakHypothesis);

        if (pTestData)
            pOutInfo->outputError(pTestData, pConstantWeakHypothesis);

        /*
         pOutInfo->outputMargins(pTrainingData, pConstantWeakHypothesis);
         pOutInfo->outputEdge(pTrainingData, pConstantWeakHypothesis);
         if (pTestData)
             pOutInfo->outputMargins(pTestData, pConstantWeakHypothesis);
         pOutInfo->outputMAE(pTrainingData);
         if (pTestData)
             pOutInfo->outputMAE(pTestData);
         */

        pOutInfo->outputCurrentTime();
        pOutInfo->endLine();
        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
    }

    // reload the previously found weak learners if -resume is set.
    // otherwise just return 0
    int startingIteration = resumeWeakLearners(pTrainingData);

    Serialization ss(_shypFileName, _isShypCompressed );
    ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

    // perform the resuming if necessary. If not it will just return
    resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;

    ///////////////////////////////////////////////////////////////////////
    // Starting the AdaBoost main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = startingIteration; t < _numIterations; ++t)
    {
        if (_verbose > 1)
            cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

        filter( pTrainingData, (int)(_Cn * log(t + 2.0)) );
        if ( pTrainingData->getNumExamples() < 2 )
        {
            filter( pTrainingData, (int)(_Cn * log(t + 2.0)), false );
        }

        if (_verbose > 1)
        {
            cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
        }

        BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
        pWeakHypothesis->initLearningOptions(args);
        //pTrainingData->clearIndexSet();
        pWeakHypothesis->setTrainingData(pTrainingData);
        float energy = pWeakHypothesis->run();

        BaseLearner* pConstantWeakHypothesis;
        if (_withConstantLearner) // check constant learner if user wants it
        {
            pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            float constantEnergy = pConstantWeakHypothesis->run();
        }

        // estimate the edge
        filter( pTrainingData, (int)(_Cn * log(t + 2.0)), false );
        float edge = pWeakHypothesis->getEdge() / 2.0;

        if (_withConstantLearner) // check constant learner if user wants it
        {
            float constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
            if ( constantEdge > edge )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
                edge = constantEdge;
            }
            else
            {
                delete pConstantWeakHypothesis;
            }
        }

        // calculate alpha
        float alpha = 0.5 * log( ( 0.5 + edge ) / ( 0.5 - edge ) );
        pWeakHypothesis->setAlpha( alpha );

        if (_verbose > 1)
            cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

        // Output the step-by-step information
        pTrainingData->clearIndexSet();
        printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

        // Updates the weights and returns the edge
        float gamma = updateWeights(pTrainingData, pWeakHypothesis);

        if (_verbose > 1)
        {
            cout << setprecision(5)
                 << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                 << "--> Edge  = " << gamma << endl
                 << "--> Energy  = " << energy << endl
            //   << "--> ConstantEnergy  = " << constantEnergy << endl
            //   << "--> difference  = " << (energy - constantEnergy) << endl
            ;
        }

        // update the margins
        updateMargins( pTrainingData, pWeakHypothesis );

        // append the current weak learner to the strong hypothesis file,
        // that is, serialize it.
        ss.appendHypothesis(t, pWeakHypothesis);

        // Add it to the internal list of weak hypotheses
        _foundHypotheses.push_back(pWeakHypothesis);

        // check if the time limit has been reached
        if (_maxTime > 0)
        {
            time( &currentTime );
            float diff = difftime(currentTime, startTime); // difftime is in seconds
            diff /= 60; // = minutes

            if (diff > _maxTime)
            {
                if (_verbose > 0)
                    cout << "Time limit of " << _maxTime << " minutes has been reached!" << endl;
                break;
            }
        } // check for maxtime
    } // loop on iterations
    /////////////////////////////////////////////////////////

    // write the footer of the strong hypothesis file
    ss.writeFooter();

    // Free the two input data objects
    if (pTrainingData)
        delete pTrainingData;
    if (pTestData)
        delete pTestData;

    if (pOutInfo)
        delete pOutInfo;

    if (_verbose > 0)
        cout << "Learning completed." << endl;
}
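// Note: this variant computes alpha = (1/2) ln( (0.5 + edge) / (0.5 - edge) )
// with edge = getEdge()/2, i.e. edge = gamma/2. Substituting gives
//
//   \alpha = \frac{1}{2} \ln \frac{1 + \gamma}{1 - \gamma},
//
// the same rule as in the other run() variant above.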
// -------------------------------------------------------------------------

void AdaBoostMHLearner::run( const nor_utils::Args& args, InputData* pTrainingData,
                             const string baseLearnerName, const int numIterations,
                             vector<BaseLearner*>& foundHypotheses )
{
    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

    // initialize learning options; normally it's done in the strong loop
    // also, here we do it for Product learners, so input data can be created
    pWeakHypothesisSource->initLearningOptions(args);

    BaseLearner* pConstantWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;

    ///////////////////////////////////////////////////////////////////////
    // Starting the AdaBoost main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = 0; t < numIterations; ++t)
    {
        if ((_verbose > 0) && ((t % 100) == 0))
            cout << "--------------[ Boosting iteration " << (t+1) << " ]--------------" << endl;

        BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
        pWeakHypothesis->initLearningOptions(args);
        //pTrainingData->clearIndexSet();
        pWeakHypothesis->setTrainingData(pTrainingData);
        AlphaReal energy = pWeakHypothesis->run();

        //float gamma = pWeakHypothesis->getEdge();
        //cout << gamma << endl;

        // check the constant learner if the user wants it;
        // energy != energy tests for NaN, in which case we fall back
        // to the constant learner
        if ( (_withConstantLearner) || ( energy != energy ) )
        {
            BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEnergy = pConstantWeakHypothesis->run();

            if ( (constantEnergy <= energy) || ( energy != energy ) )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
            }
            else
            {
                delete pConstantWeakHypothesis;
            }
        }

        if (_verbose > 1)
            cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

        // Updates the weights and returns the edge
        AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

        if (_verbose > 1)
        {
            cout << setprecision(5)
                 << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                 << "--> Edge  = " << gamma << endl
                 << "--> Energy  = " << energy << endl
            //   << "--> ConstantEnergy  = " << constantEnergy << endl
            //   << "--> difference  = " << (energy - constantEnergy) << endl
            ;
        }

        // If gamma <= theta the algorithm must stop.
        // If theta == 0 and gamma is 0, it means that the weak learner is no better
        // than chance, and no further training is possible.
        if (gamma <= _theta)
        {
            if (_verbose > 0)
            {
                cout << "Can't train any further: edge = " << gamma
                     << " (with an edge offset (theta) = " << _theta << ")" << endl;
            }
            // delete pWeakHypothesis;
            // break;
        }

        // Add it to the internal list of weak hypotheses
        foundHypotheses.push_back(pWeakHypothesis);
    } // loop on iterations
    /////////////////////////////////////////////////////////

    if (_verbose > 0)
        cout << "--------------[ AdaBoost Learning completed. ]--------------" << endl;
}
// -------------------------------------------------------------------------
// -------------------------------------------------------------------------

AlphaReal AdaBoostMHLearner::updateWeights(OutputInfo* pOutInfo, InputData* pData,
                                           vector<BaseLearner*>& pWeakHypothesis)
{
    const int numExamples = pData->getNumExamples();
    const int numClasses = pData->getNumClasses();

    // _hy will contain the margins
    _hy.resize(numExamples);
    for ( int i = 0; i < numExamples; ++i )
    {
        _hy[i].resize(numClasses);
        fill( _hy[i].begin(), _hy[i].end(), 0.0 );
    }

    vector<BaseLearner*>::iterator it;

    if (_verbose > 0)
        cout << ": 0%." << flush;

    const int numIters = static_cast<int>(pWeakHypothesis.size());
    const int step = numIters < 5 ? 1 : numIters / 5;

    // calculate the margins ( f^{t}(x_i) ); _hy will contain them
    int t = 0;
    for( it = pWeakHypothesis.begin(); it != pWeakHypothesis.end(); ++it, ++t )
    {
        if (_verbose > 1 && (t + 1) % step == 0)
        {
            float progress = static_cast<float>(t) / static_cast<float>(numIters) * 100.0;
            cout << "." << setprecision(2) << progress << "%." << flush;
        }

        BaseLearner* currWeakHyp = *it;
        const AlphaReal alpha = currWeakHyp->getAlpha();

        AlphaReal Z = 0; // the normalization factor for this iteration
        AlphaReal hx;

        for (int i = 0; i < numExamples; ++i)
        {
            vector<Label>& labels = pData->getLabels(i);
            vector<Label>::iterator lIt;

            for (lIt = labels.begin(); lIt != labels.end(); ++lIt )
            {
                hx = currWeakHyp->classify(pData, i, lIt->idx );
                _hy[i][lIt->idx] += alpha * hx; // alpha * h_l(x_i)
                lIt->weight *= exp( -alpha * hx * lIt->y );
                Z += lIt->weight;
            }
        }

        // renormalize the weights
        for (int i = 0; i < numExamples; ++i)
        {
            vector<Label>& labels = pData->getLabels(i);
            vector<Label>::iterator lIt;

            for (lIt = labels.begin(); lIt != labels.end(); ++lIt )
            {
                lIt->weight /= Z;
            }
        }
    }

    // store the margins in the output info table
    pOutInfo->setTable( pData, _hy );

    return 0;
}
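// The update implemented above is the standard AdaBoost.MH rule: for each
// example i and label l,
//
//   w_{i,l} \leftarrow \frac{ w_{i,l} \, e^{ -\alpha_t h_t(x_i, l) y_{i,l} } }{ Z_t },
//
// where Z_t is the normalization factor making the weights sum to one
// after iteration t, and the accumulated margins stored in _hy are
// f_l(x_i) = \sum_t \alpha_t h_t(x_i, l).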
void AdaBoostMHLearner::run(const nor_utils::Args& args)
{
    // load the arguments
    this->getArgs(args);

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
    // initialize learning options; normally it's done in the strong loop
    // also, here we do it for Product learners, so input data can be created
    pWeakHypothesisSource->initLearningOptions(args);

    BaseLearner* pConstantWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

    // get the training input data, and load it
    InputData* pTrainingData = pWeakHypothesisSource->createInputData();
    pTrainingData->initOptions(args);
    pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

    // get the testing input data, and load it
    InputData* pTestData = NULL;
    if ( !_testFileName.empty() )
    {
        pTestData = pWeakHypothesisSource->createInputData();
        pTestData->initOptions(args);
        pTestData->load(_testFileName, IT_TEST, _verbose);
    }

    // The output information object
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        // Baseline: constant classifier - goes into 0th iteration
        BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);
        AlphaReal constantEnergy = pConstantWeakHypothesis->run();

        //pOutInfo = new OutputInfo(_outputInfoFile);
        pOutInfo = new OutputInfo(args);
        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
        pOutInfo->outputHeader(pTrainingData->getClassMap());

        pOutInfo->outputIteration(-1);
        pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

        if (pTestData != NULL)
        {
            pOutInfo->separator();
            pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);
        }

        pOutInfo->outputCurrentTime();
        pOutInfo->endLine();
        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
    }

    //cout << "Before serialization" << endl;

    // reload the previously found weak learners if -resume is set.
    // otherwise just return 0
    int startingIteration = resumeWeakLearners(pTrainingData);

    Serialization ss(_shypFileName, _isShypCompressed );
    ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

    // perform the resuming if necessary. If not it will just return
    resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;

    // The starting time is taken here, but loading a saved model may take a very long time
    time_t startTime, currentTime;
    time(&startTime);

    ///////////////////////////////////////////////////////////////////////
    // Starting the AdaBoost main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = startingIteration; t < _numIterations; ++t)
    {
        if (_verbose > 1)
            cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

        BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
        pWeakHypothesis->initLearningOptions(args);
        //pTrainingData->clearIndexSet();
        pWeakHypothesis->setTrainingData(pTrainingData);
        AlphaReal energy = pWeakHypothesis->run();

        //float gamma = pWeakHypothesis->getEdge();
        //cout << gamma << endl;

        // check the constant learner if the user wants it;
        // energy != energy tests for NaN, in which case we fall back
        // to the constant learner
        if ( (_withConstantLearner) || ( energy != energy ) )
        {
            BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEnergy = pConstantWeakHypothesis->run();

            if ( (constantEnergy <= energy) || ( energy != energy ) )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
            }
            else
            {
                delete pConstantWeakHypothesis;
            }
        }

        if (_verbose > 1)
            cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

        // Output the step-by-step information
        printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

        // Updates the weights and returns the edge
        AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

        if (_verbose > 1)
        {
            cout << setprecision(5)
                 << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                 << "--> Edge  = " << gamma << endl
                 << "--> Energy  = " << energy << endl
            //   << "--> ConstantEnergy  = " << constantEnergy << endl
            //   << "--> difference  = " << (energy - constantEnergy) << endl
            ;
        }

        // If gamma <= theta the algorithm must stop.
        // If theta == 0 and gamma is 0, it means that the weak learner is no better
        // than chance, and no further training is possible.
        if (gamma <= _theta)
        {
            if (_verbose > 0)
            {
                cout << "Can't train any further: edge = " << gamma
                     << " (with an edge offset (theta) = " << _theta << ")" << endl;
            }
            // delete pWeakHypothesis;
            // break;
        }

        // append the current weak learner to the strong hypothesis file,
        // that is, serialize it.
        ss.appendHypothesis(t, pWeakHypothesis);

        // Add it to the internal list of weak hypotheses
        _foundHypotheses.push_back(pWeakHypothesis);

        // check if the time limit has been reached
        if (_maxTime > 0)
        {
            time( &currentTime );
            float diff = difftime(currentTime, startTime); // difftime is in seconds
            diff /= 60; // = minutes

            if (diff > _maxTime)
            {
                if (_verbose > 0)
                    cout << "Time limit of " << _maxTime << " minutes has been reached!" << endl;
                break;
            }
        } // check for maxtime
    } // loop on iterations
    /////////////////////////////////////////////////////////

    // write the footer of the strong hypothesis file
    ss.writeFooter();

    // write the weights of the instances if the name of the weights file isn't empty
    printOutWeights( pTrainingData );

    // Free the two input data objects
    if (pTrainingData)
        delete pTrainingData;
    if (pTestData)
        delete pTestData;

    if (pOutInfo)
        delete pOutInfo;

    if (_verbose > 0)
        cout << "Learning completed." << endl;
}
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------

double DataReader::getAccuracyOnCurrentDataSet()
{
    double acc = 0.0;
    const int numClasses = _pCurrentData->getNumClasses();
    const int numExamples = _pCurrentData->getNumExamples();
    int correct = 0;
    int incorrect = 0;

    for( int i = 0; i < numExamples; i++ )
    {
        ExampleResults* tmpResult = new ExampleResults( i, numClasses );
        vector<AlphaReal>& currVotesVector = tmpResult->getVotesVector();

        for( int j = 0; j < (int)_weakHypotheses.size(); j++ )
        {
            BaseLearner* currWeakHyp = _weakHypotheses[j];
            AlphaReal alpha = currWeakHyp->getAlpha();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(_pCurrentData, i, l);
        }

        vector<Label>::const_iterator lIt;
        const vector<Label>& labels = _pCurrentData->getLabels(i);

        // the vote of the winning negative class
        AlphaReal maxNegClass = -numeric_limits<AlphaReal>::max();
        // the vote of the winning positive class
        AlphaReal minPosClass = numeric_limits<AlphaReal>::max();

        for ( lIt = labels.begin(); lIt != labels.end(); ++lIt )
        {
            // get the negative winner class
            if ( lIt->y < 0 && currVotesVector[lIt->idx] > maxNegClass )
                maxNegClass = currVotesVector[lIt->idx];
            // get the positive winner class
            if ( lIt->y > 0 && currVotesVector[lIt->idx] < minPosClass )
                minPosClass = currVotesVector[lIt->idx];
        }

        // if the vote for the worst positive label is lower than the
        // vote for the highest negative label -> error
        if (minPosClass <= maxNegClass)
            incorrect++;
        else
            correct++;

        // free the per-example temporary
        delete tmpResult;
    }

    acc = ((double) correct / ((double) numExamples)) * 100.0;
    return acc;
}