// Computes the votes iteration by iteration and writes, for each iteration,
// the average log-likelihood of the correct labels under the softmax of the
// current votes.
void AdaBoostMHClassifier::saveLikelihoods(const string& dataFileName, const string& shypFileName,
                                           const string& outFileName, int numIterations)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // load them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());

    if (_verbose > 0)
        cout << "Output likelihoods..." << flush;

    // get the results: this inlines computeResults( pData, weakHypotheses,
    // results, numIterations ) and adds the likelihood computation
    assert( !weakHypotheses.empty() );

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;
    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile);

    // Create the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // per-class posteriors of the current example (softmax of its votes)
    vector< double > expVotesForExamples( numClasses );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    if ( pOutInfo ) // NULL when no output-info file is given
        pOutInfo->initialize( pData );

    // for every weak hypothesis: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
         whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        float alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<float>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed, output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            pOutInfo->outputError(pData, currWeakHyp);

            // Margins and edge require an update of the weights,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);
            pOutInfo->endLine();
        }

        // calculate the likelihoods from the votes
        double lLambda = 0.0;
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<float>& currVotesVector = results[i]->getVotesVector();

            // softmax: exponentiate the votes and normalize
            double sumExp = 0.0;
            for (int l = 0; l < numClasses; ++l)
            {
                expVotesForExamples[l] = exp( currVotesVector[l] );
                sumExp += expVotesForExamples[l];
            }

            if ( sumExp > numeric_limits<double>::epsilon() )
            {
                for (int l = 0; l < numClasses; ++l)
                    expVotesForExamples[l] /= sumExp;
            }

            Example ex = pData->getExample( results[i]->getIdx() );
            vector<Label> labs = ex.getLabels();

            // accumulate the average log-posterior of the correct labels
            for (int l = 0; l < numClasses; ++l)
            {
                if ( labs[l].y > 0 &&
                     expVotesForExamples[l] > numeric_limits<double>::epsilon() )
                {
                    double logVal = log( expVotesForExamples[l] );

                    // skip -infinity (log of a zero posterior), which would poison the sum
                    if ( logVal != -numeric_limits<double>::infinity() )
                        lLambda += ( 1.0 / (double)numExamples ) * logVal;
                }
            }
        }

        outFile << t << "\t" << lLambda << '\n';
        outFile.flush();
    }

    if (pOutInfo)
        delete pOutInfo;
    /*
    // output the posteriors, preceded by the example's name if it exists
    for (int i = 0; i < numExamples; ++i)
    {
        string exampleName = pData->getExampleName(i);
        if ( !exampleName.empty() )
            outFile << exampleName << ',';

        outFile << results[i]->getVotesVector()[0];
        for (int l = 1; l < numClasses; ++l)
            outFile << ',' << results[i]->getVotesVector()[l];
        outFile << '\n';
    }
    */

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
    {
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
    }

    // delete the input data object
    if (pData)
        delete pData;

    // free the results
    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}
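// The likelihood bookkeeping above is just a per-example softmax followed by
// an average of the log-posteriors of the correct labels. The helper below is
// a minimal, self-contained sketch of that step, not part of the class: the
// function name and the plain-vector interface are illustrative assumptions
// (it presumes <cmath>, <limits> and <vector> are included).
static double averageLogLikelihoodSketch(const std::vector< std::vector<float> >& votes,
                                         const std::vector<int>& correctLabels)
{
    double lLambda = 0.0;
    const size_t numExamples = votes.size();

    for (size_t i = 0; i < numExamples; ++i)
    {
        const std::vector<float>& v = votes[i];
        std::vector<double> posteriors(v.size());

        // softmax: exponentiate the votes and normalize them into posteriors
        double sumExp = 0.0;
        for (size_t l = 0; l < v.size(); ++l)
        {
            posteriors[l] = std::exp( v[l] );
            sumExp += posteriors[l];
        }
        for (size_t l = 0; l < v.size(); ++l)
            posteriors[l] /= sumExp;

        // accumulate the average log-posterior of the correct label
        double p = posteriors[ correctLabels[i] ];
        if ( p > std::numeric_limits<double>::epsilon() )
            lLambda += std::log(p) / (double)numExamples;
    }
    return lLambda;
}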
// Returns the results into ptRes
void AdaBoostMHClassifier::computeResults(InputData* pData, vector<BaseLearner*>& weakHypotheses,
                                          vector< ExampleResults* >& results, int numIterations)
{
    assert( !weakHypotheses.empty() );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;
    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile);

    // Create the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    if ( pOutInfo )
        pOutInfo->initialize( pData );

    // for every weak hypothesis: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
         whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        float alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<float>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed, output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            pOutInfo->outputError(pData, currWeakHyp);
            pOutInfo->outputBalancedError(pData, currWeakHyp);

            // note: (t % 1) == 0 always holds; this was presumably meant to be
            // a configurable output period such as (t % P) == 0
            if ( ( t % 1 ) == 0 )
                pOutInfo->outputROC(pData, currWeakHyp);

            // Margins and edge require an update of the weights,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);
            pOutInfo->endLine();
        }
    }

    if (pOutInfo)
        delete pOutInfo;
}
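// Once computeResults() has filled `results`, a caller would typically read a
// prediction off each vote vector by taking its argmax. A minimal sketch under
// that assumption (the function name is illustrative; it uses only the
// ExampleResults::getVotesVector() accessor seen above):
static int argmaxClassSketch(ExampleResults* pResult, int numClasses)
{
    const vector<float>& v = pResult->getVotesVector();

    // pick the class with the largest accumulated vote
    int best = 0;
    for (int l = 1; l < numClasses; ++l)
        if ( v[l] > v[best] )
            best = l;

    return best;
}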
void FilterBoostLearner::run(const nor_utils::Args& args)
{
    // load the arguments
    this->getArgs(args);

    time_t startTime, currentTime;
    time(&startTime);

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);

    // initialize learning options; normally this is done in the strong loop.
    // We also do it here so that Product learners can create their input data.
    pWeakHypothesisSource->initLearningOptions(args);

    BaseLearner* pConstantWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

    // get the training input data, and load it
    InputData* pTrainingData = pWeakHypothesisSource->createInputData();
    pTrainingData->initOptions(args);
    pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

    const int numClasses = pTrainingData->getNumClasses();
    const int numExamples = pTrainingData->getNumExamples();

    // initialize the margins variable
    _margins.resize( numExamples );
    for ( int i = 0; i < numExamples; ++i )
    {
        _margins[i].resize( numClasses );
        fill( _margins[i].begin(), _margins[i].end(), 0.0 );
    }

    // get the testing input data, and load it
    InputData* pTestData = NULL;
    if ( !_testFileName.empty() )
    {
        pTestData = pWeakHypothesisSource->createInputData();
        pTestData->initOptions(args);
        pTestData->load(_testFileName, IT_TEST, _verbose);
    }

    // The output information object
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        // Baseline: constant classifier - goes into the 0th iteration
        BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);
        float constantEnergy = pConstantWeakHypothesis->run(); // trains the baseline; the energy itself is unused

        pOutInfo = new OutputInfo(_outputInfoFile);
        pOutInfo->initialize(pTrainingData);

        updateMargins( pTrainingData, pConstantWeakHypothesis );

        if (pTestData)
            pOutInfo->initialize(pTestData);
        pOutInfo->outputHeader();

        pOutInfo->outputIteration(-1);
        pOutInfo->outputError(pTrainingData, pConstantWeakHypothesis);
        if (pTestData)
            pOutInfo->outputError(pTestData, pConstantWeakHypothesis);
        /*
        pOutInfo->outputMargins(pTrainingData, pConstantWeakHypothesis);
        pOutInfo->outputEdge(pTrainingData, pConstantWeakHypothesis);
        if (pTestData)
            pOutInfo->outputMargins(pTestData, pConstantWeakHypothesis);
        pOutInfo->outputMAE(pTrainingData);
        if (pTestData)
            pOutInfo->outputMAE(pTestData);
        */
        pOutInfo->outputCurrentTime();
        pOutInfo->endLine();

        pOutInfo->initialize(pTrainingData);
        if (pTestData)
            pOutInfo->initialize(pTestData);
    }

    // reload the previously found weak learners if -resume is set;
    // otherwise this just returns 0
    int startingIteration = resumeWeakLearners(pTrainingData);

    Serialization ss(_shypFileName, _isShypCompressed);
    ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

    // perform the resuming if necessary; if not, this will just return
    resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;
    ///////////////////////////////////////////////////////////////////////
    // Starting the FilterBoost main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = startingIteration; t < _numIterations; ++t)
    {
        if (_verbose > 1)
            cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

        filter( pTrainingData, (int)(_Cn * log(t + 2.0)) );
        if ( pTrainingData->getNumExamples() < 2 )
        {
            // too few examples survived the filter: re-run it with the
            // rejection step disabled (presumably what the 'false' flag does)
            // so that training can proceed
            filter( pTrainingData, (int)(_Cn * log(t + 2.0)), false );
        }

        if (_verbose > 1)
            cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;

        BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
        pWeakHypothesis->initLearningOptions(args);
        //pTrainingData->clearIndexSet();
        pWeakHypothesis->setTrainingData(pTrainingData);
        float energy = pWeakHypothesis->run();

        BaseLearner* pConstantWeakHypothesis = NULL;
        if (_withConstantLearner) // train the constant learner if the user wants it
        {
            pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            float constantEnergy = pConstantWeakHypothesis->run();
        }

        // estimate the edge on a freshly filtered sample
        filter( pTrainingData, (int)(_Cn * log(t + 2.0)), false );
        float edge = pWeakHypothesis->getEdge() / 2.0;

        if (_withConstantLearner) // fall back to the constant learner if it is better
        {
            float constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
            if ( constantEdge > edge )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
                edge = constantEdge;
            }
            else
            {
                delete pConstantWeakHypothesis;
            }
        }

        // calculate alpha
        float alpha = 0.5 * log( ( 0.5 + edge ) / ( 0.5 - edge ) );
        pWeakHypothesis->setAlpha( alpha );

        if (_verbose > 1)
            cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

        // Output the step-by-step information
        pTrainingData->clearIndexSet();
        printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

        // Updates the weights and returns the edge
        float gamma = updateWeights(pTrainingData, pWeakHypothesis);

        if (_verbose > 1)
        {
            cout << setprecision(5)
                 << "--> Alpha  = " << pWeakHypothesis->getAlpha() << endl
                 << "--> Edge   = " << gamma << endl
                 << "--> Energy = " << energy << endl
                 // << "--> ConstantEnergy = " << constantEnergy << endl
                 // << "--> difference = " << (energy - constantEnergy) << endl
                 ;
        }

        // update the margins
        updateMargins( pTrainingData, pWeakHypothesis );

        // append the current weak learner to the strong hypothesis file,
        // that is, serialize it
        ss.appendHypothesis(t, pWeakHypothesis);

        // Add it to the internal list of weak hypotheses; _foundHypotheses
        // takes ownership of the pointer, so it must not be deleted here
        _foundHypotheses.push_back(pWeakHypothesis);

        // check if the time limit has been reached
        if (_maxTime > 0)
        {
            time( &currentTime );
            float diff = difftime(currentTime, startTime); // difftime is in seconds
            diff /= 60; // = minutes

            if (diff > _maxTime)
            {
                if (_verbose > 0)
                    cout << "Time limit of " << _maxTime << " minutes has been reached!" << endl;
                break;
            }
        } // check for maxtime
    } // loop on iterations
    /////////////////////////////////////////////////////////

    // write the footer of the strong hypothesis file
    ss.writeFooter();

    // Free the two input data objects
    if (pTrainingData)
        delete pTrainingData;
    if (pTestData)
        delete pTestData;

    if (pOutInfo)
        delete pOutInfo;

    if (_verbose > 0)
        cout << "Learning completed." << endl;
}
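// The alpha assigned in the loop above is the usual symmetric-loss coefficient
// alpha = (1/2) * ln( (1/2 + edge) / (1/2 - edge) ), which is finite only for
// edge in (-1/2, 1/2). A minimal sketch with the domain check made explicit
// (the function name is illustrative; it assumes <cmath> and <cassert>):
static float alphaFromEdgeSketch(float edge)
{
    // the logarithm blows up as |edge| approaches 1/2
    assert( edge > -0.5f && edge < 0.5f );

    return 0.5f * std::log( (0.5f + edge) / (0.5f - edge) );
}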