Example #1
	void AdaBoostMHClassifier::saveLikelihoods(const string& dataFileName, const string& shypFileName, 
		const string& outFileName, int numIterations)
	{
		InputData* pData = loadInputData(dataFileName, shypFileName);

		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;

		// The class that loads the weak hypotheses
		UnSerialization us;

		// Where to put the weak hypotheses
		vector<BaseLearner*> weakHypotheses;

		// loads them
		us.loadHypotheses(shypFileName, weakHypotheses, pData);

		// where the results go
		vector< ExampleResults* > results;

		if (_verbose > 0)
			cout << "Classifying..." << flush;

		const int numClasses = pData->getNumClasses();
		const int numExamples = pData->getNumExamples();


		ofstream outFile(outFileName.c_str());
		string exampleName;

		if (_verbose > 0)
			cout << "Output likelihoods..." << flush;

		// get the results
		/////////////////////////////////////////////////////////////////////
		// computeResults( pData, weakHypotheses, results, numIterations );
		assert( !weakHypotheses.empty() );

		// Initialize the output info
		OutputInfo* pOutInfo = NULL;

		if ( !_outputInfoFile.empty() )
			pOutInfo = new OutputInfo(_outputInfoFile);

		// Create the results structures (one ExampleResults object per example)
		results.clear();
		results.reserve(numExamples);
		for (int i = 0; i < numExamples; ++i)
			results.push_back( new ExampleResults(i, numClasses) );

		// sum votes for classes
		vector< float > votesForExamples( numClasses );
		vector< double > expVotesForExamples( numClasses );

		// iterator over all the weak hypotheses
		vector<BaseLearner*>::const_iterator whyIt;
		int t;

		if ( pOutInfo )
			pOutInfo->initialize( pData );

		// for every weak hypothesis: 1..T
		for (whyIt = weakHypotheses.begin(), t = 0; 
			whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
		{
			BaseLearner* currWeakHyp = *whyIt;
			float alpha = currWeakHyp->getAlpha();

			// for every point
			for (int i = 0; i < numExamples; ++i)
			{
				// a reference for clarity and speed
				vector<float>& currVotesVector = results[i]->getVotesVector();

				// for every class
				for (int l = 0; l < numClasses; ++l)
					currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
			}

			// if needed output the step-by-step information
			if ( pOutInfo )
			{
				pOutInfo->outputIteration(t);
				pOutInfo->outputError(pData, currWeakHyp);

				// Margins and edge require an update of the weights,
				// therefore I keep them out for the moment
				//outInfo.outputMargins(pData, currWeakHyp);
				//outInfo.outputEdge(pData, currWeakHyp);
				pOutInfo->endLine();
			} // if ( pOutInfo )
			// calculate likelihoods from votes

			fill( votesForExamples.begin(), votesForExamples.end(), 0.0 );
			double lLambda = 0.0;
			for (int i = 0; i < numExamples; ++i)
			{
				// a reference for clarity and speed
				vector<float>& currVotesVector = results[i]->getVotesVector();
				double sumExp = 0.0;
				// for every class
				for (int l = 0; l < numClasses; ++l) 
				{				 
					expVotesForExamples[l] =  exp( currVotesVector[l] ) ;
					sumExp += expVotesForExamples[l];
				}			

				if ( sumExp > numeric_limits<double>::epsilon() ) 
				{
					for (int l = 0; l < numClasses; ++l) 
					{
						expVotesForExamples[l] /= sumExp;
					}
				}

				// accumulate the average log-likelihood of the positive labels;
				// the epsilon guard in front of log() avoids log(0) = -infinity
				Example ex = pData->getExample( results[i]->getIdx() );
				vector<Label> labs = ex.getLabels();
				for (int l = 0; l < numClasses; ++l)
				{
					if ( labs[l].y > 0 &&
						expVotesForExamples[l] > numeric_limits<double>::epsilon() )
					{
						lLambda += log( expVotesForExamples[l] ) / (double)numExamples;
					}
				}
			}

			outFile << t << "\t" << lLambda << '\n';
			outFile.flush();
		}

		if (pOutInfo)
			delete pOutInfo;

		// computeResults( pData, weakHypotheses, results, numIterations );
		///////////////////////////////////////////////////////////////////////////////////


		/*
		for (int i = 0; i < numExamples; ++i)
		{
			// output the name if it exists, otherwise the number
			// of the example
			exampleName = pData->getExampleName(i);
			if ( !exampleName.empty() )
				outFile << exampleName << ',';

			// output the posteriors
			outFile << results[i]->getVotesVector()[0];
			for (int l = 1; l < numClasses; ++l)
				outFile << ',' << results[i]->getVotesVector()[l];
			outFile << '\n';
		}
		*/

		if (_verbose > 0)
			cout << "Done!" << endl;

		if (_verbose > 1)
		{
			cout << "\nClass order (You can change it in the header of the data file):" << endl;
			for (int l = 0; l < numClasses; ++l)
				cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
		}

		// delete the input data object
		if (pData) 
			delete pData;

		vector<ExampleResults*>::iterator it;
		for (it = results.begin(); it != results.end(); ++it)
			delete (*it);
	}
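
The per-round quantity written by saveLikelihoods is a softmax over the accumulated votes, followed by the average log-probability of the positive labels. Below is a minimal standalone sketch of that computation; the function name and data layout are illustrative, not part of the MultiBoost API:

	#include <cmath>
	#include <cstdio>
	#include <limits>
	#include <vector>

	// Average log-likelihood of the positive labels under a softmax over
	// the accumulated votes: p_l = exp(v_l) / sum_k exp(v_k).
	// votes[i][l] is the vote total for example i, class l;
	// isPositive[i][l] marks the true labels of example i.
	double averageLogLikelihood(const std::vector< std::vector<double> >& votes,
	                            const std::vector< std::vector<bool> >& isPositive)
	{
		const double eps = std::numeric_limits<double>::epsilon();
		const size_t numExamples = votes.size();
		double lLambda = 0.0;

		for (size_t i = 0; i < numExamples; ++i)
		{
			// softmax normalization constant
			double sumExp = 0.0;
			for (size_t l = 0; l < votes[i].size(); ++l)
				sumExp += std::exp(votes[i][l]);

			for (size_t l = 0; l < votes[i].size(); ++l)
			{
				double p = std::exp(votes[i][l]) / sumExp;
				if (isPositive[i][l] && p > eps) // skip log(0)
					lLambda += std::log(p) / (double)numExamples;
			}
		}
		return lLambda;
	}

	int main()
	{
		std::vector< std::vector<double> > votes(1, std::vector<double>(3));
		votes[0][0] = 2.0; votes[0][1] = 0.5; votes[0][2] = -1.0;
		std::vector< std::vector<bool> > pos(1, std::vector<bool>(3, false));
		pos[0][0] = true; // class 0 is the true label
		std::printf("lLambda = %f\n", averageLogLikelihood(votes, pos));
		return 0;
	}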
Example #2
	// Fills the results vector with the accumulated votes
	void AdaBoostMHClassifier::computeResults(InputData* pData, vector<BaseLearner*>& weakHypotheses, 
		vector< ExampleResults* >& results, int numIterations)
	{
		assert( !weakHypotheses.empty() );

		const int numClasses = pData->getNumClasses();
		const int numExamples = pData->getNumExamples();

		// Initialize the output info
		OutputInfo* pOutInfo = NULL;

		if ( !_outputInfoFile.empty() )
			pOutInfo = new OutputInfo(_outputInfoFile);

		// Create the results structures (one ExampleResults object per example)
		results.clear();
		results.reserve(numExamples);
		for (int i = 0; i < numExamples; ++i)
			results.push_back( new ExampleResults(i, numClasses) );

		// iterator over all the weak hypotheses
		vector<BaseLearner*>::const_iterator whyIt;
		int t;

		if ( pOutInfo )
			pOutInfo->initialize( pData );

		// for every weak hypothesis: 1..T
		for (whyIt = weakHypotheses.begin(), t = 0; 
			whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
		{
			BaseLearner* currWeakHyp = *whyIt;
			float alpha = currWeakHyp->getAlpha();

			// for every point
			for (int i = 0; i < numExamples; ++i)
			{
				// a reference for clarity and speed
				vector<float>& currVotesVector = results[i]->getVotesVector();

				// for every class
				for (int l = 0; l < numClasses; ++l)
					currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
			}

			// if needed output the step-by-step information
			if ( pOutInfo )
			{
				pOutInfo->outputIteration(t);
				pOutInfo->outputError(pData, currWeakHyp);
				
				pOutInfo->outputBalancedError(pData, currWeakHyp);
				// (t % 1) == 0 always holds; this looks like a placeholder
				// for a configurable output period
				if ( ( t % 1 ) == 0 ) {
					pOutInfo->outputROC(pData, currWeakHyp);
				}

				// Margins and edge require an update of the weights,
				// therefore I keep them out for the moment
				//outInfo.outputMargins(pData, currWeakHyp);
				//outInfo.outputEdge(pData, currWeakHyp);
				pOutInfo->endLine();
			}
		}

		if (pOutInfo)
			delete pOutInfo;

	}
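
computeResults only accumulates the weighted votes; a caller then typically takes the argmax over classes to obtain the predicted label. A small hedged sketch of that final step, written against a plain vector of votes rather than the ExampleResults type (the helper below is illustrative, not part of the library):

	#include <algorithm>
	#include <cassert>
	#include <vector>

	// Predicted class = index of the largest accumulated vote, mirroring
	// what a caller would do with results[i]->getVotesVector().
	int predictedClass(const std::vector<float>& votes)
	{
		assert( !votes.empty() );
		return (int)( std::max_element(votes.begin(), votes.end()) - votes.begin() );
	}

	int main()
	{
		std::vector<float> votes;
		votes.push_back(0.3f); votes.push_back(1.7f); votes.push_back(-0.4f);
		return predictedClass(votes) == 1 ? 0 : 1; // class 1 has the top vote
	}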
Example #3
	void FilterBoostLearner::run(const nor_utils::Args& args)
	{
		// load the arguments
		this->getArgs(args);

		time_t startTime, currentTime;
		time(&startTime);

		// get the registered weak learner (type from name)
		BaseLearner* pWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
		// initialize the learning options; normally this happens inside the
		// strong-learner loop, but we also do it here for Product learners,
		// so that the input data can be created
		pWeakHypothesisSource->initLearningOptions(args);

		BaseLearner* pConstantWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

		// get the training input data, and load it

		InputData* pTrainingData = pWeakHypothesisSource->createInputData();
		pTrainingData->initOptions(args);
		pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

		const int numClasses = pTrainingData->getNumClasses();
		const int numExamples = pTrainingData->getNumExamples();
		
		// initialize the margins table (numExamples x numClasses, all zeros)
		_margins.resize( numExamples );
		for( int i=0; i<numExamples; i++ )
		{
			_margins[i].resize( numClasses );
			fill( _margins[i].begin(), _margins[i].end(), 0.0 );
		}


		// get the testing input data, and load it
		InputData* pTestData = NULL;
		if ( !_testFileName.empty() )
		{
			pTestData = pWeakHypothesisSource->createInputData();
			pTestData->initOptions(args);
			pTestData->load(_testFileName, IT_TEST, _verbose);
		}

		// The output information object
		OutputInfo* pOutInfo = NULL;


		if ( !_outputInfoFile.empty() ) 
		{
			// Baseline: constant classifier - goes into 0th iteration

			BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
			pConstantWeakHypothesis->initLearningOptions(args);
			pConstantWeakHypothesis->setTrainingData(pTrainingData);
			float constantEnergy = pConstantWeakHypothesis->run();

			pOutInfo = new OutputInfo(_outputInfoFile);
			pOutInfo->initialize(pTrainingData);

			updateMargins( pTrainingData, pConstantWeakHypothesis );

			if (pTestData)
				pOutInfo->initialize(pTestData);
			pOutInfo->outputHeader();

			pOutInfo->outputIteration(-1);
			pOutInfo->outputError(pTrainingData, pConstantWeakHypothesis);

			if (pTestData)
				pOutInfo->outputError(pTestData, pConstantWeakHypothesis);
			/*
			pOutInfo->outputMargins(pTrainingData, pConstantWeakHypothesis);
			
			pOutInfo->outputEdge(pTrainingData, pConstantWeakHypothesis);

			if (pTestData)
				pOutInfo->outputMargins(pTestData, pConstantWeakHypothesis);

			pOutInfo->outputMAE(pTrainingData);

			if (pTestData)
				pOutInfo->outputMAE(pTestData);
			*/
			pOutInfo->outputCurrentTime();

			pOutInfo->endLine();
			pOutInfo->initialize(pTrainingData);
			
			if (pTestData)
				pOutInfo->initialize(pTestData);
		}
		// reload the previously found weak learners if -resume is set;
		// otherwise resumeWeakLearners just returns 0
		int startingIteration = resumeWeakLearners(pTrainingData);


		Serialization ss(_shypFileName, _isShypCompressed );
		ss.writeHeader(_baseLearnerName); // this must go after resumeWeakLearners has been called

		// perform the resuming if necessary. If not it will just return
		resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

		if (_verbose == 1)
			cout << "Learning in progress..." << endl;

		///////////////////////////////////////////////////////////////////////
		// Starting the AdaBoost main loop
		///////////////////////////////////////////////////////////////////////
		for (int t = startingIteration; t < _numIterations; ++t)
		{
			if (_verbose > 1)
				cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

			// draw a fresh filtered training sample; its size grows like
			// _Cn * log(t+2), following the FilterBoost sampling schedule
			filter( pTrainingData, (int)(_Cn * log(t+2.0)) );
			if ( pTrainingData->getNumExamples() < 2 ) 
			{
				// too few examples survived: redraw with filtering disabled
				filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
			}
			
			if (_verbose > 1)
			{
				cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
			}

			BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
			pWeakHypothesis->initLearningOptions(args);
			//pTrainingData->clearIndexSet();
			pWeakHypothesis->setTrainingData(pTrainingData);
			float energy = pWeakHypothesis->run();

			BaseLearner* pConstantWeakHypothesis = NULL;
			if (_withConstantLearner) // check constant learner if user wants it
			{
				pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
				pConstantWeakHypothesis->initLearningOptions(args);
				pConstantWeakHypothesis->setTrainingData(pTrainingData);
				pConstantWeakHypothesis->run(); // the returned energy is not needed here
			}

			// estimate the edge on a freshly drawn sample (filtering disabled)
			filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
			float edge = pWeakHypothesis->getEdge() / 2.0;

			if (_withConstantLearner) // check constant learner if user wants it
			{
				float constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
				if ( constantEdge > edge )
				{
					delete pWeakHypothesis;
					pWeakHypothesis = pConstantWeakHypothesis;
					edge = constantEdge;
				} else {
					delete pConstantWeakHypothesis;
				}
			}

			// confidence-rated alpha from the edge: 0.5 * ln((0.5+edge)/(0.5-edge))
			float alpha = 0.5 * log( ( 0.5 + edge ) / ( 0.5 - edge ) );
			pWeakHypothesis->setAlpha( alpha );

			if (_verbose > 1)
				cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
			// Output the step-by-step information
			pTrainingData->clearIndexSet();
			printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

			// Updates the weights and returns the edge
			float gamma = updateWeights(pTrainingData, pWeakHypothesis);

			if (_verbose > 1)
			{
				cout << setprecision(5)
					<< "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
					<< "--> Edge  = " << gamma << endl
					<< "--> Energy  = " << energy << endl
					//            << "--> ConstantEnergy  = " << constantEnergy << endl
					//            << "--> difference  = " << (energy - constantEnergy) << endl
					;
			}

			// update the margins
			updateMargins( pTrainingData, pWeakHypothesis );

			// append the current weak learner to strong hypothesis file,
			// that is, serialize it.
			ss.appendHypothesis(t, pWeakHypothesis);

			// Add it to the internal list of weak hypotheses
			_foundHypotheses.push_back(pWeakHypothesis); 

			// check if the time limit has been reached
			if (_maxTime > 0)
			{
				time( &currentTime );
				float diff = difftime(currentTime, startTime); // difftime is in seconds
				diff /= 60; // = minutes

				if (diff > _maxTime)
				{
					if (_verbose > 0)
						cout << "Time limit of " << _maxTime 
						<< " minutes has been reached!" << endl;
					break;     
				}
			} // check for maxtime
			// NOTE: pWeakHypothesis must not be deleted here: it is now owned
			// by _foundHypotheses (and referenced by the serialized model)
		}  // loop on iterations
		/////////////////////////////////////////////////////////

		// write the footer of the strong hypothesis file
		ss.writeFooter();

		// Free the two input data objects
		if (pTrainingData)
			delete pTrainingData;
		if (pTestData)
			delete pTestData;

		if (pOutInfo)
			delete pOutInfo;

		if (_verbose > 0)
			cout << "Learning completed." << endl;
	}
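
The alpha update in Example #3 is the usual confidence-rated formula alpha = 0.5 * ln((0.5 + edge) / (0.5 - edge)), with the edge taken as half of what getEdge() returns. A minimal sketch of just that step, assuming only that the edge lies strictly inside (-0.5, 0.5):

	#include <cmath>
	#include <cstdio>

	// FilterBoost-style weight of a weak hypothesis: a larger edge
	// (a weak learner further from random guessing) yields a larger alpha.
	float alphaFromEdge(float edge)
	{
		return 0.5f * std::log( (0.5f + edge) / (0.5f - edge) );
	}

	int main()
	{
		std::printf("edge=0.10 -> alpha=%f\n", alphaFromEdge(0.10f));
		std::printf("edge=0.25 -> alpha=%f\n", alphaFromEdge(0.25f));
		return 0;
	}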