Exemplo n.º 1
	// -----------------------------------------------------------------------
	// -----------------------------------------------------------------------
	/**
	 * Builds a DataReader from the values of the "traintestmdp" option.
	 *
	 * The visible code reads five positional values of that option, loads the
	 * input data, loads the weak hypotheses of the strong hypothesis file and
	 * then adds up their alphas into _sumAlphas.
	 *
	 * NOTE(review): this excerpt contains no braces, seems to have lost at least
	 * one statement (see the marker below) and ends inside the final loop;
	 * confirm every remark here against the complete source file.
	 *
	 * \param args The parsed arguments; kept in _args.
	 * \param verbose The verbosity level; progress is printed when it is > 0.
	 */
	DataReader::DataReader(const nor_utils::Args& args, int verbose) : _verbose(verbose), _args(args)
		// Positional values of "traintestmdp":
		//   0 = training file, 1 = test file, 2 = number of iterations,
		//   3 = strong hypothesis file, 4 = a further file name (not used in
		//   the visible part of this constructor).
		string mdpTrainFileName = _args.getValue<string>("traintestmdp", 0);				
		string testFileName = _args.getValue<string>("traintestmdp", 1);				
		string shypFileName = _args.getValue<string>("traintestmdp", 3);
		_numIterations = _args.getValue<int>("traintestmdp", 2);				
		string tmpFname = _args.getValue<string>("traintestmdp", 4);
		if (_verbose > 0)
			cout << "Loading arff data for MDP learning..." << flush;
		// load the arff
		loadInputData(mdpTrainFileName, testFileName, shypFileName);
		if (_verbose > 0)
			cout << "Done." << endl << flush;
		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;
		// The class that loads the weak hypotheses
		UnSerialization us;
		// loads them into _weakHypotheses, using the training data
		us.loadHypotheses(shypFileName, _weakHypotheses, _pTrainData);			
		// NOTE(review): the statement controlled by this `if` is not visible in
		// this excerpt (presumably the list is truncated to _numIterations --
		// confirm). As written, it would guard the verbosity check that follows.
		if (_numIterations<_weakHypotheses.size())
		if (_verbose > 0)
			cout << "Done." << endl << flush;			
		// At least _numIterations hypotheses must be available
		assert( _weakHypotheses.size() >= _numIterations );
		// calculate the sum of alphas
		// NOTE(review): _sumAlphas is only accumulated here; its initialization
		// is not visible in this excerpt.
		vector<BaseLearner*>::iterator it;
		for( it = _weakHypotheses.begin(); it != _weakHypotheses.end(); ++it )
			BaseLearner* currBLearner = *it;
			_sumAlphas += currBLearner->getAlpha();
Exemplo n.º 2
	// -----------------------------------------------------------------------
	// -----------------------------------------------------------------------
	/**
	 * Initializes the classifier from the values of the "traintestmdp" option.
	 *
	 * The visible code reads five positional values of that option, opens
	 * _outputStream on the fifth one, loads the input data and loads the weak
	 * hypotheses of the strong hypothesis file into _weakHypotheses.
	 *
	 * NOTE(review): this excerpt contains no braces and seems to have lost
	 * statements (see the marker near the end); confirm against the complete
	 * source file.
	 */
	void AdaBoostMDPClassifier::init()
		// Positional values of "traintestmdp":
		//   0 = training file, 1 = test file, 2 = number of iterations,
		//   3 = strong hypothesis file, 4 = file opened as _outputStream.
		string mdpTrainFileName = _args.getValue<string>("traintestmdp", 0);				
		string testFileName = _args.getValue<string>("traintestmdp", 1);				
		string shypFileName = _args.getValue<string>("traintestmdp", 3);
		_numIterations = _args.getValue<int>("traintestmdp", 2);				
		string tmpFname = _args.getValue<string>("traintestmdp", 4);
		// NOTE(review): the result of open() is not checked here.
		_outputStream.open( tmpFname.c_str() );
		if (_verbose > 0)
			cout << "Loading arff data for MDP learning..." << flush;
		// load the arff
		loadInputData(mdpTrainFileName, testFileName, shypFileName);
		if (_verbose > 0)
			cout << "Done." << endl << flush;
		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;
		// The class that loads the weak hypotheses
		UnSerialization us;
		// loads them into _weakHypotheses, using _pData
		us.loadHypotheses(shypFileName, _weakHypotheses, _pData);			
		if (_verbose > 0)
			cout << "Done." << endl << flush;			
		// At least _numIterations hypotheses must be available
		assert( _weakHypotheses.size() >= _numIterations );
		if (_verbose > 0)
			cout << "Allocating grid world..." << flush;
		// NOTE(review): no allocation is visible between the two messages;
		// the corresponding statements appear to be missing from this excerpt.
		if (_verbose > 0)
			cout << "Done." << endl << flush;			
Exemplo n.º 3
	/**
	 * Reloads the weak hypotheses of a previous run from the resume file.
	 *
	 * When _resumeShypFileName is empty nothing is loaded. Otherwise the
	 * hypotheses stored in that file are loaded into _foundHypotheses through
	 * UnSerialization::loadHypotheses().
	 *
	 * \param pTrainingData The training data handed to the loader.
	 * \return The number of hypotheses held in _foundHypotheses after loading,
	 * or 0 when no resume file has been set.
	 */
	int FilterBoostLearner::resumeWeakLearners(InputData* pTrainingData)
	{
		// Nothing to resume from
		if (_resumeShypFileName.empty())
			return 0;

		if (_verbose > 0)
			cout << "Reloading strong hypothesis file <" << _resumeShypFileName << ">.." << flush;

		// The class that loads the weak hypotheses
		UnSerialization us;

		// Load them straight into the list of found hypotheses
		us.loadHypotheses(_resumeShypFileName, _foundHypotheses, pTrainingData, _verbose);

		if (_verbose > 0)
			cout << "Done!" << endl;

		// The number of iterations found
		return static_cast<int>( _foundHypotheses.size() );
	}
Exemplo n.º 4
/**
 * Restores the state of a ParasiteLearner from a serialized stream.
 *
 * Reads the values enclosed in the "alphasign", "poolfile" and "learneridx"
 * tags into _signOfAlpha, _nameBaseLearnerFile and _selectedIdx, and, if the
 * pool of base learners is still empty, loads it from _nameBaseLearnerFile.
 *
 * NOTE(review): this excerpt ends inside the `if` block below; the rest of
 * the function is not visible here.
 *
 * \param st The stream tokenizer the values are read from.
 */
void ParasiteLearner::load(nor_utils::StreamTokenizer& st)
   //   cout << "Sorry, you can't load a ParasiteLearner" << endl << flush;
   //   exit(1);
   // Calling the super-class method
   // NOTE(review): no call to a super-class method is visible at this point;
   // it appears to be missing from this excerpt.

   _signOfAlpha = UnSerialization::seekAndParseEnclosedValue<int>(st, "alphasign");
   _nameBaseLearnerFile = UnSerialization::seekAndParseEnclosedValue<string>(st, "poolfile");
   _selectedIdx = UnSerialization::seekAndParseEnclosedValue<int>(st, "learneridx");

   // The pool is loaded only once: skip it when it is already filled
   if (_baseLearners.size() == 0) {
      // load the base learners
      if (_verbose >= 2)
	 cout << "loading " << _nameBaseLearnerFile << ".." << flush;
      UnSerialization us;
      us.loadHypotheses( _nameBaseLearnerFile, _baseLearners, _pTrainingData, _verbose);
      if (_verbose >= 2)
	 cout << "finished " << endl << flush;
Exemplo n.º 5
void MDDAGClassifier::run(const string& dataFileName, const string& shypFileName,
                          int numIterations, const string& outResFileName, int numRanksEnclosed)
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, numIterations );

    const int numClasses = pData->getNumClasses();

    if (_verbose > 0)
        // well.. if verbose = 0 no results are displayed! :)
        cout << "Done!" << endl;

        vector< vector<float> > rankedError(numRanksEnclosed);

        // Get the per-class error for the numRanksEnclosed-th ranks
        for (int i = 0; i < numRanksEnclosed; ++i)
            getClassError( pData, results, rankedError[i], i );

        // output it
        cout << endl;
        cout << "Error Summary" << endl;
        cout << "=============" << endl;

        for ( int l = 0; l < numClasses; ++l )
            // first rank (winner): rankedError[0]
            cout << "Class '" << pData->getClassMap().getNameFromIdx(l) << "': "
                 << setprecision(4) << rankedError[0][l] * 100 << "%";

            // output the others on its side
            if (numRanksEnclosed > 1 && _verbose > 1)
                cout << " (";
                for (int i = 1; i < numRanksEnclosed; ++i)
                    cout << " " << i+1 << ":[" << setprecision(4) << rankedError[i][l] * 100 << "%]";
                cout << " )";

            cout << endl;

        // the overall error
        cout << "\n--> Overall Error: "
             << setprecision(4) << getOverallError(pData, results, 0) * 100 << "%";

        // output the others on its side
        if (numRanksEnclosed > 1 && _verbose > 1)
            cout << " (";
            for (int i = 1; i < numRanksEnclosed; ++i)
                cout << " " << i+1 << ":[" << setprecision(4) << getOverallError(pData, results, i) * 100 << "%]";
            cout << " )";

        cout << endl;

    } // verbose

    // If asked output the results
    if ( !outResFileName.empty() )
        const int numExamples = pData->getNumExamples();
        ofstream outRes(outResFileName.c_str());

        outRes << "Instance" << '\t' << "Forecast" << '\t' << "Labels" << '\n';

        string exampleName;

        for (int i = 0; i < numExamples; ++i)
            // output the name if it exists, otherwise the number
            // of the example
            exampleName = pData->getExampleName(i);
            if ( exampleName.empty() )
                outRes << i << '\t';
                outRes << exampleName << '\t';

            // output the predicted class
            outRes << pData->getClassMap().getNameFromIdx( results[i]->getWinner().first ) << '\t';

            outRes << '|';

            vector<Label>& labels = pData->getLabels(i);
            for (vector<Label>::iterator lIt=labels.begin(); lIt != labels.end(); ++lIt) {
                if (lIt->y>0)
                    outRes << ' ' << pData->getClassMap().getNameFromIdx(lIt->idx);

            outRes << endl;

        if (_verbose > 0)
            cout << "\nPredictions written on file <" << outResFileName << ">!" << endl;


    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
Exemplo n.º 6
void MDDAGClassifier::saveLikelihoods(const string& dataFileName, const string& shypFileName,
                                      const string& outFileName, int numIterations)
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());
    string exampleName;

    if (_verbose > 0)
        cout << "Output likelihoods..." << flush;

    // get the results
    // computeResults( pData, weakHypotheses, results, numIterations );
    assert( !weakHypotheses.empty() );

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile, "err");

    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // sum votes for classes
    vector< AlphaReal > votesForExamples( numClasses );
    vector< AlphaReal > expVotesForExamples( numClasses );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    pOutInfo->initialize( pData );

    // for every feature: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
            whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);

        // if needed output the step-by-step information
        if ( pOutInfo )
            pOutInfo->outputCustom(pData, currWeakHyp);

            // Margins and edge requires an update of the weight,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);


        } // for (int i = 0; i < numExamples; ++i)
        // calculate likelihoods from votes

        fill( votesForExamples.begin(), votesForExamples.end(), 0.0 );
        AlphaReal lLambda = 0.0;
        for (int i = 0; i < numExamples; ++i)
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();
            AlphaReal sumExp = 0.0;
            // for every class
            for (int l = 0; l < numClasses; ++l)
                expVotesForExamples[l] =  exp( currVotesVector[l] ) ;
                sumExp += expVotesForExamples[l];

            if ( sumExp > numeric_limits<AlphaReal>::epsilon() )
                for (int l = 0; l < numClasses; ++l)
                    expVotesForExamples[l] /= sumExp;

            Example ex = pData->getExample( results[i]->getIdx() );
            vector<Label> labs = ex.getLabels();
            AlphaReal m = numeric_limits<AlphaReal>::infinity();
            for (int l = 0; l < numClasses; ++l)
                if ( labs[l].y > 0 )
                    if ( expVotesForExamples[l] > numeric_limits<AlphaReal>::epsilon() )
                        AlphaReal logVal = log( expVotesForExamples[l] );

                        if ( logVal != m ) {
                            lLambda += ( ( 1.0/(AlphaReal)numExamples ) * logVal );


        outFile << t << "\t" << lLambda ;
        outFile << '\n';


    if (pOutInfo)
        delete pOutInfo;

    // computeResults( pData, weakHypotheses, results, numIterations );

     for (int i = 0; i < numExamples; ++i)
     // output the name if it exists, otherwise the number
     // of the example
     exampleName = pData->getExampleName(i);
     if ( !exampleName.empty() )
     outFile << exampleName << ',';

     // output the posteriors
     outFile << results[i]->getVotesVector()[0];
     for (int l = 1; l < numClasses; ++l)
     outFile << ',' << results[i]->getVotesVector()[l];
     outFile << '\n';

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
Exemplo n.º 7
void MDDAGClassifier::saveCalibratedPosteriors(const string& dataFileName, const string& shypFileName,
        const string& outFileName, int numIterations)
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, numIterations );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());
    string exampleName;

    if (_verbose > 0)
        cout << "Output posteriors..." << flush;

    for (int i = 0; i < numExamples; ++i)
        // output the name if it exists, otherwise the number
        // of the example
        exampleName = pData->getExampleName(i);
        if ( !exampleName.empty() )
            outFile << exampleName << ',';

        // output the posteriors
        outFile << results[i]->getVotesVector()[0];
        for (int l = 1; l < numClasses; ++l)
            outFile << ',' << results[i]->getVotesVector()[l];
        outFile << '\n';

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
Exemplo n.º 8
void MDDAGClassifier::saveConfusionMatrix(const string& dataFileName, const string& shypFileName,
        const string& outFileName)
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, (int)weakHypotheses.size() );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());


    for (int l = 0; l < numClasses; ++l)
        outFile << '\t' << pData->getClassMap().getNameFromIdx(l);
    outFile << endl;

    for (int l = 0; l < numClasses; ++l)
        vector<int> winnerCount(numClasses, 0);
        for (int i = 0; i < numExamples; ++i)
            if ( pData->hasPositiveLabel(i,l) )
                ++winnerCount[ results[i]->getWinner().first ];

        // class name
        outFile << pData->getClassMap().getNameFromIdx(l);

        for (int j = 0; j < numClasses; ++j)
            outFile << '\t' << winnerCount[j];

        outFile << endl;


    if (_verbose > 0)
        cout << "Done!" << endl;

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
Exemplo n.º 9
void MDDAGClassifier::printConfusionMatrix(const string& dataFileName, const string& shypFileName)
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, (int)weakHypotheses.size());

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    if (_verbose > 0)
        cout << "Done!" << endl;

    const int colSize = 7;

    if (_verbose > 0)
        cout << "Raw Confusion Matrix:\n";
        cout << setw(colSize) << "Truth       ";

        for (int l = 0; l < numClasses; ++l)
            cout << setw(colSize) << nor_utils::getAlphanumeric(l);

        cout << "\nClassification\n";

        for (int l = 0; l < numClasses; ++l)
            vector<int> winnerCount(numClasses, 0);
            for (int i = 0; i < numExamples; ++i)
                if ( pData->hasPositiveLabel(i, l) )
                    ++winnerCount[ results[i]->getWinner().first ];

            // class
            cout << setw(colSize) << "           " << nor_utils::getAlphanumeric(l);

            for (int j = 0; j < numClasses; ++j)
                cout << setw(colSize) << winnerCount[j];

            cout << endl;


    cout << "\nMatrix Key:\n";

    // Print the legend
    for (int l = 0; l < numClasses; ++l)
        cout << setw(5) << nor_utils::getAlphanumeric(l) << ": " <<
             pData->getClassMap().getNameFromIdx(l) << "\n";

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
Exemplo n.º 10
	void AdaBoostMHClassifier::saveROC(const string& dataFileName, const string& shypFileName, 
		const string& outFileName, int numIterations)
		InputData* pData = loadInputData(dataFileName, shypFileName);
		ofstream outFile(outFileName.c_str());
		if ( ! outFile.is_open() )
			cout << "Cannot open outfile" << endl;
			exit( -1 );

		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;

		// The class that loads the weak hypotheses
		UnSerialization us;

		// Where to put the weak hypotheses
		vector<BaseLearner*> weakHypotheses;

		// loads them
		us.loadHypotheses(shypFileName, weakHypotheses, pData);
		weakHypotheses.resize( numIterations );

		// where the results go
		vector< ExampleResults* > results;

		if (_verbose > 0)
			cout << "Classifying..." << flush;

		// get the results
		computeResults( pData, weakHypotheses, results, weakHypotheses.size());

		const int numClasses = pData->getNumClasses();
		const int numExamples = pData->getNumExamples();

		if (_verbose > 0)
			cout << "Done!" << endl;		

		vector< pair< int, double> > sortedExample( numExamples );
		for( int i=0; i<numExamples; i++ )
			sortedExample[i].first = i;
			sortedExample[i].second = results[i]->getVotesVector()[0];
		sort( sortedExample.begin(), sortedExample.end(), nor_utils::comparePair< 2, int, double, greater<double> >() );

		vector<double> positiveWeights( numExamples );
		double sumOfPositiveWeights = 0.0;

		vector<double>  negativeWeights( numExamples );
		double sumOfNegativeWeights = 0.0;
		fill( positiveWeights.begin(), positiveWeights.end(), 0.0 );
		fill( negativeWeights.begin(), negativeWeights.end(), 0.0 );

		string className = pData->getClassMap().getNameFromIdx( 0 );

		vector<Label>& labels = pData->getLabels( sortedExample[0].first );
		vector<Label>::iterator labIt = find( labels.begin(), labels.end(), 0);
		if ( labIt != labels.end() )
			if ( labIt->y > 0.0 )
				positiveWeights[0] = labIt->initialWeight;
				sumOfPositiveWeights += labIt->initialWeight;
			} else
				negativeWeights[0] = labIt->initialWeight;
				sumOfNegativeWeights += labIt->initialWeight;
		for( int i=1; i<numExamples; i++ )
			labels = pData->getLabels( sortedExample[i].first );
			labIt = find( labels.begin(), labels.end(), 0);
			if ( labIt != labels.end() )
				if ( labIt->y > 0.0 )
					negativeWeights[i] = negativeWeights[i-1];
					positiveWeights[i] = positiveWeights[i-1] + labIt->initialWeight;
					sumOfPositiveWeights += labIt->initialWeight;
				} else
					positiveWeights[i] = positiveWeights[i-1];
					negativeWeights[i] = negativeWeights[i-1] + labIt->initialWeight;
					sumOfNegativeWeights += labIt->initialWeight;
			} else {
				positiveWeights[i] = positiveWeights[i-1];
				negativeWeights[i] = negativeWeights[i-1];

		outFile << "Class name: " << className << endl;
		for( int i=0; i<numExamples; i++ )
			outFile <<  sortedExample[i].first << " ";
			// false positive rate
			outFile << ( positiveWeights[i] / sumOfPositiveWeights ) << " ";
			//true negative rate
			outFile << ( negativeWeights[i] / sumOfNegativeWeights ) << endl;


		// delete the input data file
		if (pData) 
			delete pData;

		vector<ExampleResults*>::iterator it;
		for (it = results.begin(); it != results.end(); ++it)
			delete (*it);
Exemplo n.º 11
/**
 * Selects one base learner out of the pool and computes its alpha.
 *
 * If the pool (_baseLearners) is empty it is first loaded from
 * _nameBaseLearnerFile. The first _numBaseLearners learners of the pool are
 * then evaluated on the training data, and _selectedIdx, _alpha and (in the
 * "closed" branches) _signOfAlpha are set for the chosen one. Four variants
 * exist, depending on _closed and on whether _theta is zero:
 *  - _theta zero: the learner is chosen on the weighted sum
 *    weight * classification * label (compared in absolute value when _closed
 *    is set), and eps_pls / eps_min are computed for the chosen learner only;
 *  - _theta not zero: eps_pls / eps_min, alpha and the energy are computed for
 *    every learner, and the one with the lowest energy that also satisfies
 *    eps_pls > eps_min + _theta is kept.
 *
 * NOTE(review): most closing braces and a few statements are missing from
 * this excerpt (see the markers below); confirm against the complete file.
 *
 * \return The energy of the selected learner (bestE); it is still
 * numeric_limits<float>::max() if no learner was accepted in a branch with a
 * non-zero _theta.
 */
float ParasiteLearner::run()
   if (_baseLearners.size() == 0) {
      // load the base learners
      if (_verbose >= 2)
	 cout << "loading " << _nameBaseLearnerFile << ".." << flush;
      UnSerialization us;
      us.loadHypotheses( _nameBaseLearnerFile, _baseLearners, _pTrainingData, _verbose);
      if (_verbose >= 2)
	 cout << "finished " << endl << flush;
   // -1 means "use the whole pool"; the count is also capped by the pool size
   if ( _numBaseLearners == -1 || _numBaseLearners > _baseLearners.size())
      _numBaseLearners = _baseLearners.size();
   const int numClasses = _pTrainingData->getNumClasses();
   const int numExamples = _pTrainingData->getNumExamples();
   float tmpAlpha;
   float bestE = numeric_limits<float>::max();
   float sumGamma, bestSumGamma = -numeric_limits<float>::max();
   float tmpE, gamma;
   float eps_min,eps_pls;
   int tmpSignOfAlpha;

   // This is the bottleneck, squeeze out every microsecond
   // NOTE(review): `labels` is declared by value in the loops below, so the
   // label vector of every example is copied on each pass.
   if (_closed) {
      bestSumGamma = 0;
      if ( nor_utils::is_zero(_theta) ) {
	 // Pick the learner whose weighted sum is largest in absolute value
	 for (int j = 0; j < _numBaseLearners; ++j) {
	    sumGamma = 0;
	    for (int i = 0; i < numExamples; ++i) {
	       vector<Label> labels = _pTrainingData->getLabels(i);
	       for (int l = 0; l < numClasses; ++l)
		  sumGamma += labels[l].weight * 
		     _baseLearners[j]->classify(_pTrainingData,i,l) * labels[l].y;
	    if (fabs(sumGamma) > fabs(bestSumGamma)) {
	       _selectedIdx = j;
	       bestSumGamma = sumGamma;
	 // Weights on which the selected learner is right (pls) or wrong (min)
	 eps_pls = eps_min = 0;
	 for (int i = 0; i < numExamples; ++i) {
	    vector<Label> labels = _pTrainingData->getLabels(i);
	    for (int l = 0; l < numClasses; ++l) {
	       // NOTE(review): the right-hand operand of this product is missing
	       // from the excerpt (presumably labels[l].y, as in the other
	       // branches -- confirm).
	       gamma = _baseLearners[_selectedIdx]->classify(_pTrainingData,i,l) *
	       if ( gamma > 0 )
		  eps_pls += labels[l].weight;
	       else if ( gamma < 0 )
		  eps_min += labels[l].weight;
	 // The two weights are swapped and the sign of alpha is flipped when the
	 // learner is wrong more often than right.
	 // NOTE(review): no assignment for the opposite case is visible here.
	 if (eps_min > eps_pls) {
	    float tmpSwap = eps_min;
	    eps_min = eps_pls;
	    eps_pls = tmpSwap;
	    _signOfAlpha = -1;
	 _alpha = getAlpha(eps_min, eps_pls);
	 bestE = BaseLearner::getEnergy( eps_min, eps_pls );
      else {
	 // Non-zero theta: evaluate the energy of every learner
	 for (int j = 0; j < _numBaseLearners; ++j) {
	    eps_pls = eps_min = 0;
	    for (int i = 0; i < numExamples; ++i) {
	       vector<Label> labels = _pTrainingData->getLabels(i);
	       for (int l = 0; l < numClasses; ++l) {
		  gamma = _baseLearners[j]->classify(_pTrainingData,i,l) * labels[l].y;
		  if ( gamma > 0 )
		     eps_pls += labels[l].weight;
		  else if ( gamma < 0 )
		     eps_min += labels[l].weight;
	    if (eps_min > eps_pls) {
	       float tmpSwap = eps_min;
	       eps_min = eps_pls;
	       eps_pls = tmpSwap;
	       tmpSignOfAlpha = -1;
	       // NOTE(review): an `else` seems to be missing before the next
	       // line; as written it would always overwrite the -1 above.
	       tmpSignOfAlpha = 1;
	    tmpAlpha = getAlpha(eps_min, eps_pls, _theta);
	    tmpE = BaseLearner::getEnergy( eps_min, eps_pls, tmpAlpha, _theta );
	    // Keep the learner with the lowest energy whose edge exceeds theta
	    if (tmpE < bestE && eps_pls > eps_min + _theta) {
	       _alpha = tmpAlpha;
	       _selectedIdx = j;
	       _signOfAlpha = tmpSignOfAlpha;
	       bestE = tmpE;
   else {
      if ( nor_utils::is_zero(_theta) ) {
	 // Pick the learner with the largest (signed) weighted sum
	 for (int j = 0; j < _numBaseLearners; ++j) {
	    sumGamma = 0;
	    for (int i = 0; i < numExamples; ++i) {
	       vector<Label> labels = _pTrainingData->getLabels(i);
	       for (int l = 0; l < numClasses; ++l)
		  sumGamma += labels[l].weight * 
		     _baseLearners[j]->classify(_pTrainingData,i,l) * labels[l].y;
	    if (sumGamma > bestSumGamma) {
	       _selectedIdx = j;
	       bestSumGamma = sumGamma;
	 eps_pls = eps_min = 0;
	 for (int i = 0; i < numExamples; ++i) {
	    vector<Label> labels = _pTrainingData->getLabels(i);
	    for (int l = 0; l < numClasses; ++l) {
	       // NOTE(review): the right-hand operand of this product is missing
	       // from the excerpt (presumably labels[l].y -- confirm).
	       gamma = _baseLearners[_selectedIdx]->classify(_pTrainingData,i,l) *
	       if ( gamma > 0 )
		  eps_pls += labels[l].weight;
	       else if ( gamma < 0 )
		  eps_min += labels[l].weight;
	 _alpha = getAlpha(eps_min, eps_pls);
	 bestE = BaseLearner::getEnergy( eps_min, eps_pls );
      else {
	 // Non-zero theta: evaluate the energy of every learner
	 for (int j = 0; j < _numBaseLearners; ++j) {
	    eps_pls = eps_min = 0;
	    for (int i = 0; i < numExamples; ++i) {
	       vector<Label> labels = _pTrainingData->getLabels(i);
	       for (int l = 0; l < numClasses; ++l) {
		  gamma = _baseLearners[j]->classify(_pTrainingData,i,l) * labels[l].y;
		  if ( gamma > 0 )
		     eps_pls += labels[l].weight;
		  else if ( gamma < 0 )
		     eps_min += labels[l].weight;
	    tmpAlpha = getAlpha(eps_min, eps_pls, _theta);
	    tmpE = BaseLearner::getEnergy( eps_min, eps_pls, tmpAlpha, _theta );
	    // Keep the learner with the lowest energy whose edge exceeds theta
	    if (tmpE < bestE && eps_pls > eps_min + _theta) {
	       _alpha = tmpAlpha;
	       _selectedIdx = j;
	       bestE = tmpE;
	    //cout << j << ": e- = " << eps_min << "\t e+ = " << eps_pls << "\t edge = " << (eps_pls - eps_min) << "\t energy = " << tmpE << "\t energy* = " << bestE << "\t alpha = " << tmpAlpha << endl << flush;
   return bestE;
Exemplo n.º 12
    /**
     * Runs the SoftCascade learning: orders weak hypotheses into a cascade
     * and computes a rejection threshold for each of its stages.
     *
     * From the visible code: the training data (and, if their file names are
     * set, the bootstrap and test data) are loaded; the candidate weak
     * hypotheses are obtained either from a full AdaBoostMHLearner run
     * (_fullRun) or from the file _unCalibratedShypFileName; then, for each of
     * the _numIterations iterations, the candidate with the largest separation
     * span is selected, a rejection threshold is computed for it, both are
     * appended to the strong hypothesis file, the candidate is removed from
     * the pool, and the training set is filtered and possibly bootstrapped.
     *
     * NOTE(review): most closing braces and several statements are missing
     * from this excerpt (see the markers below); confirm every remark against
     * the complete source file.
     *
     * \param args The parsed command-line arguments.
     */
    void SoftCascadeLearner::run(const nor_utils::Args& args)
        // load the arguments
        // NOTE(review): the statement that loads the arguments is not visible
        // in this excerpt.
        //print cascade properties
        if (_verbose > 0) {
            cout    << "[+] Softcascade parameters :" << endl
                    << "\t --> target detection rate = " << _targetDetectionRate << endl
                    << "\t --> alpha (exp param) = " << _alphaExponentialParameter << endl
                    << "\t --> bootstrap rate = " << _bootstrapRate << endl
                    << endl;

        // get the registered weak learner (type from name)
        // NOTE(review): the initializer of this declaration is missing from
        // the excerpt.
        BaseLearner* pWeakHypothesisSource = 
        // initialize learning options; normally it's done in the strong loop
        // also, here we do it for Product learners, so input data can be created

        // get the training input data, and load it

        InputData* pTrainingData = pWeakHypothesisSource->createInputData();
        pTrainingData->load(_trainFileName, IT_TRAIN, 5);

        // The bootstrap data are optional
        InputData* pBootstrapData = NULL;
        if (!_bootstrapFileName.empty()) {
            pBootstrapData = pWeakHypothesisSource->createInputData();
            pBootstrapData->load(_bootstrapFileName, IT_TRAIN, 5);
        // get the testing input data, and load it
        InputData* pTestData = NULL;
        if ( !_testFileName.empty() )
            pTestData = pWeakHypothesisSource->createInputData();
            pTestData->load(_testFileName, IT_TEST, 5);

        // Writer of the strong hypothesis file
        Serialization ss(_shypFileName, false );
//        outputHeader();
        // The output information object
        // It stays NULL when no output-info file has been requested
        OutputInfo* pOutInfo = NULL;

        if ( !_outputInfoFile.empty() ) 
            pOutInfo = new OutputInfo(args, true);
            pOutInfo->setOutputList("sca", &args);
            // NOTE(review): the statement controlled by this `if` is not
            // visible; as written it would guard the outputHeader() call.
            if (pTestData)
            pOutInfo->outputHeader(pTrainingData->getClassMap(), true, true, false);
//        ofstream trainPosteriorsFile;
//        ofstream testPosteriorsFile;
        // Index of the positive class in the class map of the training data
        const NameMap& namemap = pTrainingData->getClassMap();
        _positiveLabelIndex = namemap.getIdxFromName(_positiveLabelName);

        // FIXME: output posteriors

//        OutputInfo* pTrainPosteriorsOut = NULL;
//        OutputInfo* pTestPosteriorsOut = NULL;
//        if (! _trainPosteriorsFileName.empty()) {
//            pTrainPosteriorsOut = new OutputInfo(_trainPosteriorsFileName, "pos", true);
//            pTrainPosteriorsOut->initialize(pTrainingData);
//            dynamic_cast<PosteriorsOutput*>( pTrainPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );
//        }
//        if (! _testPosteriorsFileName.empty() && !_testFileName.empty() ) {
//            pTestPosteriorsOut = new OutputInfo(_testPosteriorsFileName, "pos", true);
//            pTestPosteriorsOut->initialize(pTestData);
//            dynamic_cast<PosteriorsOutput*>( pTestPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );            
//        }
        const int numExamples = pTrainingData->getNumExamples();

        // The pool of candidate weak hypotheses
        vector<BaseLearner*> inWeakHypotheses;
        if (_fullRun) {            
            // TODO : the full training is implemented, testing is needed
            AdaBoostMHLearner* sHypothesis = new AdaBoostMHLearner();
            sHypothesis->run(args, pTrainingData, _baseLearnerName, _numIterations, inWeakHypotheses );
            delete sHypothesis;
        else { 
            cout << "[+] Loading uncalibrated shyp file... ";
            //read the shyp file of the trained classifier
            UnSerialization us;
            us.loadHypotheses(_unCalibratedShypFileName, inWeakHypotheses, pTrainingData);  
            // NOTE(review): the body of the next `if` is not visible in this
            // excerpt (presumably the pool is cut to _inShypLimit -- confirm).
            if (_inShypLimit > 0 && _inShypLimit < inWeakHypotheses.size() ) {
            // Never iterate more often than there are candidates
            if (_numIterations > inWeakHypotheses.size()) {
                _numIterations = inWeakHypotheses.size();
            cout << "weak hypotheses loaded, " << inWeakHypotheses.size() << " retained.\n";
        // some initializations
        double faceRejectionFraction = 0.;
        double estimatedExecutionTime = 0.;
        vector<double> rejectionDistributionVector;

        // Raw indices of the examples currently in the training set
        set<int> trainingIndices;
        for (int i = 0; i < numExamples; i++) {
            trainingIndices.insert(pTrainingData->getRawIndex(i) );
        // init v_t (see the paper)
        initializeRejectionDistributionVector(_numIterations, rejectionDistributionVector);

        if (_verbose == 1)
            cout << "Learning in progress..." << endl;

        // Starting the SoftCascade main loop
        for (int t = 0; t < _numIterations; ++t)
            if (_verbose > 0)
                cout << "--------------[ iteration " << (t+1) << " ]--------------" << endl;

            // The tolerated rejection grows by v[t] at every stage
            faceRejectionFraction += rejectionDistributionVector[t];
            cout << "[+] Face rejection tolerated : " << faceRejectionFraction << " | v[t] = " << rejectionDistributionVector[t] << endl;
            int numberOfNegatives = pTrainingData->getNumExamplesPerClass(1 - _positiveLabelIndex);
            //vector<BaseLearner*>::const_iterator whyIt;
            int selectedIndex = 0;
            AlphaReal bestGap = 0;
            vector<AlphaReal> posteriors;
            computePosteriors(pTrainingData, _foundHypotheses, posteriors, _positiveLabelIndex);
            //should use an iterator instead of i
            // Try every remaining candidate on a copy of the posteriors and
            // remember the one with the largest separation span
            vector<BaseLearner*>::iterator whyIt;
            int i;
            for (i = 0, whyIt = inWeakHypotheses.begin(); whyIt != inWeakHypotheses.end(); ++whyIt, ++i) {
                vector<AlphaReal> temporaryPosteriors = posteriors;
                // NOTE(review): this copy is not used in the visible code.
                vector<BaseLearner*> temporaryWeakHyp = _foundHypotheses;
                updatePosteriors(pTrainingData, *whyIt, temporaryPosteriors, _positiveLabelIndex);
                AlphaReal gap = computeSeparationSpan(pTrainingData, temporaryPosteriors, _positiveLabelIndex );

                if (gap > bestGap) {
                    bestGap = gap;
                    selectedIndex = i;
            BaseLearner* selectedWeakHypothesis = inWeakHypotheses[selectedIndex];
            cout << "[+] Rank of the selected weak hypothesis : " << selectedIndex << endl
                 << "\t ---> edge gap = " << bestGap << endl
                 << "\t ---> alpha = " << selectedWeakHypothesis->getAlpha() << endl;

            //update the stages
            updatePosteriors(pTrainingData, selectedWeakHypothesis, posteriors, _positiveLabelIndex);
            double missesFraction;
            AlphaReal r = findBestRejectionThreshold(pTrainingData, posteriors, faceRejectionFraction, missesFraction);
            // update the output info object
            // NOTE(review): pOutInfo is NULL when _outputInfoFile is empty, and
            // neither it nor the result of the dynamic_cast is checked here.
            dynamic_cast<SoftCascadeOutput*>( pOutInfo->getOutputInfoObject("sca") )->appendRejectionThreshold(r);
            cout << "[+] Rejection threshold = " << r << endl;
            //some updates
            ss.appendHypothesisWithThreshold(t, selectedWeakHypothesis, r);
            faceRejectionFraction -= missesFraction;
            // The selected hypothesis leaves the pool of candidates
            inWeakHypotheses.erase(inWeakHypotheses.begin() + selectedIndex);
            double whypCost = 1; //just in case there are different costs for each whyp
            estimatedExecutionTime += whypCost * numberOfNegatives;
            // output perf in file
            // NOTE(review): `scores` is not used in the visible code.
            vector< vector< AlphaReal> > scores(0);
            _output << t + 1 << setw(_sepWidth + 1) << r << setw(_sepWidth);
            // update OutputInfo with the new whyp
//            updateOutputInfo(pOutInfo, pTrainingData, selectedWeakHypothesis);
//            if (pTestData) {
//                updateOutputInfo(pOutInfo, pTestData, selectedWeakHypothesis);
//            }

            // output the iteration results
            printOutputInfo(pOutInfo, t, pTrainingData, pTestData, selectedWeakHypothesis, r);
//            if (pTrainPosteriorsOut) {
//                pTrainPosteriorsOut->setTable(pTrainingData, pOutInfo->getTable(pTrainingData));
//                pTrainPosteriorsOut->outputCustom(pTrainingData);
//            }
//            if (pTestPosteriorsOut) {
//                pTestPosteriorsOut->setTable(pTestData, pOutInfo->getTable(pTestData));
//                pTestPosteriorsOut->outputCustom(pTestData);
//            }
            int leftNegatives = filterDataset(pTrainingData, posteriors, r, trainingIndices);
            // NOTE(review): no statement leaving the loop is visible after the
            // message below; it may be missing from this excerpt.
            if (leftNegatives == 0) {
                cout << endl << "[+] No more negatives.\n";
            if (_bootstrapRate != 0) {
                bootstrapTrainingSet(pTrainingData, pBootstrapData, trainingIndices);

        }  // loop on iterations

        // write the footer of the strong hypothesis file
        // NOTE(review): the statement that writes the footer is not visible
        // in this excerpt.

        // Free the input data objects
        // NOTE(review): pOutInfo is not released in the visible code.
        if (pTrainingData)
            delete pTrainingData;
        if (pBootstrapData) {
            delete pBootstrapData;
        if (pTestData)
            delete pTestData;

        if (_verbose > 0)
            cout << "Learning completed." << endl;