C++ (Cpp) InputData::getNumExamples 예제들

프로그래밍 언어: C++ (Cpp)

클래스/타입: InputData

메소드/함수: getNumExamples

hotexamples.com에서의 예제들: 13

C++ (Cpp) InputData::getNumExamples - 13개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C++ (Cpp)의 InputData::getNumExamples에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

getNumExamples(13)

getClassMap(12)

getNumClasses(8)

getPort(7)

load(6)

initOptions(6)

getName(5)

getLabels(5)

SetModifier(4)

AsMultiTouchInput(4)

getExample(4)

isRequired(3)

getSharedRAbundVectors(3)

getDescription(3)

AsMouseInput(3)

SetExtended(3)

SetRaw(2)

getExampleName(2)

verifyWritesHaveData(2)

terminateAllParsers(2)

AsPanGestureInput(2)

AsPinchGestureInput(2)

AsScrollWheelInput(2)

hasPositiveLabel(2)

AsTapGestureInput(2)

writeOutput(2)

clearIndexSet(2)

extract(2)

extract_py(2)

generateReduced(2)

IsMouseInput(1)

setPort(1)

pushVector(1)

push_back(1)

setCarrier(1)

setDescription(1)

setName(1)

setPriority(1)

setPortType(1)

openOutput(1)

setRequired(1)

size(1)

sucCount(1)

terminate(1)

toString(1)

withPriority(1)

owner(1)

multi_plusToA(1)

nComponents(1)

GetExtendedInput(1)

예제 #1

파일 보기

파일: MDDAGClassifier.cpp 프로젝트: busarobi/MDDAG2

void MDDAGClassifier::saveLikelihoods(const string& dataFileName, const string& shypFileName,
                                      const string& outFileName, int numIterations)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();


    ofstream outFile(outFileName.c_str());
    string exampleName;

    if (_verbose > 0)
        cout << "Output likelihoods..." << flush;

    // get the results
    /////////////////////////////////////////////////////////////////////
    // computeResults( pData, weakHypotheses, results, numIterations );
    assert( !weakHypotheses.empty() );

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile, "err");

    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // sum votes for classes
    vector< AlphaReal > votesForExamples( numClasses );
    vector< AlphaReal > expVotesForExamples( numClasses );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    pOutInfo->initialize( pData );

    // for every feature: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
            whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            pOutInfo->outputCustom(pData, currWeakHyp);

            // Margins and edge requires an update of the weight,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);

            pOutInfo->endLine();

        } // for (int i = 0; i < numExamples; ++i)
        // calculate likelihoods from votes

        fill( votesForExamples.begin(), votesForExamples.end(), 0.0 );
        AlphaReal lLambda = 0.0;
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();
            AlphaReal sumExp = 0.0;
            // for every class
            for (int l = 0; l < numClasses; ++l)
            {
                expVotesForExamples[l] =  exp( currVotesVector[l] ) ;
                sumExp += expVotesForExamples[l];
            }

            if ( sumExp > numeric_limits<AlphaReal>::epsilon() )
            {
                for (int l = 0; l < numClasses; ++l)
                {
                    expVotesForExamples[l] /= sumExp;
                }
            }

            Example ex = pData->getExample( results[i]->getIdx() );
            vector<Label> labs = ex.getLabels();
            AlphaReal m = numeric_limits<AlphaReal>::infinity();
            for (int l = 0; l < numClasses; ++l)
            {
                if ( labs[l].y > 0 )
                {
                    if ( expVotesForExamples[l] > numeric_limits<AlphaReal>::epsilon() )
                    {
                        AlphaReal logVal = log( expVotesForExamples[l] );

                        if ( logVal != m ) {
                            lLambda += ( ( 1.0/(AlphaReal)numExamples ) * logVal );
                        }
                    }
                }
            }


        }


        outFile << t << "\t" << lLambda ;
        outFile << '\n';

        outFile.flush();
    }

    if (pOutInfo)
        delete pOutInfo;

    // computeResults( pData, weakHypotheses, results, numIterations );
    ///////////////////////////////////////////////////////////////////////////////////


    /*
     for (int i = 0; i < numExamples; ++i)
     {
     // output the name if it exists, otherwise the number
     // of the example
     exampleName = pData->getExampleName(i);
     if ( !exampleName.empty() )
     outFile << exampleName << ',';

     // output the posteriors
     outFile << results[i]->getVotesVector()[0];
     for (int l = 1; l < numClasses; ++l)
     outFile << ',' << results[i]->getVotesVector()[l];
     outFile << '\n';
     }
     */

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
    {
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
    }

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}

예제 #2

파일 보기

파일: MDDAGClassifier.cpp 프로젝트: busarobi/MDDAG2

void MDDAGClassifier::run(const string& dataFileName, const string& shypFileName,
                          int numIterations, const string& outResFileName, int numRanksEnclosed)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, numIterations );

    const int numClasses = pData->getNumClasses();

    if (_verbose > 0)
    {
        // well.. if verbose = 0 no results are displayed! :)
        cout << "Done!" << endl;

        vector< vector<float> > rankedError(numRanksEnclosed);

        // Get the per-class error for the numRanksEnclosed-th ranks
        for (int i = 0; i < numRanksEnclosed; ++i)
            getClassError( pData, results, rankedError[i], i );

        // output it
        cout << endl;
        cout << "Error Summary" << endl;
        cout << "=============" << endl;

        for ( int l = 0; l < numClasses; ++l )
        {
            // first rank (winner): rankedError[0]
            cout << "Class '" << pData->getClassMap().getNameFromIdx(l) << "': "
                 << setprecision(4) << rankedError[0][l] * 100 << "%";

            // output the others on its side
            if (numRanksEnclosed > 1 && _verbose > 1)
            {
                cout << " (";
                for (int i = 1; i < numRanksEnclosed; ++i)
                    cout << " " << i+1 << ":[" << setprecision(4) << rankedError[i][l] * 100 << "%]";
                cout << " )";
            }

            cout << endl;
        }

        // the overall error
        cout << "\n--> Overall Error: "
             << setprecision(4) << getOverallError(pData, results, 0) * 100 << "%";

        // output the others on its side
        if (numRanksEnclosed > 1 && _verbose > 1)
        {
            cout << " (";
            for (int i = 1; i < numRanksEnclosed; ++i)
                cout << " " << i+1 << ":[" << setprecision(4) << getOverallError(pData, results, i) * 100 << "%]";
            cout << " )";
        }

        cout << endl;

    } // verbose


    // If asked output the results
    if ( !outResFileName.empty() )
    {
        const int numExamples = pData->getNumExamples();
        ofstream outRes(outResFileName.c_str());

        outRes << "Instance" << '\t' << "Forecast" << '\t' << "Labels" << '\n';

        string exampleName;

        for (int i = 0; i < numExamples; ++i)
        {
            // output the name if it exists, otherwise the number
            // of the example
            exampleName = pData->getExampleName(i);
            if ( exampleName.empty() )
                outRes << i << '\t';
            else
                outRes << exampleName << '\t';

            // output the predicted class
            outRes << pData->getClassMap().getNameFromIdx( results[i]->getWinner().first ) << '\t';

            outRes << '|';

            vector<Label>& labels = pData->getLabels(i);
            for (vector<Label>::iterator lIt=labels.begin(); lIt != labels.end(); ++lIt) {
                if (lIt->y>0)
                {
                    outRes << ' ' << pData->getClassMap().getNameFromIdx(lIt->idx);
                }
            }

            outRes << endl;
        }

        if (_verbose > 0)
            cout << "\nPredictions written on file <" << outResFileName << ">!" << endl;

    }


    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}

예제 #3

파일 보기

파일: MDDAGClassifier.cpp 프로젝트: busarobi/MDDAG2

void MDDAGClassifier::saveConfusionMatrix(const string& dataFileName, const string& shypFileName,
        const string& outFileName)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, (int)weakHypotheses.size() );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());

    //////////////////////////////////////////////////////////////////////////

    for (int l = 0; l < numClasses; ++l)
        outFile << '\t' << pData->getClassMap().getNameFromIdx(l);
    outFile << endl;

    for (int l = 0; l < numClasses; ++l)
    {
        vector<int> winnerCount(numClasses, 0);
        for (int i = 0; i < numExamples; ++i)
        {
            if ( pData->hasPositiveLabel(i,l) )
                ++winnerCount[ results[i]->getWinner().first ];
        }

        // class name
        outFile << pData->getClassMap().getNameFromIdx(l);

        for (int j = 0; j < numClasses; ++j)
            outFile << '\t' << winnerCount[j];

        outFile << endl;
    }

    //////////////////////////////////////////////////////////////////////////

    if (_verbose > 0)
        cout << "Done!" << endl;

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}

예제 #4

파일 보기

파일: MDDAGClassifier.cpp 프로젝트: busarobi/MDDAG2

void MDDAGClassifier::saveCalibratedPosteriors(const string& dataFileName, const string& shypFileName,
        const string& outFileName, int numIterations)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, numIterations );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());
    string exampleName;

    if (_verbose > 0)
        cout << "Output posteriors..." << flush;

    for (int i = 0; i < numExamples; ++i)
    {
        // output the name if it exists, otherwise the number
        // of the example
        exampleName = pData->getExampleName(i);
        if ( !exampleName.empty() )
            outFile << exampleName << ',';

        // output the posteriors
        outFile << results[i]->getVotesVector()[0];
        for (int l = 1; l < numClasses; ++l)
            outFile << ',' << results[i]->getVotesVector()[l];
        outFile << '\n';
    }

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
    {
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
    }

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}

예제 #5

파일 보기

파일: AdaBoostMHClassifier.cpp 프로젝트: ShenWei/src

	/**
	 * Writes the points of the ROC curve of class 0 to a file.
	 *
	 * The examples are sorted by decreasing vote for class 0. For every
	 * position in that order the file receives the example index, the
	 * cumulated initial weight of the positive examples seen so far divided
	 * by the total positive weight, and the same ratio for the negative
	 * examples. Both ratios are NaN when the corresponding total is zero.
	 *
	 * \param dataFileName  Forwarded to loadInputData() to obtain the data set.
	 * \param shypFileName  File from which the weak hypotheses are loaded.
	 * \param outFileName   File that receives the curve; the program exits
	 *                      with -1 when it cannot be opened.
	 * \param numIterations Maximum number of weak hypotheses to use.
	 */
	void AdaBoostMHClassifier::saveROC(const string& dataFileName, const string& shypFileName, 
		const string& outFileName, int numIterations)
	{
		InputData* pData = loadInputData(dataFileName, shypFileName);
		ofstream outFile(outFileName.c_str());
		
		if ( ! outFile.is_open() )
		{
			cout << "Cannot open outfile" << endl;
			exit( -1 );
		}

		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;

		// The class that loads the weak hypotheses
		UnSerialization us;

		// Where to put the weak hypotheses
		vector<BaseLearner*> weakHypotheses;

		// loads them
		us.loadHypotheses(shypFileName, weakHypotheses, pData);

		// Only ever shrink the list: growing it would append NULL learners
		// that would be dereferenced during classification.
		if ( numIterations >= 0 && static_cast<size_t>(numIterations) < weakHypotheses.size() )
			weakHypotheses.resize( numIterations );

		// where the results go
		vector< ExampleResults* > results;

		if (_verbose > 0)
			cout << "Classifying..." << flush;

		// get the results
		computeResults( pData, weakHypotheses, results, static_cast<int>(weakHypotheses.size()) );

		const int numExamples = pData->getNumExamples();

		if (_verbose > 0)
			cout << "Done!" << endl;		

		// (example index, vote for class 0), sorted by decreasing vote
		vector< pair< int, double> > sortedExample( numExamples );
		
		for( int i=0; i<numExamples; i++ )
		{
			sortedExample[i].first = i;
			sortedExample[i].second = results[i]->getVotesVector()[0];
		}
		sort( sortedExample.begin(), sortedExample.end(), nor_utils::comparePair< 2, int, double, greater<double> >() );

		// cumulated weights up to and including each sorted position
		vector<double> positiveWeights( numExamples, 0.0 );
		double sumOfPositiveWeights = 0.0;

		vector<double> negativeWeights( numExamples, 0.0 );
		double sumOfNegativeWeights = 0.0;

		string className = pData->getClassMap().getNameFromIdx( 0 );

		for( int i=0; i<numExamples; i++ )
		{
			// A fresh reference per example: assigning through one long-lived
			// reference would copy these labels over another example's labels.
			vector<Label>& labels = pData->getLabels( sortedExample[i].first );
			vector<Label>::iterator labIt = find( labels.begin(), labels.end(), 0);

			// Examples without a label entry for class 0 leave both sums unchanged.
			if ( labIt != labels.end() )
			{
				if ( labIt->y > 0.0 )
					sumOfPositiveWeights += labIt->initialWeight;
				else
					sumOfNegativeWeights += labIt->initialWeight;
			}

			positiveWeights[i] = sumOfPositiveWeights;
			negativeWeights[i] = sumOfNegativeWeights;
		}

		outFile << "Class name: " << className << endl;
		for( int i=0; i<numExamples; i++ )
		{
			outFile <<  sortedExample[i].first << " ";
			// fraction of the positive weight ranked at or above this position
			// (the true positive rate when thresholding here)
			outFile << ( positiveWeights[i] / sumOfPositiveWeights ) << " ";
			// fraction of the negative weight ranked at or above this position
			// (the false positive rate when thresholding here)
			outFile << ( negativeWeights[i] / sumOfNegativeWeights ) << endl;
		}		

		outFile.close();

		// delete the input data file
		delete pData;

		vector<ExampleResults*>::iterator it;
		for (it = results.begin(); it != results.end(); ++it)
			delete (*it);
	}

예제 #6

파일 보기

파일: MDDAGClassifier.cpp 프로젝트: busarobi/MDDAG2

void MDDAGClassifier::printConfusionMatrix(const string& dataFileName, const string& shypFileName)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    // get the results
    computeResults( pData, weakHypotheses, results, (int)weakHypotheses.size());

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    if (_verbose > 0)
        cout << "Done!" << endl;

    const int colSize = 7;

    if (_verbose > 0)
    {
        cout << "Raw Confusion Matrix:\n";
        cout << setw(colSize) << "Truth       ";

        for (int l = 0; l < numClasses; ++l)
            cout << setw(colSize) << nor_utils::getAlphanumeric(l);

        cout << "\nClassification\n";

        for (int l = 0; l < numClasses; ++l)
        {
            vector<int> winnerCount(numClasses, 0);
            for (int i = 0; i < numExamples; ++i)
            {
                if ( pData->hasPositiveLabel(i, l) )
                    ++winnerCount[ results[i]->getWinner().first ];
            }

            // class
            cout << setw(colSize) << "           " << nor_utils::getAlphanumeric(l);

            for (int j = 0; j < numClasses; ++j)
                cout << setw(colSize) << winnerCount[j];

            cout << endl;
        }

    }

    cout << "\nMatrix Key:\n";

    // Print the legend
    for (int l = 0; l < numClasses; ++l)
        cout << setw(5) << nor_utils::getAlphanumeric(l) << ": " <<
             pData->getClassMap().getNameFromIdx(l) << "\n";

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}

예제 #7

파일 보기

파일: VJCascadeClassifier.cpp 프로젝트: busarobi/MDDAG2

	void VJCascadeClassifier::run(const string& dataFileName, const string& shypFileName, 
								   int numIterations, const string& outResFileName )
	{
		// loading data
		InputData* pData = loadInputData(dataFileName, shypFileName);
		const int numOfExamples = pData->getNumExamples();
				
		//get the index of positive label		
		const NameMap& namemap = pData->getClassMap();
		_positiveLabelIndex = namemap.getIdxFromName( _positiveLabelName );				
		
		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;
		
		
		
		// The class that loads the weak hypotheses
		UnSerialization us;
		
		// Where to put the weak hypotheses
		vector<vector<BaseLearner*> > weakHypotheses;
		
		// For stagewise thresholds 
		vector<AlphaReal> thresholds(0);
        
		// loads them
		//us.loadHypotheses(shypFileName, weakHypotheses, pData);
		us.loadCascadeHypotheses(shypFileName, weakHypotheses, thresholds, pData);
		

		// store result
		vector<CascadeOutputInformation> cascadeData(0);
		vector<CascadeOutputInformation>::iterator it;
		
		cascadeData.resize(numOfExamples);		
		for( it=cascadeData.begin(); it != cascadeData.end(); ++it )
		{
			it->active=true;
		}										
		
		if (!_outputInfoFile.empty())
		{
			outputHeader();
		}
		
		for(int stagei=0; stagei < weakHypotheses.size(); ++stagei )
		{
			// for posteriors
			vector<AlphaReal> posteriors(0);		
			
			// calculate the posteriors after stage
			VJCascadeLearner::calculatePosteriors( pData, weakHypotheses[stagei], posteriors, _positiveLabelIndex );			
			
			// update the data (posteriors, active element index etc.)
			updateCascadeData(pData, weakHypotheses, stagei, posteriors, thresholds, _positiveLabelIndex, cascadeData);
			
			if (!_outputInfoFile.empty())
			{
				_output << stagei + 1 << "\t";
				_output << weakHypotheses[stagei].size() << "\t";
				outputCascadeResult( pData, cascadeData );
			}
			
			int numberOfActiveInstance = 0;
			for( int i = 0; i < numOfExamples; ++i )
				if (cascadeData[i].active) numberOfActiveInstance++;
			
			if (_verbose > 0 )
				cout << "Number of active instances: " << numberOfActiveInstance << "(" << numOfExamples << ")" << endl;									
		}
				
		vector<vector<int> > confMatrix(2);
		confMatrix[0].resize(2);
		fill( confMatrix[0].begin(), confMatrix[0].end(), 0 );
		confMatrix[1].resize(2);
		fill( confMatrix[1].begin(), confMatrix[1].end(), 0 );
		
	    // print accuracy
		for(int i=0; i<numOfExamples; ++i )
		{		
			vector<Label>& labels = pData->getLabels(i);
			if (labels[_positiveLabelIndex].y>0) // pos label				
				if (cascadeData[i].forecast==1)
					confMatrix[1][1]++;
				else
					confMatrix[1][0]++;
			else // negative label
				if (cascadeData[i].forecast==0)
					confMatrix[0][0]++;
				else
					confMatrix[0][1]++;
		}			
		
		double acc = 100.0 * (confMatrix[0][0] + confMatrix[1][1]) / ((double) numOfExamples);
		// output it
		cout << endl;
		cout << "Error Summary" << endl;
		cout << "=============" << endl;
		
		cout << "Accuracy: " << setprecision(4) << acc << endl;
		cout << setw(10) << "\t" << setw(10) << namemap.getNameFromIdx(1-_positiveLabelIndex) << setw(10) << namemap.getNameFromIdx(_positiveLabelIndex) << endl;
		cout << setw(10) << namemap.getNameFromIdx(1-_positiveLabelIndex) << setw(10) << confMatrix[0][0] << setw(10) << confMatrix[0][1] << endl;
		cout << setw(10) << namemap.getNameFromIdx(_positiveLabelIndex) << setw(10) << confMatrix[1][0] << setw(10) << confMatrix[1][1] << endl;		
		
		// output forecast 
		if (!outResFileName.empty() ) outputForecast(pData, outResFileName, cascadeData );
						
		// free memory allocation
		vector<vector<BaseLearner*> >::iterator bvIt;
		for( bvIt = weakHypotheses.begin(); bvIt != weakHypotheses.end(); ++bvIt )
		{
			vector<BaseLearner* >::iterator bIt;
			for( bIt = (*bvIt).begin(); bIt != (*bvIt).end(); ++bIt )
				delete *bIt;
		}
	}

예제 #8

파일 보기

파일: FilterBoostLearner.cpp 프로젝트: junjiek/cmu-exp

    void FilterBoostLearner::run(const nor_utils::Args& args)
    {
        // load the arguments
        this->getArgs(args);

        time_t startTime, currentTime;
        time(&startTime);

        // get the registered weak learner (type from name)
        BaseLearner* pWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
        // initialize learning options; normally it's done in the strong loop
        // also, here we do it for Product learners, so input data can be created
        pWeakHypothesisSource->initLearningOptions(args);

        BaseLearner* pConstantWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

        // get the training input data, and load it

        InputData* pTrainingData = pWeakHypothesisSource->createInputData();
        pTrainingData->initOptions(args);
        pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

        const int numClasses = pTrainingData->getNumClasses();
        const int numExamples = pTrainingData->getNumExamples();
                
        //initialize the margins variable
        _margins.resize( numExamples );
        for( int i=0; i<numExamples; i++ )
        {
            _margins[i].resize( numClasses );
            fill( _margins[i].begin(), _margins[i].end(), 0.0 );
        }


        // get the testing input data, and load it
        InputData* pTestData = NULL;
        if ( !_testFileName.empty() )
        {
            pTestData = pWeakHypothesisSource->createInputData();
            pTestData->initOptions(args);
            pTestData->load(_testFileName, IT_TEST, _verbose);
        }

        // The output information object
        OutputInfo* pOutInfo = NULL;


        if ( !_outputInfoFile.empty() ) 
        {
            // Baseline: constant classifier - goes into 0th iteration

            BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEnergy = pConstantWeakHypothesis->run();

            pOutInfo = new OutputInfo(args);
            pOutInfo->initialize(pTrainingData);

            updateMargins( pTrainingData, pConstantWeakHypothesis );

            if (pTestData)
                pOutInfo->initialize(pTestData);
            pOutInfo->outputHeader(pTrainingData->getClassMap() );

            pOutInfo->outputIteration(-1);
            pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

            if (pTestData)
            {
                pOutInfo->separator();
                pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);
            }
                        
            pOutInfo->outputCurrentTime();

            pOutInfo->endLine();
            pOutInfo->initialize(pTrainingData);
                        
            if (pTestData)
                pOutInfo->initialize(pTestData);
        }
        // reload the previously found weak learners if -resume is set. 
        // otherwise just return 0
        int startingIteration = resumeWeakLearners(pTrainingData);


        Serialization ss(_shypFileName, _isShypCompressed );
        ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

        // perform the resuming if necessary. If not it will just return
        resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

        if (_verbose == 1)
            cout << "Learning in progress..." << endl;
                                
        ///////////////////////////////////////////////////////////////////////
        // Starting the AdaBoost main loop
        ///////////////////////////////////////////////////////////////////////
        for (int t = startingIteration; t < _numIterations; ++t)
        {                       
            if (_verbose > 1)
                cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;
                
            // create the weak learner
            BaseLearner* pWeakHypothesis;
            BaseLearner* pConstantWeakHypothesis;
            pWeakHypothesis = pWeakHypothesisSource->create();
            pWeakHypothesis->initLearningOptions(args);
            //pTrainingData->clearIndexSet();
            pWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal edge, energy=0.0;
                        
            // create the constant learner
            pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEdge = -numeric_limits<AlphaReal>::max();
                        
            int currentNumberOfUsedData = static_cast<int>(_Cn * log(t+3.0));
                        
            if ( _onlineWeakLearning )
            {
                //check whether the weak learner is a ScalarLeaerner
                try {
                    StochasticLearner* pStochasticLearner = dynamic_cast<StochasticLearner*>(pWeakHypothesis);
                    StochasticLearner* pStochasticConstantWeakHypothesis = dynamic_cast<StochasticLearner*> (pConstantWeakHypothesis);
                                        
                    pStochasticLearner->initLearning();
                    pStochasticConstantWeakHypothesis->initLearning();                                                                              
                                        
                    if (_verbose>1)
                        cout << "Number of random instances: \t" << currentNumberOfUsedData << endl;
                                        
                    // set the weights
                    setWeightToMargins(pTrainingData);
                                        
                    //learning
                    for (int i=0; i<currentNumberOfUsedData; ++i )
                    {
                        int randomIndex = (rand() % pTrainingData->getNumExamples());   
                        //int randomIndex = getRandomIndex();
                        pStochasticLearner->update(randomIndex);
                        pStochasticConstantWeakHypothesis->update(randomIndex);
                    }                                       
                    pStochasticLearner->finishLearning();           
                    pStochasticConstantWeakHypothesis->finishLearning();
                }
                catch (bad_cast& e) {
                    cerr << "The weak learner must be a StochasticLearner!!!" << endl;
                    exit(-1);
                }                                                                                               
            }
            else
            {
                filter( pTrainingData, currentNumberOfUsedData );
                if ( pTrainingData->getNumExamples() < 2 ) 
                {
                    filter( pTrainingData, currentNumberOfUsedData, false );
                }
                                
                if (_verbose > 1)
                {
                    cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
                }
                                
                energy = pWeakHypothesis->run();                                                                
                pConstantWeakHypothesis->run(); 
            }                       

            //estimate edge
            filter( pTrainingData, currentNumberOfUsedData, false );
            edge = pWeakHypothesis->getEdge(true) / 2.0;                                            
            constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
                        
                        
            if ( constantEdge > edge )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
                edge = constantEdge;
            } else {
                delete pConstantWeakHypothesis;
            }
                                                                        
            // calculate alpha
            AlphaReal alpha = 0.0;
            alpha = 0.5 * log( ( 1 + edge ) / ( 1 - edge ) );
            pWeakHypothesis->setAlpha( alpha );
            _sumAlpha += alpha;
                        
            if (_verbose > 1)
                cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
            // Output the step-by-step information
            pTrainingData->clearIndexSet();
            printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

            // Updates the weights and returns the edge
            //AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

            if (_verbose > 1)
            {
                cout << setprecision(5)
                     << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                     << "--> Edge  = " << edge << endl
                     << "--> Energy  = " << energy << endl
                    //            << "--> ConstantEnergy  = " << constantEnergy << endl
                    //            << "--> difference  = " << (energy - constantEnergy) << endl
                    ;
            }

            // update the margins
            //saveMargins();
            updateMargins( pTrainingData, pWeakHypothesis );
                        
            // append the current weak learner to strong hypothesis file,
            // that is, serialize it.
            ss.appendHypothesis(t, pWeakHypothesis);

            // Add it to the internal list of weak hypotheses
            _foundHypotheses.push_back(pWeakHypothesis); 

            // check if the time limit has been reached
            if (_maxTime > 0)
            {
                time( &currentTime );
                float diff = difftime(currentTime, startTime); // difftime is in seconds
                diff /= 60; // = minutes

                if (diff > _maxTime)
                {
                    if (_verbose > 0)
                        cout << "Time limit of " << _maxTime 
                             << " minutes has been reached!" << endl;
                    break;     
                }
            } // check for maxtime
            delete pWeakHypothesis;
        }  // loop on iterations
        /////////////////////////////////////////////////////////

        // write the footer of the strong hypothesis file
        ss.writeFooter();

        // Free the two input data objects
        if (pTrainingData)
            delete pTrainingData;
        if (pTestData)
            delete pTestData;

        if (pOutInfo)
            delete pOutInfo;

        if (_verbose > 0)
            cout << "Learning completed." << endl;
    }

예제 #9

파일 보기

파일: VJCascadeClassifier.cpp 프로젝트: busarobi/MDDAG2

	void VJCascadeClassifier::savePosteriors(const string& dataFileName, const string& shypFileName, 
											  const string& outFileName, int numIterations)
	{
		// loading data
		InputData* pData = loadInputData(dataFileName, shypFileName);
		const int numOfExamples = pData->getNumExamples();
		
		//get the index of positive label		
		const NameMap& namemap = pData->getClassMap();
		_positiveLabelIndex = namemap.getIdxFromName( _positiveLabelName );
		
		
		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;
		
		
		// open outfile
		ofstream outRes(outFileName.c_str());
		if (!outRes.is_open())
		{
			cout << "Cannot open outfile!!! " << outFileName << endl;
		}
				
		
		// The class that loads the weak hypotheses
		UnSerialization us;
		
		// Where to put the weak hypotheses
		vector<vector<BaseLearner*> > weakHypotheses;
        		
		// For stagewise thresholds 
		vector<AlphaReal> thresholds(0);
		// loads them
		//us.loadHypotheses(shypFileName, weakHypotheses, pData);
		us.loadCascadeHypotheses(shypFileName, weakHypotheses, thresholds, pData);
		
		// output the number of stages
		outRes << "StageNum " << weakHypotheses.size() << endl;
		
		// output original labels
		outRes << "Labels";
		for(int i=0; i<numOfExamples; ++i )
		{		
			vector<Label>& labels = pData->getLabels(i);
			if (labels[_positiveLabelIndex].y>0) // pos label				
				outRes << " 1";
			else
				outRes << " 0";
		}				
		outRes << endl;
		
		// store result
		vector<CascadeOutputInformation> cascadeData(0);
		vector<CascadeOutputInformation>::iterator it;
		
		cascadeData.resize(numOfExamples);		
		for( it=cascadeData.begin(); it != cascadeData.end(); ++it )
		{
			it->active=true;
		}										
		
		for(int stagei=0; stagei < weakHypotheses.size(); ++stagei )
		{
			// for posteriors
			vector<AlphaReal> posteriors(0);		
			
			// calculate the posteriors after stage
			VJCascadeLearner::calculatePosteriors( pData, weakHypotheses[stagei], posteriors, _positiveLabelIndex );			
			
			// update the data (posteriors, active element index etc.)
			//VJCascadeLearner::forecastOverAllCascade( pData, posteriors, activeInstances, thresholds[stagei] );
			updateCascadeData(pData, weakHypotheses, stagei, posteriors, thresholds, _positiveLabelIndex, cascadeData);
			
			
			int numberOfActiveInstance = 0;
			for( int i = 0; i < numOfExamples; ++i )
				if (cascadeData[i].active) numberOfActiveInstance++;
			
			if (_verbose > 0 )
				cout << "Number of active instances: " << numberOfActiveInstance << "(" << numOfExamples << ")" << endl;									
			
			// output stats
			outRes << "Stage " << stagei << " " << weakHypotheses[stagei].size() << endl; 

			outRes << "Forecast";
			for(int i=0; i<numOfExamples; ++i )
			{	
				outRes << " " << cascadeData[i].forecast;
			}				
			outRes << endl;

			outRes << "Active";
			for(int i=0; i<numOfExamples; ++i )
			{	
				if( cascadeData[i].active)
					outRes << " 1";
				else
					outRes << " 0";
			}				
			outRes << endl;

			outRes << "Posteriors";
			for(int i=0; i<numOfExamples; ++i )
			{	
				outRes << " " << cascadeData[i].score;
			}				
			outRes << endl;
			
		}						
		
		outRes.close();
		
		// free memory allocation
		vector<vector<BaseLearner*> >::iterator bvIt;
		for( bvIt = weakHypotheses.begin(); bvIt != weakHypotheses.end(); ++bvIt )
		{
			vector<BaseLearner* >::iterator bIt;
			for( bIt = (*bvIt).begin(); bIt != (*bvIt).end(); ++bIt )
				delete *bIt;
		}
	}

예제 #10

파일 보기

파일: main.cpp 프로젝트: busarobi/MDDAG2

/**
 * The main function. Everything starts here!
 * \param argc The number of arguments.
 * \param argv The arguments.
 * \date 11/11/2005
 */
int main(int argc, const char* argv[])
{
	// initializing the random number generator
	srand ( time(NULL) );
	
	// no need to synchronize with C style stream
	std::ios_base::sync_with_stdio(false);
	
#if STABLE_SORT
	cerr << "WARNING: Stable sort active! It might be slower!!" << endl;
#endif
	
	//////////////////////////////////////////////////////////////////////////
	// Standard arguments
	nor_utils::Args args;
	
	args.setArgumentDiscriminator("--");
	
	args.declareArgument("help");
	args.declareArgument("static");
	
	args.declareArgument("h", "Help", 1, "<optiongroup>");
	
	//////////////////////////////////////////////////////////////////////////
	// Basic Arguments
	
	args.setGroup("Parameters");
	
	args.declareArgument("train", "Performs training.", 2, "<dataFile> <nInterations>");
	args.declareArgument("traintest", "Performs training and test at the same time.", 3, "<trainingDataFile> <testDataFile> <nInterations>");
	args.declareArgument("trainvalidtest", "Performs training and test at the same time.", 4, "<trainingDataFile> <validDataFile> <testDataFile> <nInterations>");
	args.declareArgument("test", "Test the model.", 3, "<dataFile> <numIters> <shypFile>");
	args.declareArgument("test", "Test the model and output the results", 4, "<datafile> <shypFile> <numIters> <outFile>");
	args.declareArgument("cmatrix", "Print the confusion matrix for the given model.", 2, "<dataFile> <shypFile>");
	args.declareArgument("cmatrixfile", "Print the confusion matrix with the class names to a file.", 3, "<dataFile> <shypFile> <outFile>");
	args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model.", 4, "<dataFile> <shypFile> <outFile> <numIters>");
	args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model periodically.", 5, "<dataFile> <shypFile> <outFile> <numIters> <period>");	
		
	args.declareArgument("encode", "Save the coefficient vector of boosting individually on each point using ParasiteLearner", 6, "<inputDataFile> <autoassociativeDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>");	
	args.declareArgument("ssfeatures", "Print matrix data for SingleStump-Based weak learners (if numIters=0 it means all of them).", 4, "<dataFile> <shypFile> <outFile> <numIters>");
	
	args.declareArgument( "fileformat", "Defines the type of intput file. Available types are:\n" 
						 "* simple: each line has attributes separated by whitespace and class at the end (DEFAULT!)\n"
						 "* arff: arff filetype. The header file can be specified using --headerfile option\n"
						 "* arffbzip: bziped arff filetype. The header file can be specified using --headerfile option\n"
						 "* svmlight: \n"
						 "(Example: --fileformat simple)",
                         1, "<fileFormat>" );
	
	args.declareArgument("headerfile", "The header file for arff and SVMLight and arff formats.", 1, "header.txt");
	
	args.declareArgument("constant", "Check constant learner in each iteration.", 0, "");
	args.declareArgument("timelimit", "Time limit in minutes", 1, "<minutes>" );
	args.declareArgument("stronglearner", "Available strong learners:\n"
						 "AdaBoost (default)\n"
						 "FilterBoost\n"
                         "SoftCascade\n"
                         "VJcascade\n", 1, "<stronglearner>" );
	
	args.declareArgument("slowresumeprocess", "Computes every statitstic in each iteration (slow resume)\n"
						 "Computes only the statistics in the last iteration (fast resume, default)\n", 0, "" );
	args.declareArgument("weights", "Outputs the weights of instances at the end of the learning process", 1, "<filename>" );
	args.declareArgument("Cn", "Resampling size for FilterBoost (default=300)", 1, "<value>" );
	
	args.declareArgument("onlinetraining", "The weak learner will be trained online\n", 0, "" );
	
	//// ignored for the moment!
	//args.declareArgument("arffheader", "Specify the arff header.", 1, "<arffHeaderFile>");
	
	// for MDDAG
	//args.setGroup("MDDAG");
	args.declareArgument("traintestmddag", "Performs training and test at the same time using mddag.", 5, "<trainingDataFile> <testDataFile> <modelFile> <nIterations> <baseIter>");
	args.declareArgument("policytrainingiter", "The iteration number the policy learner takes.", 1, "<iternum>");
	args.declareArgument("rollouts", "The number of rollouts.", 1, "<num>");
	args.declareArgument("rollouttype", "Rollout type (montecarlo or szatymaz)", 1, "<rollouttype>");
	args.declareArgument("beta", "Trade-off parameter", 1, "<beta>");
	args.declareArgument("outdir", "Output directory.", 1, "<outdir>");
	args.declareArgument("policyalpha", "Alpha for policy array.", 1, "<alpha>");
	args.declareArgument("succrewardtype", "Rewrd type (e01 or hammng)", 1, "<rward_type");
	args.declareArgument("outtrainingerror", "Output training error", 0, "");
	args.declareArgument("epsilon", "Exploration term", 1, "<epsilon>");
	args.declareArgument("updateperc", "Number of component in the policy are updated", 1, "<perc>");
	
	// for VJ cascade
	VJCascadeLearner::declareBaseArguments(args);
    
    // for SoftCascade
    SoftCascadeLearner::declareBaseArguments(args);
	//////////////////////////////////////////////////////////////////////////
	// Options
	
	args.setGroup("I/O Options");
	
	/////////////////////////////////////////////
	// these are valid only for .txt input!
	// they might be removed!
	args.declareArgument("d", "The separation characters between the fields (default: whitespaces).\nExample: -d \"\\t,.-\"\nNote: new-line is always included!", 1, "<separators>");
	args.declareArgument("classend", "The class is the last column instead of the first (or second if -examplelabel is active).");
	args.declareArgument("examplename", "The data file has an additional column (the very first) which contains the 'name' of the example.");
	/////////////////////////////////////////////
	
	args.setGroup("Basic Algorithm Options");
	args.declareArgument("weightpolicy", "Specify the type of weight initialization. The user specified weights (if available) are used inside the policy which can be:\n"
						 "* sharepoints Share the weight equally among data points and between positiv and negative labels (DEFAULT)\n"
						 "* sharelabels Share the weight equally among data points\n"
						 "* proportional Share the weights freely", 1, "<weightType>");
	
	
	args.setGroup("General Options");
	
	args.declareArgument("verbose", "Set the verbose level 0, 1 or 2 (0=no messages, 1=default, 2=all messages).", 1, "<val>");
	args.declareArgument("outputinfo", "Output informations on the algorithm performances during training, on file <filename>.", 1, "<filename>");
	args.declareArgument("outputinfo", "Output specific informations on the algorithm performances during training, on file <filename> <outputlist>. <outputlist> must be a concatenated list of three characters abreviation (ex: err for error, fpr for false positive rate)", 2, "<filename> <outputlist>");

	args.declareArgument("seed", "Defines the seed for the random operations.", 1, "<seedval>");
	
	//////////////////////////////////////////////////////////////////////////
	// Shows the list of available learners
	string learnersComment = "Available learners are:";
	
	vector<string> learnersList;
	BaseLearner::RegisteredLearners().getList(learnersList);
	vector<string>::const_iterator it;
	for (it = learnersList.begin(); it != learnersList.end(); ++it)
	{
		learnersComment += "\n ** " + *it;
		// defaultLearner is defined in Defaults.h
		if ( *it == defaultLearner )
			learnersComment += " (DEFAULT)";
	}
	
	args.declareArgument("learnertype", "Change the type of weak learner. " + learnersComment, 1, "<learner>");
	
	//////////////////////////////////////////////////////////////////////////
	//// Declare arguments that belongs to all weak learners
	BaseLearner::declareBaseArguments(args);
	
	////////////////////////////////////////////////////////////////////////////
	//// Weak learners (and input data) arguments
	for (it = learnersList.begin(); it != learnersList.end(); ++it)
	{
		args.setGroup(*it + " Options");
		// add weaklearner-specific options
		BaseLearner::RegisteredLearners().getLearner(*it)->declareArguments(args);
	}
	
	//////////////////////////////////////////////////////////////////////////
	//// Declare arguments that belongs to all bandit learner
	GenericBanditAlgorithm::declareBaseArguments(args);
	
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	switch ( args.readArguments(argc, argv) )
	{
		case nor_utils::AOT_NO_ARGUMENTS:
			showBase();
			break;
			
		case nor_utils::AOT_UNKOWN_ARGUMENT:
			exit(1);
			break;
			
		case nor_utils::AOT_INCORRECT_VALUES_NUMBER:
			exit(1);
			break;
			
		case nor_utils::AOT_OK:
			break;
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	if ( args.hasArgument("help") )
		showHelp(args, learnersList);
	if ( args.hasArgument("static") )
		showStaticConfig();
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	if ( args.hasArgument("h") )
		showOptionalHelp(args);
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	int verbose = 1;
	
	if ( args.hasArgument("verbose") )
		args.getValue("verbose", 0, verbose);
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	// defines the seed
	if (args.hasArgument("seed"))
	{
		unsigned int seed = args.getValue<unsigned int>("seed", 0);
		srand(seed);
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	GenericStrongLearner* pModel = NULL;
	
	if ( args.hasArgument("train") ||
        args.hasArgument("traintest") || 
	    args.hasArgument("trainvalidtest") ) // for Viola-Jones Cascade
	{
		
		// get the name of the learner
		string baseLearnerName = defaultLearner;
		if ( args.hasArgument("learnertype") )
			args.getValue("learnertype", 0, baseLearnerName);
		
		checkBaseLearner(baseLearnerName);
		if (verbose > 1)    
			cout << "--> Using learner: " << baseLearnerName << endl;
		
		// This hould be changed: the user decides the strong learner
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->run(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("traintestmddag") )
	{
		// -test <dataFile> <shypFile> <numIters>
		string shypFileName = args.getValue<string>("traintestmddag", 2);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
		
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->run(args);
		
	}		
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("test") )
	{
		// -test <dataFile> <shypFile> <numIters>
		string shypFileName = args.getValue<string>("test", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
                
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->classify(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("cmatrix") )
	{
		// -cmatrix <dataFile> <shypFile>
		
		string shypFileName = args.getValue<string>("cmatrix", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->doConfusionMatrix(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("posteriors") )
	{
		// -posteriors <dataFile> <shypFile> <outFileName>
		string shypFileName = args.getValue<string>("posteriors", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
        
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->doPosteriors(args);
	}   
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("ssfeatures") )
	{
		// ONLY for AdaBoostMH classifiers
		
		// -ssfeatures <dataFile> <shypFile> <outFile> <numIters>
		string testFileName = args.getValue<string>("ssfeatures", 0);
		string shypFileName = args.getValue<string>("ssfeatures", 1);
		string outFileName = args.getValue<string>("ssfeatures", 2);
		int numIterations = args.getValue<int>("ssfeatures", 3);
		
		cerr << "ERROR: ssfeatures has been deactivated for the moment!" << endl;
		
		
		//classifier.saveSingleStumpFeatureData(testFileName, shypFileName, outFileName, numIterations);
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("encode") )
	{
		
		// --encode <inputDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>
		string labelsFileName = args.getValue<string>("encode", 0);
		string autoassociativeFileName = args.getValue<string>("encode", 1);
		string outputFileName = args.getValue<string>("encode", 2);
		int numIterations = args.getValue<int>("encode", 3);
		string poolFileName = args.getValue<string>("encode", 4);
		int numBaseLearners = args.getValue<int>("encode", 5);
		string outputInfoFile;
		const char* tmpArgv1[] = {"bla", // for ParasiteLearner
			"--pool",
			args.getValue<string>("encode", 4).c_str(),
			args.getValue<string>("encode", 5).c_str()};
		args.readArguments(4,tmpArgv1);
		
		InputData* pAutoassociativeData = new InputData();
		pAutoassociativeData->initOptions(args);
		pAutoassociativeData->load(autoassociativeFileName,IT_TRAIN,verbose);
		
		// for the original labels
		InputData* pLabelsData = new InputData();
		pLabelsData->initOptions(args);
		pLabelsData->load(labelsFileName,IT_TRAIN,verbose);
		
		// set up all the InputData members identically to pAutoassociativeData
		EncodeData* pOnePoint = new EncodeData();
		pOnePoint->initOptions(args);
		pOnePoint->load(autoassociativeFileName,IT_TRAIN,verbose);
		
		const int numExamples = pAutoassociativeData->getNumExamples();
		BaseLearner* pWeakHypothesisSource = 
		BaseLearner::RegisteredLearners().getLearner("ParasiteLearner");
		pWeakHypothesisSource->declareArguments(args);
		
		ParasiteLearner* pWeakHypothesis;
		
		ofstream outFile(outputFileName.c_str());
		if (!outFile.is_open())
		{
			cerr << "ERROR: Cannot open strong hypothesis file <" << outputFileName << ">!" << endl;
			exit(1);
		}
		
		for (int i = 0; i < numExamples ; ++i)
		{
			vector<float> alphas;
			alphas.resize(numBaseLearners);
			fill(alphas.begin(), alphas.end(), 0);
			
			if (verbose >= 1)
				cout << "--> Encoding example no " << (i+1) << endl;
			pOnePoint->resetData();
			pOnePoint->addExample( pAutoassociativeData->getExample(i) );
			AlphaReal energy = 1;
			
			OutputInfo* pOutInfo = NULL;
			if ( args.hasArgument("outputinfo") ) 
			{
				args.getValue("outputinfo", 0, outputInfoFile);
				pOutInfo = new OutputInfo(args);
				pOutInfo->initialize(pOnePoint);
			}
			
			
			for (int t = 0; t < numIterations; ++t)
			{
				pWeakHypothesis = (ParasiteLearner*)pWeakHypothesisSource->create();
				pWeakHypothesis->initLearningOptions(args);
				pWeakHypothesis->setTrainingData(pOnePoint);
				energy *= pWeakHypothesis->run();
				// 	    if (verbose >= 2)
				//  	       cout << "energy = " << energy << endl << flush;
				AdaBoostMHLearner adaBoostMHLearner;
				
				if (i == 0 && t == 0)
				{
					if ( pWeakHypothesis->getBaseLearners().size() < numBaseLearners )
						numBaseLearners = pWeakHypothesis->getBaseLearners().size();
					outFile << "%Hidden representation using autoassociative boosting" << endl << endl;
					outFile << "@RELATION " << outputFileName << endl << endl;
					outFile << "% numBaseLearners" << endl;
					for (int j = 0; j < numBaseLearners; ++j) 
						outFile << "@ATTRIBUTE " << j << "_" <<
						pWeakHypothesis->getBaseLearners()[j]->getId() << " NUMERIC" << endl;
					outFile << "@ATTRIBUTE class {" << pLabelsData->getClassMap().getNameFromIdx(0);
					for (int l = 1; l < pLabelsData->getClassMap().getNumNames(); ++l)
						outFile << ", " << pLabelsData->getClassMap().getNameFromIdx(l);
					outFile << "}" << endl<< endl<< "@DATA" << endl;
				}
				alphas[pWeakHypothesis->getSelectedIndex()] += 
				pWeakHypothesis->getAlpha() * pWeakHypothesis->getSignOfAlpha();
				if ( pOutInfo )
					adaBoostMHLearner.printOutputInfo(pOutInfo, t, pOnePoint, NULL, pWeakHypothesis);
				adaBoostMHLearner.updateWeights(pOnePoint,pWeakHypothesis);
			}
			float sumAlphas = 0;
			for (int j = 0; j < numBaseLearners; ++j)
				sumAlphas += alphas[j];
			
			for (int j = 0; j < numBaseLearners; ++j)
				outFile << alphas[j]/sumAlphas << ",";
			const vector<Label>& labels = pLabelsData->getLabels(i);
			for (int l = 0; l < labels.size(); ++l)
				if (labels[l].y > 0)
					outFile << pLabelsData->getClassMap().getNameFromIdx(labels[l].idx) << endl;
			delete pOutInfo;
		}
		outFile.close();
	}
	
	if (pModel)
		delete pModel;
	
	return 0;
}

예제 #11

파일 보기

파일: FilterBoostLearner.cpp 프로젝트: ShenWei/src

	/**
	 * Trains a strong hypothesis with FilterBoost.
	 *
	 * Loads the training (and optionally the test) data, then for every
	 * iteration: draws a filtered sample of the training data, trains a weak
	 * hypothesis on it (and, if requested, a constant learner to compare
	 * against), estimates the edge on a second sample, derives alpha from
	 * that edge, writes the step information, updates weights and margins and
	 * appends the hypothesis to the strong hypothesis file. The loop stops
	 * early once the time limit (in minutes), if any, has been exceeded.
	 *
	 * \param args The command line arguments the options are read from.
	 */
	void FilterBoostLearner::run(const nor_utils::Args& args)
	{
		// load the arguments
		this->getArgs(args);

		time_t startTime, currentTime;
		time(&startTime);

		// get the registered weak learner (type from name)
		BaseLearner* pWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
		// initialize learning options; normally it's done in the strong loop
		// also, here we do it for Product learners, so input data can be created
		pWeakHypothesisSource->initLearningOptions(args);

		BaseLearner* pConstantWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

		// get the training input data, and load it
		InputData* pTrainingData = pWeakHypothesisSource->createInputData();
		pTrainingData->initOptions(args);
		pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

		const int numClasses = pTrainingData->getNumClasses();
		const int numExamples = pTrainingData->getNumExamples();

		// initialize the margins variable: one zeroed row per example
		_margins.resize( numExamples );
		for( int i=0; i<numExamples; i++ )
		{
			_margins[i].resize( numClasses );
			fill( _margins[i].begin(), _margins[i].end(), 0.0 );
		}

		// get the testing input data, and load it
		InputData* pTestData = NULL;
		if ( !_testFileName.empty() )
		{
			pTestData = pWeakHypothesisSource->createInputData();
			pTestData->initOptions(args);
			pTestData->load(_testFileName, IT_TEST, _verbose);
		}

		// The output information object
		OutputInfo* pOutInfo = NULL;

		if ( !_outputInfoFile.empty() ) 
		{
			// Baseline: constant classifier - goes into 0th iteration
			BaseLearner* pBaselineHypothesis = pConstantWeakHypothesisSource->create();
			pBaselineHypothesis->initLearningOptions(args);
			pBaselineHypothesis->setTrainingData(pTrainingData);
			pBaselineHypothesis->run();

			pOutInfo = new OutputInfo(_outputInfoFile);
			pOutInfo->initialize(pTrainingData);

			updateMargins( pTrainingData, pBaselineHypothesis );

			if (pTestData)
				pOutInfo->initialize(pTestData);
			pOutInfo->outputHeader();

			pOutInfo->outputIteration(-1);
			pOutInfo->outputError(pTrainingData, pBaselineHypothesis);

			if (pTestData)
				pOutInfo->outputError(pTestData, pBaselineHypothesis);
			/*
			pOutInfo->outputMargins(pTrainingData, pBaselineHypothesis);
			
			pOutInfo->outputEdge(pTrainingData, pBaselineHypothesis);

			if (pTestData)
				pOutInfo->outputMargins(pTestData, pBaselineHypothesis);

			pOutInfo->outputMAE(pTrainingData);

			if (pTestData)
				pOutInfo->outputMAE(pTestData);
			*/
			pOutInfo->outputCurrentTime();

			pOutInfo->endLine();
			pOutInfo->initialize(pTrainingData);

			if (pTestData)
				pOutInfo->initialize(pTestData);

			// the baseline is only needed for the line written above; it used
			// to be leaked
			delete pBaselineHypothesis;
		}
		// reload the previously found weak learners if -resume is set. 
		// otherwise just return 0
		int startingIteration = resumeWeakLearners(pTrainingData);

		Serialization ss(_shypFileName, _isShypCompressed );
		ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

		// perform the resuming if necessary. If not it will just return
		resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

		if (_verbose == 1)
			cout << "Learning in progress..." << endl;

		///////////////////////////////////////////////////////////////////////
		// Starting the AdaBoost main loop
		///////////////////////////////////////////////////////////////////////
		for (int t = startingIteration; t < _numIterations; ++t)
		{
			if (_verbose > 1)
				cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

			// the requested sample size grows logarithmically with the iteration
			const int sampleSize = (int)(_Cn * log(t+2.0));

			filter( pTrainingData, sampleSize );
			if ( pTrainingData->getNumExamples() < 2 ) 
			{
				// too few examples came back: sample again with the third
				// argument switched off
				filter( pTrainingData, sampleSize, false );
			}

			if (_verbose > 1)
			{
				cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
			}

			BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
			pWeakHypothesis->initLearningOptions(args);
			//pTrainingData->clearIndexSet();
			pWeakHypothesis->setTrainingData(pTrainingData);
			float energy = pWeakHypothesis->run();

			// only created (and only read) when the user asked for the check
			BaseLearner* pConstantWeakHypothesis = NULL;
			if (_withConstantLearner) // check constant learner if user wants it
			{
				pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
				pConstantWeakHypothesis->initLearningOptions(args);
				pConstantWeakHypothesis->setTrainingData(pTrainingData);
				pConstantWeakHypothesis->run();
			}

			//estimate edge (on a freshly drawn sample)
			filter( pTrainingData, sampleSize, false );
			float edge = pWeakHypothesis->getEdge() / 2.0;

			if (_withConstantLearner) // check constant learner if user wants it
			{
				// keep whichever of the two hypotheses has the larger edge
				float constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
				if ( constantEdge > edge )
				{
					delete pWeakHypothesis;
					pWeakHypothesis = pConstantWeakHypothesis;
					edge = constantEdge;
				} else {
					delete pConstantWeakHypothesis;
				}
			}

			// calculate alpha
			float alpha = 0.0;
			alpha = 0.5 * log( ( 0.5 + edge ) / ( 0.5 - edge ) );
			pWeakHypothesis->setAlpha( alpha );

			if (_verbose > 1)
				cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
			// Output the step-by-step information
			pTrainingData->clearIndexSet();
			printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

			// Updates the weights and returns the edge
			float gamma = updateWeights(pTrainingData, pWeakHypothesis);

			if (_verbose > 1)
			{
				cout << setprecision(5)
					<< "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
					<< "--> Edge  = " << gamma << endl
					<< "--> Energy  = " << energy << endl
					//            << "--> ConstantEnergy  = " << constantEnergy << endl
					//            << "--> difference  = " << (energy - constantEnergy) << endl
					;
			}

			// update the margins
			updateMargins( pTrainingData, pWeakHypothesis );

			// append the current weak learner to strong hypothesis file,
			// that is, serialize it.
			ss.appendHypothesis(t, pWeakHypothesis);

			// Add it to the internal list of weak hypotheses
			// NOTE(review): the pointer is deleted at the end of this iteration
			// (but not when the time limit breaks out of the loop), so
			// _foundHypotheses ends up holding dangling pointers. Ownership is
			// not visible from here - confirm what later reads this list.
			_foundHypotheses.push_back(pWeakHypothesis); 

			// check if the time limit has been reached
			if (_maxTime > 0)
			{
				time( &currentTime );
				float diff = difftime(currentTime, startTime); // difftime is in seconds
				diff /= 60; // = minutes

				if (diff > _maxTime)
				{
					if (_verbose > 0)
						cout << "Time limit of " << _maxTime 
						<< " minutes has been reached!" << endl;
					break;     
				}
			} // check for maxtime
			delete pWeakHypothesis;
		}  // loop on iterations
		/////////////////////////////////////////////////////////

		// write the footer of the strong hypothesis file
		ss.writeFooter();

		// Free the two input data objects
		if (pTrainingData)
			delete pTrainingData;
		if (pTestData)
			delete pTestData;

		if (pOutInfo)
			delete pOutInfo;

		if (_verbose > 0)
			cout << "Learning completed." << endl;
	}

예제 #12

파일 보기

파일: MultiMDDAGLearner.cpp 프로젝트: busarobi/MDDAG2

// -------------------------------------------------------------------------
// Generates rollout data for positions of the strong hypothesis and then
// retrains the policy of every position that collected enough rollout examples.
//
// If _randomNPercent is positive, only _randomNPercent percent of the
// _shypIter positions are rolled out, picked at random; otherwise every
// position is rolled out in increasing order.
//
// args, pData, rsize, policy and result are forwarded unchanged to
// MDDAGLearner::parallelRollout. fname is the rollout file name; the same
// name is used for every position (the per-position "_<index>" suffix was
// already disabled in the original code). weakLearnerPostion is not used here.
void MultiMDDAGLearner::parallelRollout(const nor_utils::Args& args, InputData* pData, const string fname, int rsize, GenericClassificationBasedPolicy* policy, PolicyResult* result, const int weakLearnerPostion)
{
    vector<AlphaReal> policyError(_shypIter);
    vector<InputData*> rollouts(_shypIter,NULL);

    // Positions to roll out; identity order unless a random subset is requested.
    vector<int> positions(_shypIter);
    for( int si = 0; si < _shypIter; ++si ) positions[si]=si;

    int numOfRollouts = _shypIter;
    if (_randomNPercent>0)
    {
        // NOTE(review): std::random_shuffle was removed in C++17; replace it
        // with std::shuffle once the toolchain is upgraded.
        random_shuffle(positions.begin(), positions.end());

        numOfRollouts = static_cast<int>(static_cast<float>(_shypIter * _randomNPercent) / 100.0);

        // A percentage above 100 must not index past the end of positions.
        if (numOfRollouts > _shypIter) numOfRollouts = _shypIter;
    }

    // generate rollout
    for( int ri = 0; ri < numOfRollouts; ++ri )
    {
        const int pos = positions[ri];

        MDDAGLearner::parallelRollout(args, pData, fname, rsize, policy, result, pos);
        InputData* rolloutTrainingData = getRolloutData( args, fname );

        // The update loop below tolerates NULL rollouts, so the verbose
        // output must not dereference a NULL pointer either.
        if (_verbose)
            cout << "---> Rollout size("<< pos << ")" << (rolloutTrainingData ? rolloutTrainingData->getNumExamples() : 0) << endl;

        rollouts[pos] = rolloutTrainingData;
    }

    // update policy
    int numOfUpdatedPolicy = 0;
    for( int si = 0; si < _shypIter; ++si )
    {
        // Skip positions that were not rolled out or have too few examples.
        if ((rollouts[si]==NULL) || (rollouts[si]->getNumExamples()<=2)) continue;
        policyError[si] = _policy->trainpolicy( rollouts[si], _baseLearnerName, _trainingIter, si );

        if (_verbose)
            cout << "--> Policy error: pos: " << si << "\t error:\t" << setprecision (4) << policyError[si] << endl;

        numOfUpdatedPolicy++;
    }

    if (_verbose)
        cout << "--> Number of updated policy" << numOfUpdatedPolicy << endl << flush;

    // release rollouts (deleting a NULL pointer is a no-op)
    for( int si = 0; si < _shypIter; ++si )
    {
        delete rollouts[si];
    }
}

예제 #13

파일 보기

파일: SoftCascadeLearner.cpp 프로젝트: junjiek/cmu-exp

    // Trains a soft cascade.
    //
    // Steps performed:
    //   1. read the options from args and load the training data, plus the
    //      bootstrap and test data when their file names are set;
    //   2. write the header of the strong hypothesis file and, when
    //      _outputInfoFile is set, prepare the OutputInfo object;
    //   3. obtain the pool of candidate weak hypotheses, either by running
    //      AdaBoostMHLearner (_fullRun) or by loading them from
    //      _unCalibratedShypFileName;
    //   4. for each iteration, pick the candidate with the largest separation
    //      span, compute its rejection threshold, serialize both, filter the
    //      training set and optionally bootstrap it;
    //   5. write the footer and release the data objects.
    void SoftCascadeLearner::run(const nor_utils::Args& args)
    {
        // load the arguments
        this->getArgs(args);
        
        //print cascade properties
        if (_verbose > 0) {
            cout    << "[+] Softcascade parameters :" << endl
                    << "\t --> target detection rate = " << _targetDetectionRate << endl
                    << "\t --> alpha (exp param) = " << _alphaExponentialParameter << endl
                    << "\t --> bootstrap rate = " << _bootstrapRate << endl
                    << endl;
        }
        

        // get the registered weak learner (type from name)
        BaseLearner* pWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
        // initialize learning options; normally it's done in the strong loop
        // also, here we do it for Product learners, so input data can be created
        pWeakHypothesisSource->initLearningOptions(args);

        // get the training input data, and load it
        InputData* pTrainingData = pWeakHypothesisSource->createInputData();
        pTrainingData->initOptions(args);
        pTrainingData->load(_trainFileName, IT_TRAIN, 5);

        // the bootstrap data is optional
        InputData* pBootstrapData = NULL;
        if (!_bootstrapFileName.empty()) {
            pBootstrapData = pWeakHypothesisSource->createInputData();
            pBootstrapData->initOptions(args);
            pBootstrapData->load(_bootstrapFileName, IT_TRAIN, 5);
        }
        
        // get the testing input data, and load it (optional as well)
        InputData* pTestData = NULL;
        if ( !_testFileName.empty() )
        {
            pTestData = pWeakHypothesisSource->createInputData();
            pTestData->initOptions(args);
            pTestData->load(_testFileName, IT_TEST, 5);
        }

        Serialization ss(_shypFileName, false );
        ss.writeHeader(_baseLearnerName);
        
        
//        outputHeader();
        // The output information object; stays NULL when no output file is requested
        OutputInfo* pOutInfo = NULL;

        if ( !_outputInfoFile.empty() ) 
        {
            pOutInfo = new OutputInfo(args, true);
            pOutInfo->setOutputList("sca", &args);
            
            pOutInfo->initialize(pTrainingData);
            
            if (pTestData)
                pOutInfo->initialize(pTestData);
            pOutInfo->outputHeader(pTrainingData->getClassMap(), true, true, false);
            pOutInfo->outputUserHeader("thresh");
            pOutInfo->headerEndLine();
        }
        
        
//        ofstream trainPosteriorsFile;
//        ofstream testPosteriorsFile;
        
        
        const NameMap& namemap = pTrainingData->getClassMap();
        _positiveLabelIndex = namemap.getIdxFromName(_positiveLabelName);

        // FIXME: output posteriors

//        OutputInfo* pTrainPosteriorsOut = NULL;
//        OutputInfo* pTestPosteriorsOut = NULL;
        
//        if (! _trainPosteriorsFileName.empty()) {
//            pTrainPosteriorsOut = new OutputInfo(_trainPosteriorsFileName, "pos", true);
//            pTrainPosteriorsOut->initialize(pTrainingData);
//            dynamic_cast<PosteriorsOutput*>( pTrainPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );
//        }
        
//        if (! _testPosteriorsFileName.empty() && !_testFileName.empty() ) {
//            pTestPosteriorsOut = new OutputInfo(_testPosteriorsFileName, "pos", true);
//            pTestPosteriorsOut->initialize(pTestData);
//            dynamic_cast<PosteriorsOutput*>( pTestPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );            
//        }
        
        const int numExamples = pTrainingData->getNumExamples();

        // pool of candidate weak hypotheses the cascade stages are picked from
        vector<BaseLearner*> inWeakHypotheses;
        
        if (_fullRun) {            
            // TODO : the full training is implemented, testing is needed
            AdaBoostMHLearner* sHypothesis = new AdaBoostMHLearner();
            sHypothesis->run(args, pTrainingData, _baseLearnerName, _numIterations, inWeakHypotheses );
            delete sHypothesis;
        }
        else { 
            
            cout << "[+] Loading uncalibrated shyp file... ";
            //read the shyp file of the trained classifier
            UnSerialization us;
            us.loadHypotheses(_unCalibratedShypFileName, inWeakHypotheses, pTrainingData);  
            if (_inShypLimit > 0 && _inShypLimit < inWeakHypotheses.size() ) {
                inWeakHypotheses.resize(_inShypLimit);
            }
            // never iterate more often than there are candidates
            if (_numIterations > inWeakHypotheses.size()) {
                _numIterations = inWeakHypotheses.size();
            }
            cout << "weak hypotheses loaded, " << inWeakHypotheses.size() << " retained.\n";
        }
        
        // some initializations
        _foundHypotheses.resize(0);
        double faceRejectionFraction = 0.;
        double estimatedExecutionTime = 0.;
        vector<double> rejectionDistributionVector;

        _rejectionThresholds.resize(0);
        
        
        // raw indices of the examples currently in the training set
        set<int> trainingIndices;
        for (int i = 0; i < numExamples; i++) {
            trainingIndices.insert(pTrainingData->getRawIndex(i) );
        }
        
        // init v_t (see the paper)
        initializeRejectionDistributionVector(_numIterations, rejectionDistributionVector);

        if (_verbose == 1)
            cout << "Learning in progress..." << endl;

        ///////////////////////////////////////////////////////////////////////
        // Starting the SoftCascade main loop
        ///////////////////////////////////////////////////////////////////////
        for (int t = 0; t < _numIterations; ++t)
        {
            // The candidate pool shrinks by one per iteration and is only
            // bounded by _numIterations in the non-full-run branch, so stop
            // instead of indexing into an empty vector.
            if (inWeakHypotheses.empty()) {
                if (_verbose > 0)
                    cout << "[+] No more weak hypotheses to select from." << endl;
                break;
            }

            if (_verbose > 0)
                cout << "--------------[ iteration " << (t+1) << " ]--------------" << endl;

            faceRejectionFraction += rejectionDistributionVector[t];
            
            cout << "[+] Face rejection tolerated : " << faceRejectionFraction << " | v[t] = " << rejectionDistributionVector[t] << endl;
            
            // NOTE(review): 1 - _positiveLabelIndex presumes exactly two classes - confirm
            int numberOfNegatives = pTrainingData->getNumExamplesPerClass(1 - _positiveLabelIndex);
            
            int selectedIndex = 0;
            AlphaReal bestGap = 0;
            vector<AlphaReal> posteriors;
            computePosteriors(pTrainingData, _foundHypotheses, posteriors, _positiveLabelIndex);
            
            // Try every remaining candidate on a copy of the current
            // posteriors and remember the one with the largest separation span.
            const int numCandidates = static_cast<int>(inWeakHypotheses.size());
            for (int i = 0; i < numCandidates; ++i) {
            
                vector<AlphaReal> candidatePosteriors = posteriors;
                updatePosteriors(pTrainingData, inWeakHypotheses[i], candidatePosteriors, _positiveLabelIndex);
                
                const AlphaReal gap = computeSeparationSpan(pTrainingData, candidatePosteriors, _positiveLabelIndex );

                if (gap > bestGap) {
                    bestGap = gap;
                    selectedIndex = i;
                }
            }
            
            BaseLearner* selectedWeakHypothesis = inWeakHypotheses[selectedIndex];
            
            cout << "[+] Rank of the selected weak hypothesis : " << selectedIndex << endl
                 << "\t ---> edge gap = " << bestGap << endl
                 << "\t ---> alpha = " << selectedWeakHypothesis->getAlpha() << endl;

            //update the stages
            _foundHypotheses.push_back(selectedWeakHypothesis);
            updatePosteriors(pTrainingData, selectedWeakHypothesis, posteriors, _positiveLabelIndex);
            
            double missesFraction;
            AlphaReal r = findBestRejectionThreshold(pTrainingData, posteriors, faceRejectionFraction, missesFraction);
            _rejectionThresholds.push_back(r);
            
            
            // update the output info object; it only exists when an output
            // file was requested, and the cast yields NULL on a type mismatch
            if (pOutInfo) {
                SoftCascadeOutput* pCascadeOutput =
                    dynamic_cast<SoftCascadeOutput*>( pOutInfo->getOutputInfoObject("sca") );
                if (pCascadeOutput)
                    pCascadeOutput->appendRejectionThreshold(r);
            }
            
            cout << "[+] Rejection threshold = " << r << endl;
            
            //some updates
            ss.appendHypothesisWithThreshold(t, selectedWeakHypothesis, r);
            faceRejectionFraction -= missesFraction;
            
            // the selected hypothesis is no longer a candidate
            inWeakHypotheses.erase(inWeakHypotheses.begin() + selectedIndex);
            double whypCost = 1; //just in case there are different costs for each whyp
            estimatedExecutionTime += whypCost * numberOfNegatives;
            
            // output perf in file
            _output << t + 1 << setw(_sepWidth + 1) << r << setw(_sepWidth);
            
            // update OutputInfo with the new whyp
//            updateOutputInfo(pOutInfo, pTrainingData, selectedWeakHypothesis);
//            if (pTestData) {
//                updateOutputInfo(pOutInfo, pTestData, selectedWeakHypothesis);
//            }
            

            // output the iteration results
            printOutputInfo(pOutInfo, t, pTrainingData, pTestData, selectedWeakHypothesis, r);
                        
//            if (pTrainPosteriorsOut) {
//                pTrainPosteriorsOut->setTable(pTrainingData, pOutInfo->getTable(pTrainingData));
//                pTrainPosteriorsOut->outputCustom(pTrainingData);
//            }
//
//            if (pTestPosteriorsOut) {
//                pTestPosteriorsOut->setTable(pTestData, pOutInfo->getTable(pTestData));
//                pTestPosteriorsOut->outputCustom(pTestData);
//            }
            
            
            int leftNegatives = filterDataset(pTrainingData, posteriors, r, trainingIndices);
            if (leftNegatives == 0) {
                cout << endl << "[+] No more negatives.\n";
                break;
            }
            
            // NOTE(review): pBootstrapData is NULL when no bootstrap file name
            // was given - confirm bootstrapTrainingSet copes with that.
            if (_bootstrapRate != 0) {
                bootstrapTrainingSet(pTrainingData, pBootstrapData, trainingIndices);
            }

        }  // loop on iterations
        /////////////////////////////////////////////////////////

        // write the footer of the strong hypothesis file
        ss.writeFooter();

        // Free the input data objects and the output info object
        // (deleting a NULL pointer is a no-op)
        delete pTrainingData;
        delete pBootstrapData;
        delete pTestData;
        delete pOutInfo;

        if (_verbose > 0)
            cout << "Learning completed." << endl;
    }