C++ (Cpp) BaseLearner::create Beispiele

Programmiersprache: C++ (Cpp)

Klasse / Typ: BaseLearner

Methode / Funktion: create

Beispiele auf hotexamples.com: 13

C++ (Cpp) BaseLearner::create – 13 Beispiele gefunden. Dies sind die am besten bewerteten C++-(Cpp)-Beispiele für die Methode BaseLearner::create, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

getAlpha(18)

create(13)

classify(12)

setTrainingData(8)

initLearningOptions(7)

createInputData(5)

run(5)

getEdge(4)

getName(4)

copyState(2)

setAlpha(2)

createGenericStrongLearner(1)

declareArguments(1)

load(1)

Beispiel #1

Datei anzeigen

Datei: BanditSingleSparseStump.cpp Projekt: busarobi/MDDAG2

		void BanditSingleSparseStump::init() {
			const int numClasses = _pTrainingData->getNumClasses();
			const int numColumns = _pTrainingData->getNumAttributes();
			const int armNumber = _banditAlgo->getArmNumber();

			if ( numColumns < armNumber )
			{
				cerr << "The number of colums smaller than the number of the arms!!!!!!" << endl;
				exit( -1 );
			}

			BaseLearner* pWeakHypothesisSource = 
				BaseLearner::RegisteredLearners().getLearner("SingleSparseStumpLearner");

			_banditAlgo->setArmNumber( numColumns );

			vector<AlphaReal> initialValues( numColumns );

			for( int i=0; i < numColumns; i++ )
			{
				SingleSparseStumpLearner* singleStump = dynamic_cast<SingleSparseStumpLearner*>( pWeakHypothesisSource->create());

				singleStump->setTrainingData(_pTrainingData);
				AlphaReal energy = singleStump->run( i );
				AlphaReal edge = singleStump->getEdge();
				AlphaReal reward = getRewardFromEdge( (AlphaReal) edge );

				initialValues[i] = reward;

				delete singleStump;
			}

			_banditAlgo->initialize( initialValues );

	}

Beispiel #2

Datei anzeigen

Datei: TreeLearner.cpp Projekt: busarobi/MDDAG2

	// -----------------------------------------------------------------------
	/**
	 * Fills in the edge statistics of a tree node.
	 *
	 * The node is marked as extended and its index set is loaded into the
	 * training data. A learner created from the registered "ConstantLearner"
	 * is always run and provides the baseline (node._constantEnergy,
	 * node._constantEdge). Unless all loaded samples come from one class, a
	 * learner created from _pScalaWeakHypothesisSource is run as well, and
	 * node._edgeImprovement is set to its edge minus the constant edge.
	 *
	 * Special cases:
	 *  - samples from one class: node._learner stays NULL, node._edge is NaN,
	 *    node._edgeImprovement is 0.
	 *  - the learner's energy is NaN: node._edge is NaN and
	 *    node._edgeImprovement is the lowest (negated max) AlphaReal.
	 *
	 * \param node The node to evaluate; updated in place.
	 */
	void TreeLearner::calculateEdgeImprovement( NodePoint& node ) {
		node._extended = true;
		_pTrainingData->loadIndexSet( node._learnerIdxSet );

		// Baseline: train a constant learner on the node's data.
		BaseLearner* pConstantSource = BaseLearner::RegisteredLearners().getLearner("ConstantLearner");
		node._constantLearner = dynamic_cast<ScalarLearner*>( pConstantSource->create() );
		node._constantLearner->setTrainingData(_pTrainingData);
		node._constantEnergy = node._constantLearner->run();
		node._constantEdge = node._constantLearner->getEdge(false);

		node._learner = NULL;

		// Nothing to learn when every sample belongs to the same class.
		if ( _pTrainingData->isSamplesFromOneClass() ) {
			node._edge = numeric_limits<AlphaReal>::signaling_NaN();
			node._edgeImprovement = 0.0;
			return;
		}

		node._learner = dynamic_cast<ScalarLearner*>( _pScalaWeakHypothesisSource->create() );
		_pScalaWeakHypothesisSource->subCopyState(node._learner);
		node._learner->setTrainingData(_pTrainingData);
		node._learnerEnergy = node._learner->run();

		// A value compares unequal to itself only when it is NaN.
		const bool energyIsNaN = !( node._learnerEnergy == node._learnerEnergy );
		if ( energyIsNaN ) {
			node._edge = numeric_limits<AlphaReal>::signaling_NaN();
			node._edgeImprovement = -numeric_limits<AlphaReal>::max();
			return;
		}

		node._edge = node._learner->getEdge(false);
		node._edgeImprovement = node._edge - node._constantEdge;
	}

Beispiel #3

Datei anzeigen

Datei: BanditSingleStumpLearner.cpp Projekt: busarobi/MDDAG

	// ------------------------------------------------------------------------------
	/**
	 * Estimates a payoff (reward) for every attribute (column).
	 *
	 * When numExamples * _percentage is at least 2, a random subset of that
	 * many example indices is loaded into the training data for the duration
	 * of the estimation; otherwise the currently loaded data is used. For each
	 * column i whose payoff is not already positive, a SingleStumpLearner is
	 * created, run(i) is called on it, and its edge is converted to a reward
	 * with getRewardFromEdge(). The index set that was active on entry is
	 * reloaded before returning.
	 *
	 * \param payoffs Resized to the number of columns. Entries that are
	 *                already > 0 are left untouched, all others are overwritten.
	 */
	void BanditSingleStumpLearner::estimatePayoffs( vector<AlphaReal>& payoffs )
	{
		set<int> oldIndexSet;
		set<int> randomIndexSet;
		const int numExamples = _pTrainingData->getNumExamples();
		const int numColumns = _pTrainingData->getNumAttributes();

		_pTrainingData->getIndexSet( oldIndexSet );
		int numSubset = static_cast<int>( static_cast<double>(numExamples) * _percentage );

		// With fewer than 2 examples in the subset, keep the data as it is.
		if ( numSubset >= 2 )
		{
			// Selection sampling: pick exactly numSubset example indices out of
			// numExamples, each subset being equally likely. Example j is taken
			// with probability (still needed) / (still available).
			for (int j = 0; j < numExamples; ++j)
			{
				const int rest = numExamples - j;

				// r must lie in [0,1): with r == 1 the forced pick
				// (numSubset == rest) would be skipped and the subset would end
				// up one element short. The comparison is done in double so the
				// upper bound cannot round up to 1.
				const double r = rand() / ( static_cast<double>(RAND_MAX) + 1.0 );

				if ( static_cast<double>(numSubset) / rest > r )
				{
					--numSubset;
					randomIndexSet.insert( j );
				}
			}
			_pTrainingData->loadIndexSet( randomIndexSet );
		}

		payoffs.resize( numColumns );

		BaseLearner* pWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner("SingleStumpLearner");

		for( int i=0; i < numColumns; i++ )
		{
			// Columns that already carry a positive payoff are not re-estimated.
			if ( payoffs[i] > 0.0 ) continue;

			SingleStumpLearner* singleStump = dynamic_cast<SingleStumpLearner*>( pWeakHypothesisSource->create());

			singleStump->setTrainingData(_pTrainingData);

			// The value returned by run() is not used; only the edge matters here.
			singleStump->run( i );
			const AlphaReal edge = singleStump->getEdge();
			const AlphaReal reward = getRewardFromEdge( static_cast<float>( edge ) );

			payoffs[i] = reward;
			delete singleStump;
		}

		// restore the index set that was active on entry
		_pTrainingData->loadIndexSet( oldIndexSet );
	}

Beispiel #4

Datei anzeigen

Datei: ProductLearner.cpp Projekt: busarobi/MDDAG2

	/**
	 * Reads the product learner's options from the command line arguments.
	 *
	 * After the base class options are processed, the "baselearnertype"
	 * argument supplies the name of the base learner (value 0) and the number
	 * of base learners (value 1, stored in _numBaseLearners). The registered
	 * learner of that name is initialized with the arguments and then used to
	 * create _numBaseLearners learners, each appended to _baseLearners and
	 * initialized with the same arguments.
	 *
	 * \param args The parsed command line arguments.
	 */
	void ProductLearner::initLearningOptions(const nor_utils::Args& args)
	{
		BaseLearner::initLearningOptions(args);

		string learnerTypeName;
		args.getValue("baselearnertype", 0, learnerTypeName);
		args.getValue("baselearnertype", 1, _numBaseLearners);

		// look up the registered learner that acts as the factory
		BaseLearner* const pPrototype =
			BaseLearner::RegisteredLearners().getLearner(learnerTypeName);
		pPrototype->initLearningOptions(args);

		int ib = 0;
		while ( ib < _numBaseLearners )
		{
			_baseLearners.push_back( pPrototype->create() );
			_baseLearners[ib]->initLearningOptions(args);
			++ib;
		}
	}

Beispiel #5

Datei anzeigen

Datei: TreeLearnerUCT.cpp Projekt: junjiek/cmu-exp

/**
 * Reads the learner's options from the command line arguments.
 *
 * After the base class options are processed, the "baselearnertype" argument
 * supplies the base learner name (value 0) and the number of base learners
 * (value 1, stored in _numBaseLearners). That many learners are created from
 * the registered learner, appended to _baseLearners and initialized; for each
 * one an index pair {-1, -1} is appended to _idxPairs.
 *
 * The optional "updaterule" argument selects _updateRule:
 *   "edge"       -> EDGE_SQUARE
 *   "alphas"     -> ALPHAS
 *   "edgesquare" -> ESQUARE
 * When the argument is absent, EDGE_SQUARE is used silently. An unrecognized
 * value also falls back to EDGE_SQUARE, with a warning on stderr.
 *
 * \param args The parsed command line arguments.
 */
void TreeLearnerUCT::initLearningOptions(const nor_utils::Args& args)
{
    BaseLearner::initLearningOptions(args);

    string baseLearnerName;
    args.getValue("baselearnertype", 0, baseLearnerName);
    args.getValue("baselearnertype", 1, _numBaseLearners);

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

    for( int ib = 0; ib < _numBaseLearners; ++ib ) {
        _baseLearners.push_back(pWeakHypothesisSource->create());
        _baseLearners[ib]->initLearningOptions(args);

        vector< int > tmpVector( 2, -1 );
        _idxPairs.push_back( tmpVector );
    }

    // Default rule; only overridden by a recognized "updaterule" value.
    _updateRule = EDGE_SQUARE;

    if ( args.hasArgument( "updaterule" ) )
    {
        string updateRule = "";
        args.getValue("updaterule", 0, updateRule );

        if ( updateRule.compare( "edge" ) == 0 )
            _updateRule = EDGE_SQUARE;
        else if ( updateRule.compare( "alphas" ) == 0 )
            _updateRule = ALPHAS;
        else if ( updateRule.compare( "edgesquare" ) == 0 )
            _updateRule = ESQUARE;
        else
            // Warn only when the user actually supplied an unusable value.
            cerr << "Unknown update rule in TreeLearnerUCT (set to default [edge])" << endl;
    }
}

Beispiel #6

Datei anzeigen

Datei: FilterBoostLearner.cpp Projekt: junjiek/cmu-exp

    /**
     * Runs the FilterBoost training loop.
     *
     * Steps, in order:
     *  1. Reads the options (getArgs), looks up the configured base learner
     *     and the "ConstantLearner", and loads the training data and, if a
     *     test file name is set, the test data.
     *  2. Resets _margins to zero (one row per example, one column per class).
     *  3. If an output-info file is configured, trains a baseline constant
     *     learner, updates the margins with it and writes the header and the
     *     baseline line (iteration -1).
     *  4. Resumes previously found weak learners (resumeWeakLearners /
     *     resumeProcess) and opens the strong hypothesis file.
     *  5. For each iteration up to _numIterations: trains a weak learner and a
     *     constant learner (online on randomly drawn examples when
     *     _onlineWeakLearning is set, otherwise on filtered data), keeps the
     *     one with the larger edge, computes its alpha, writes the output
     *     information, updates the margins and serializes the hypothesis.
     *     The loop stops early when _maxTime (minutes) is exceeded.
     *  6. Writes the footer and frees the data and output-info objects.
     *
     * In online mode the process is terminated with exit(-1) when either
     * learner is not a StochasticLearner.
     *
     * \param args The parsed command line arguments.
     */
    void FilterBoostLearner::run(const nor_utils::Args& args)
    {
        // load the arguments
        this->getArgs(args);

        time_t startTime, currentTime;
        time(&startTime);

        // get the registered weak learner (type from name)
        BaseLearner* pWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
        // initialize learning options; normally it's done in the strong loop
        // also, here we do it for Product learners, so input data can be created
        pWeakHypothesisSource->initLearningOptions(args);

        BaseLearner* pConstantWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

        // get the training input data, and load it

        InputData* pTrainingData = pWeakHypothesisSource->createInputData();
        pTrainingData->initOptions(args);
        pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

        const int numClasses = pTrainingData->getNumClasses();
        const int numExamples = pTrainingData->getNumExamples();

        //initialize the margins variable
        _margins.resize( numExamples );
        for( int i=0; i<numExamples; i++ )
        {
            _margins[i].resize( numClasses );
            fill( _margins[i].begin(), _margins[i].end(), 0.0 );
        }


        // get the testing input data, and load it
        InputData* pTestData = NULL;
        if ( !_testFileName.empty() )
        {
            pTestData = pWeakHypothesisSource->createInputData();
            pTestData->initOptions(args);
            pTestData->load(_testFileName, IT_TEST, _verbose);
        }

        // The output information object
        OutputInfo* pOutInfo = NULL;


        if ( !_outputInfoFile.empty() ) 
        {
            // Baseline: constant classifier - goes into 0th iteration

            BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEnergy = pConstantWeakHypothesis->run();

            pOutInfo = new OutputInfo(args);
            pOutInfo->initialize(pTrainingData);

            updateMargins( pTrainingData, pConstantWeakHypothesis );

            if (pTestData)
                pOutInfo->initialize(pTestData);
            pOutInfo->outputHeader(pTrainingData->getClassMap() );

            pOutInfo->outputIteration(-1);
            pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

            if (pTestData)
            {
                pOutInfo->separator();
                pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);
            }

            pOutInfo->outputCurrentTime();

            pOutInfo->endLine();
            pOutInfo->initialize(pTrainingData);

            if (pTestData)
                pOutInfo->initialize(pTestData);

            // The baseline learner is not referenced after this point; release
            // it, the same way the main loop releases hypotheses after they
            // have been written out.
            delete pConstantWeakHypothesis;
        }
        // reload the previously found weak learners if -resume is set. 
        // otherwise just return 0
        int startingIteration = resumeWeakLearners(pTrainingData);


        Serialization ss(_shypFileName, _isShypCompressed );
        ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

        // perform the resuming if necessary. If not it will just return
        resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

        if (_verbose == 1)
            cout << "Learning in progress..." << endl;

        ///////////////////////////////////////////////////////////////////////
        // Starting the AdaBoost main loop
        ///////////////////////////////////////////////////////////////////////
        for (int t = startingIteration; t < _numIterations; ++t)
        {                       
            if (_verbose > 1)
                cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

            // create the weak learner
            BaseLearner* pWeakHypothesis;
            BaseLearner* pConstantWeakHypothesis;
            pWeakHypothesis = pWeakHypothesisSource->create();
            pWeakHypothesis->initLearningOptions(args);
            //pTrainingData->clearIndexSet();
            pWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal edge, energy=0.0;

            // create the constant learner
            pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEdge = -numeric_limits<AlphaReal>::max();

            // number of examples used in this iteration grows with log(t)
            int currentNumberOfUsedData = static_cast<int>(_Cn * log(t+3.0));

            if ( _onlineWeakLearning )
            {
                // Online training requires both learners to be StochasticLearners.
                // A dynamic_cast on a pointer yields NULL on failure (it never
                // throws bad_cast), so the results are tested explicitly.
                StochasticLearner* pStochasticLearner = dynamic_cast<StochasticLearner*>(pWeakHypothesis);
                StochasticLearner* pStochasticConstantWeakHypothesis = dynamic_cast<StochasticLearner*> (pConstantWeakHypothesis);

                if ( pStochasticLearner == NULL || pStochasticConstantWeakHypothesis == NULL )
                {
                    cerr << "The weak learner must be a StochasticLearner!!!" << endl;
                    exit(-1);
                }

                pStochasticLearner->initLearning();
                pStochasticConstantWeakHypothesis->initLearning();

                if (_verbose>1)
                    cout << "Number of random instances: \t" << currentNumberOfUsedData << endl;

                // set the weights
                setWeightToMargins(pTrainingData);

                //learning
                for (int i=0; i<currentNumberOfUsedData; ++i )
                {
                    int randomIndex = (rand() % pTrainingData->getNumExamples());   
                    //int randomIndex = getRandomIndex();
                    pStochasticLearner->update(randomIndex);
                    pStochasticConstantWeakHypothesis->update(randomIndex);
                }                                       
                pStochasticLearner->finishLearning();           
                pStochasticConstantWeakHypothesis->finishLearning();
            }
            else
            {
                filter( pTrainingData, currentNumberOfUsedData );
                // too few examples survived: filter again with the third
                // argument set to false
                if ( pTrainingData->getNumExamples() < 2 ) 
                {
                    filter( pTrainingData, currentNumberOfUsedData, false );
                }

                if (_verbose > 1)
                {
                    cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
                }

                energy = pWeakHypothesis->run();                                                                
                pConstantWeakHypothesis->run(); 
            }                       

            //estimate edge
            filter( pTrainingData, currentNumberOfUsedData, false );
            edge = pWeakHypothesis->getEdge(true) / 2.0;                                            
            constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;


            // keep whichever learner has the larger edge, free the other one
            if ( constantEdge > edge )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
                edge = constantEdge;
            } else {
                delete pConstantWeakHypothesis;
            }

            // calculate alpha
            AlphaReal alpha = 0.0;
            alpha = 0.5 * log( ( 1 + edge ) / ( 1 - edge ) );
            pWeakHypothesis->setAlpha( alpha );
            _sumAlpha += alpha;

            if (_verbose > 1)
                cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
            // Output the step-by-step information
            pTrainingData->clearIndexSet();
            printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

            // Updates the weights and returns the edge
            //AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

            if (_verbose > 1)
            {
                cout << setprecision(5)
                     << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                     << "--> Edge  = " << edge << endl
                     << "--> Energy  = " << energy << endl
                    //            << "--> ConstantEnergy  = " << constantEnergy << endl
                    //            << "--> difference  = " << (energy - constantEnergy) << endl
                    ;
            }

            // update the margins
            //saveMargins();
            updateMargins( pTrainingData, pWeakHypothesis );

            // append the current weak learner to strong hypothesis file,
            // that is, serialize it.
            ss.appendHypothesis(t, pWeakHypothesis);

            // Add it to the internal list of weak hypotheses
            _foundHypotheses.push_back(pWeakHypothesis); 

            // check if the time limit has been reached
            if (_maxTime > 0)
            {
                time( &currentTime );
                float diff = difftime(currentTime, startTime); // difftime is in seconds
                diff /= 60; // = minutes

                if (diff > _maxTime)
                {
                    if (_verbose > 0)
                        cout << "Time limit of " << _maxTime 
                             << " minutes has been reached!" << endl;
                    break;     
                }
            } // check for maxtime

            // NOTE(review): the pointer pushed into _foundHypotheses above is
            // deleted here, so that entry dangles afterwards (except for the
            // last hypothesis when the time-limit break is taken). Confirm who
            // owns _foundHypotheses before relying on its contents.
            delete pWeakHypothesis;
        }  // loop on iterations
        /////////////////////////////////////////////////////////

        // write the footer of the strong hypothesis file
        ss.writeFooter();

        // Free the two input data objects
        if (pTrainingData)
            delete pTrainingData;
        if (pTestData)
            delete pTestData;

        if (pOutInfo)
            delete pOutInfo;

        if (_verbose > 0)
            cout << "Learning completed." << endl;
    }

Beispiel #7

Datei anzeigen

Datei: main.cpp Projekt: busarobi/MDDAG2

/**
 * The main function. Everything starts here!
 * \param argc The number of arguments.
 * \param argv The arguments.
 * \date 11/11/2005
 */
int main(int argc, const char* argv[])
{
	// initializing the random number generator
	srand ( time(NULL) );
	
	// no need to synchronize with C style stream
	std::ios_base::sync_with_stdio(false);
	
#if STABLE_SORT
	cerr << "WARNING: Stable sort active! It might be slower!!" << endl;
#endif
	
	//////////////////////////////////////////////////////////////////////////
	// Standard arguments
	nor_utils::Args args;
	
	args.setArgumentDiscriminator("--");
	
	args.declareArgument("help");
	args.declareArgument("static");
	
	args.declareArgument("h", "Help", 1, "<optiongroup>");
	
	//////////////////////////////////////////////////////////////////////////
	// Basic Arguments
	
	args.setGroup("Parameters");
	
	args.declareArgument("train", "Performs training.", 2, "<dataFile> <nInterations>");
	args.declareArgument("traintest", "Performs training and test at the same time.", 3, "<trainingDataFile> <testDataFile> <nInterations>");
	args.declareArgument("trainvalidtest", "Performs training and test at the same time.", 4, "<trainingDataFile> <validDataFile> <testDataFile> <nInterations>");
	args.declareArgument("test", "Test the model.", 3, "<dataFile> <numIters> <shypFile>");
	args.declareArgument("test", "Test the model and output the results", 4, "<datafile> <shypFile> <numIters> <outFile>");
	args.declareArgument("cmatrix", "Print the confusion matrix for the given model.", 2, "<dataFile> <shypFile>");
	args.declareArgument("cmatrixfile", "Print the confusion matrix with the class names to a file.", 3, "<dataFile> <shypFile> <outFile>");
	args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model.", 4, "<dataFile> <shypFile> <outFile> <numIters>");
	args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model periodically.", 5, "<dataFile> <shypFile> <outFile> <numIters> <period>");	
		
	args.declareArgument("encode", "Save the coefficient vector of boosting individually on each point using ParasiteLearner", 6, "<inputDataFile> <autoassociativeDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>");	
	args.declareArgument("ssfeatures", "Print matrix data for SingleStump-Based weak learners (if numIters=0 it means all of them).", 4, "<dataFile> <shypFile> <outFile> <numIters>");
	
	args.declareArgument( "fileformat", "Defines the type of intput file. Available types are:\n" 
						 "* simple: each line has attributes separated by whitespace and class at the end (DEFAULT!)\n"
						 "* arff: arff filetype. The header file can be specified using --headerfile option\n"
						 "* arffbzip: bziped arff filetype. The header file can be specified using --headerfile option\n"
						 "* svmlight: \n"
						 "(Example: --fileformat simple)",
                         1, "<fileFormat>" );
	
	args.declareArgument("headerfile", "The header file for arff and SVMLight and arff formats.", 1, "header.txt");
	
	args.declareArgument("constant", "Check constant learner in each iteration.", 0, "");
	args.declareArgument("timelimit", "Time limit in minutes", 1, "<minutes>" );
	args.declareArgument("stronglearner", "Available strong learners:\n"
						 "AdaBoost (default)\n"
						 "FilterBoost\n"
                         "SoftCascade\n"
                         "VJcascade\n", 1, "<stronglearner>" );
	
	args.declareArgument("slowresumeprocess", "Computes every statitstic in each iteration (slow resume)\n"
						 "Computes only the statistics in the last iteration (fast resume, default)\n", 0, "" );
	args.declareArgument("weights", "Outputs the weights of instances at the end of the learning process", 1, "<filename>" );
	args.declareArgument("Cn", "Resampling size for FilterBoost (default=300)", 1, "<value>" );
	
	args.declareArgument("onlinetraining", "The weak learner will be trained online\n", 0, "" );
	
	//// ignored for the moment!
	//args.declareArgument("arffheader", "Specify the arff header.", 1, "<arffHeaderFile>");
	
	// for MDDAG
	//args.setGroup("MDDAG");
	args.declareArgument("traintestmddag", "Performs training and test at the same time using mddag.", 5, "<trainingDataFile> <testDataFile> <modelFile> <nIterations> <baseIter>");
	args.declareArgument("policytrainingiter", "The iteration number the policy learner takes.", 1, "<iternum>");
	args.declareArgument("rollouts", "The number of rollouts.", 1, "<num>");
	args.declareArgument("rollouttype", "Rollout type (montecarlo or szatymaz)", 1, "<rollouttype>");
	args.declareArgument("beta", "Trade-off parameter", 1, "<beta>");
	args.declareArgument("outdir", "Output directory.", 1, "<outdir>");
	args.declareArgument("policyalpha", "Alpha for policy array.", 1, "<alpha>");
	args.declareArgument("succrewardtype", "Rewrd type (e01 or hammng)", 1, "<rward_type");
	args.declareArgument("outtrainingerror", "Output training error", 0, "");
	args.declareArgument("epsilon", "Exploration term", 1, "<epsilon>");
	args.declareArgument("updateperc", "Number of component in the policy are updated", 1, "<perc>");
	
	// for VJ cascade
	VJCascadeLearner::declareBaseArguments(args);
    
    // for SoftCascade
    SoftCascadeLearner::declareBaseArguments(args);
	//////////////////////////////////////////////////////////////////////////
	// Options
	
	args.setGroup("I/O Options");
	
	/////////////////////////////////////////////
	// these are valid only for .txt input!
	// they might be removed!
	args.declareArgument("d", "The separation characters between the fields (default: whitespaces).\nExample: -d \"\\t,.-\"\nNote: new-line is always included!", 1, "<separators>");
	args.declareArgument("classend", "The class is the last column instead of the first (or second if -examplelabel is active).");
	args.declareArgument("examplename", "The data file has an additional column (the very first) which contains the 'name' of the example.");
	/////////////////////////////////////////////
	
	args.setGroup("Basic Algorithm Options");
	args.declareArgument("weightpolicy", "Specify the type of weight initialization. The user specified weights (if available) are used inside the policy which can be:\n"
						 "* sharepoints Share the weight equally among data points and between positiv and negative labels (DEFAULT)\n"
						 "* sharelabels Share the weight equally among data points\n"
						 "* proportional Share the weights freely", 1, "<weightType>");
	
	
	args.setGroup("General Options");
	
	args.declareArgument("verbose", "Set the verbose level 0, 1 or 2 (0=no messages, 1=default, 2=all messages).", 1, "<val>");
	args.declareArgument("outputinfo", "Output informations on the algorithm performances during training, on file <filename>.", 1, "<filename>");
	args.declareArgument("outputinfo", "Output specific informations on the algorithm performances during training, on file <filename> <outputlist>. <outputlist> must be a concatenated list of three characters abreviation (ex: err for error, fpr for false positive rate)", 2, "<filename> <outputlist>");

	args.declareArgument("seed", "Defines the seed for the random operations.", 1, "<seedval>");
	
	//////////////////////////////////////////////////////////////////////////
	// Shows the list of available learners
	string learnersComment = "Available learners are:";
	
	vector<string> learnersList;
	BaseLearner::RegisteredLearners().getList(learnersList);
	vector<string>::const_iterator it;
	for (it = learnersList.begin(); it != learnersList.end(); ++it)
	{
		learnersComment += "\n ** " + *it;
		// defaultLearner is defined in Defaults.h
		if ( *it == defaultLearner )
			learnersComment += " (DEFAULT)";
	}
	
	args.declareArgument("learnertype", "Change the type of weak learner. " + learnersComment, 1, "<learner>");
	
	//////////////////////////////////////////////////////////////////////////
	//// Declare arguments that belongs to all weak learners
	BaseLearner::declareBaseArguments(args);
	
	////////////////////////////////////////////////////////////////////////////
	//// Weak learners (and input data) arguments
	for (it = learnersList.begin(); it != learnersList.end(); ++it)
	{
		args.setGroup(*it + " Options");
		// add weaklearner-specific options
		BaseLearner::RegisteredLearners().getLearner(*it)->declareArguments(args);
	}
	
	//////////////////////////////////////////////////////////////////////////
	//// Declare arguments that belongs to all bandit learner
	GenericBanditAlgorithm::declareBaseArguments(args);
	
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	switch ( args.readArguments(argc, argv) )
	{
		case nor_utils::AOT_NO_ARGUMENTS:
			showBase();
			break;
			
		case nor_utils::AOT_UNKOWN_ARGUMENT:
			exit(1);
			break;
			
		case nor_utils::AOT_INCORRECT_VALUES_NUMBER:
			exit(1);
			break;
			
		case nor_utils::AOT_OK:
			break;
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	if ( args.hasArgument("help") )
		showHelp(args, learnersList);
	if ( args.hasArgument("static") )
		showStaticConfig();
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	if ( args.hasArgument("h") )
		showOptionalHelp(args);
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	int verbose = 1;
	
	if ( args.hasArgument("verbose") )
		args.getValue("verbose", 0, verbose);
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	// defines the seed
	if (args.hasArgument("seed"))
	{
		unsigned int seed = args.getValue<unsigned int>("seed", 0);
		srand(seed);
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	GenericStrongLearner* pModel = NULL;
	
	if ( args.hasArgument("train") ||
        args.hasArgument("traintest") || 
	    args.hasArgument("trainvalidtest") ) // for Viola-Jones Cascade
	{
		
		// get the name of the learner
		string baseLearnerName = defaultLearner;
		if ( args.hasArgument("learnertype") )
			args.getValue("learnertype", 0, baseLearnerName);
		
		checkBaseLearner(baseLearnerName);
		if (verbose > 1)    
			cout << "--> Using learner: " << baseLearnerName << endl;
		
		// This hould be changed: the user decides the strong learner
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->run(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("traintestmddag") )
	{
		// -test <dataFile> <shypFile> <numIters>
		string shypFileName = args.getValue<string>("traintestmddag", 2);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
		
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->run(args);
		
	}		
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("test") )
	{
		// -test <dataFile> <shypFile> <numIters>
		string shypFileName = args.getValue<string>("test", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
                
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->classify(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("cmatrix") )
	{
		// -cmatrix <dataFile> <shypFile>
		
		string shypFileName = args.getValue<string>("cmatrix", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->doConfusionMatrix(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("posteriors") )
	{
		// -posteriors <dataFile> <shypFile> <outFileName>
		string shypFileName = args.getValue<string>("posteriors", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
        
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->doPosteriors(args);
	}   
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("ssfeatures") )
	{
		// ONLY for AdaBoostMH classifiers
		
		// -ssfeatures <dataFile> <shypFile> <outFile> <numIters>
		string testFileName = args.getValue<string>("ssfeatures", 0);
		string shypFileName = args.getValue<string>("ssfeatures", 1);
		string outFileName = args.getValue<string>("ssfeatures", 2);
		int numIterations = args.getValue<int>("ssfeatures", 3);
		
		cerr << "ERROR: ssfeatures has been deactivated for the moment!" << endl;
		
		
		//classifier.saveSingleStumpFeatureData(testFileName, shypFileName, outFileName, numIterations);
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("encode") )
	{
		
		// --encode <inputDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>
		string labelsFileName = args.getValue<string>("encode", 0);
		string autoassociativeFileName = args.getValue<string>("encode", 1);
		string outputFileName = args.getValue<string>("encode", 2);
		int numIterations = args.getValue<int>("encode", 3);
		string poolFileName = args.getValue<string>("encode", 4);
		int numBaseLearners = args.getValue<int>("encode", 5);
		string outputInfoFile;
		const char* tmpArgv1[] = {"bla", // for ParasiteLearner
			"--pool",
			args.getValue<string>("encode", 4).c_str(),
			args.getValue<string>("encode", 5).c_str()};
		args.readArguments(4,tmpArgv1);
		
		InputData* pAutoassociativeData = new InputData();
		pAutoassociativeData->initOptions(args);
		pAutoassociativeData->load(autoassociativeFileName,IT_TRAIN,verbose);
		
		// for the original labels
		InputData* pLabelsData = new InputData();
		pLabelsData->initOptions(args);
		pLabelsData->load(labelsFileName,IT_TRAIN,verbose);
		
		// set up all the InputData members identically to pAutoassociativeData
		EncodeData* pOnePoint = new EncodeData();
		pOnePoint->initOptions(args);
		pOnePoint->load(autoassociativeFileName,IT_TRAIN,verbose);
		
		const int numExamples = pAutoassociativeData->getNumExamples();
		BaseLearner* pWeakHypothesisSource = 
		BaseLearner::RegisteredLearners().getLearner("ParasiteLearner");
		pWeakHypothesisSource->declareArguments(args);
		
		ParasiteLearner* pWeakHypothesis;
		
		ofstream outFile(outputFileName.c_str());
		if (!outFile.is_open())
		{
			cerr << "ERROR: Cannot open strong hypothesis file <" << outputFileName << ">!" << endl;
			exit(1);
		}
		
		for (int i = 0; i < numExamples ; ++i)
		{
			vector<float> alphas;
			alphas.resize(numBaseLearners);
			fill(alphas.begin(), alphas.end(), 0);
			
			if (verbose >= 1)
				cout << "--> Encoding example no " << (i+1) << endl;
			pOnePoint->resetData();
			pOnePoint->addExample( pAutoassociativeData->getExample(i) );
			AlphaReal energy = 1;
			
			OutputInfo* pOutInfo = NULL;
			if ( args.hasArgument("outputinfo") ) 
			{
				args.getValue("outputinfo", 0, outputInfoFile);
				pOutInfo = new OutputInfo(args);
				pOutInfo->initialize(pOnePoint);
			}
			
			
			for (int t = 0; t < numIterations; ++t)
			{
				pWeakHypothesis = (ParasiteLearner*)pWeakHypothesisSource->create();
				pWeakHypothesis->initLearningOptions(args);
				pWeakHypothesis->setTrainingData(pOnePoint);
				energy *= pWeakHypothesis->run();
				// 	    if (verbose >= 2)
				//  	       cout << "energy = " << energy << endl << flush;
				AdaBoostMHLearner adaBoostMHLearner;
				
				if (i == 0 && t == 0)
				{
					if ( pWeakHypothesis->getBaseLearners().size() < numBaseLearners )
						numBaseLearners = pWeakHypothesis->getBaseLearners().size();
					outFile << "%Hidden representation using autoassociative boosting" << endl << endl;
					outFile << "@RELATION " << outputFileName << endl << endl;
					outFile << "% numBaseLearners" << endl;
					for (int j = 0; j < numBaseLearners; ++j) 
						outFile << "@ATTRIBUTE " << j << "_" <<
						pWeakHypothesis->getBaseLearners()[j]->getId() << " NUMERIC" << endl;
					outFile << "@ATTRIBUTE class {" << pLabelsData->getClassMap().getNameFromIdx(0);
					for (int l = 1; l < pLabelsData->getClassMap().getNumNames(); ++l)
						outFile << ", " << pLabelsData->getClassMap().getNameFromIdx(l);
					outFile << "}" << endl<< endl<< "@DATA" << endl;
				}
				alphas[pWeakHypothesis->getSelectedIndex()] += 
				pWeakHypothesis->getAlpha() * pWeakHypothesis->getSignOfAlpha();
				if ( pOutInfo )
					adaBoostMHLearner.printOutputInfo(pOutInfo, t, pOnePoint, NULL, pWeakHypothesis);
				adaBoostMHLearner.updateWeights(pOnePoint,pWeakHypothesis);
			}
			float sumAlphas = 0;
			for (int j = 0; j < numBaseLearners; ++j)
				sumAlphas += alphas[j];
			
			for (int j = 0; j < numBaseLearners; ++j)
				outFile << alphas[j]/sumAlphas << ",";
			const vector<Label>& labels = pLabelsData->getLabels(i);
			for (int l = 0; l < labels.size(); ++l)
				if (labels[l].y > 0)
					outFile << pLabelsData->getClassMap().getNameFromIdx(labels[l].idx) << endl;
			delete pOutInfo;
		}
		outFile.close();
	}
	
	if (pModel)
		delete pModel;
	
	return 0;
}

Beispiel #8

Datei anzeigen

Datei: FilterBoostLearner.cpp Projekt: ShenWei/src

	/**
	 * Trains a FilterBoost strong classifier as configured by the command line.
	 *
	 * Steps, in order:
	 *  - read the options (getArgs) and look up the weak learner registered under
	 *    _baseLearnerName, plus the "ConstantLearner" used as a baseline;
	 *  - load the training set (_trainFileName) and, if _testFileName is not
	 *    empty, the test set;
	 *  - if _outputInfoFile is not empty, write a baseline row (iteration -1)
	 *    obtained with the constant learner;
	 *  - resume from previously saved weak learners (resumeWeakLearners /
	 *    resumeProcess);
	 *  - for each remaining iteration: filter the training data, train a weak
	 *    hypothesis, optionally replace it by the constant learner when the
	 *    latter has the larger edge, derive alpha from the edge, report, update
	 *    the weights and margins, and append the hypothesis to the strong
	 *    hypothesis file (_shypFileName);
	 *  - write the file footer and free the data sets and the output object.
	 *
	 * The loop stops early when _maxTime (in minutes) is positive and exceeded.
	 *
	 * \param args The parsed command-line arguments.
	 */
	void FilterBoostLearner::run(const nor_utils::Args& args)
	{
		// load the arguments
		this->getArgs(args);

		time_t startTime, currentTime;
		time(&startTime);

		// get the registered weak learner (type from name)
		BaseLearner* pWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
		// initialize learning options; normally it's done in the strong loop
		// also, here we do it for Product learners, so input data can be created
		pWeakHypothesisSource->initLearningOptions(args);

		BaseLearner* pConstantWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

		// get the training input data, and load it
		InputData* pTrainingData = pWeakHypothesisSource->createInputData();
		pTrainingData->initOptions(args);
		pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

		const int numClasses = pTrainingData->getNumClasses();
		const int numExamples = pTrainingData->getNumExamples();

		// initialize the margins: one zeroed entry per (example, class) pair
		_margins.resize( numExamples );
		for( int i=0; i<numExamples; i++ )
		{
			_margins[i].resize( numClasses );
			fill( _margins[i].begin(), _margins[i].end(), 0.0 );
		}

		// get the testing input data, and load it
		InputData* pTestData = NULL;
		if ( !_testFileName.empty() )
		{
			pTestData = pWeakHypothesisSource->createInputData();
			pTestData->initOptions(args);
			pTestData->load(_testFileName, IT_TEST, _verbose);
		}

		// The output information object
		OutputInfo* pOutInfo = NULL;

		if ( !_outputInfoFile.empty() ) 
		{
			// Baseline: constant classifier - goes into 0th iteration
			BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
			pConstantWeakHypothesis->initLearningOptions(args);
			pConstantWeakHypothesis->setTrainingData(pTrainingData);
			pConstantWeakHypothesis->run(); // the returned energy is not needed here

			pOutInfo = new OutputInfo(_outputInfoFile);
			pOutInfo->initialize(pTrainingData);

			updateMargins( pTrainingData, pConstantWeakHypothesis );

			if (pTestData)
				pOutInfo->initialize(pTestData);
			pOutInfo->outputHeader();

			pOutInfo->outputIteration(-1);
			pOutInfo->outputError(pTrainingData, pConstantWeakHypothesis);

			if (pTestData)
				pOutInfo->outputError(pTestData, pConstantWeakHypothesis);

			pOutInfo->outputCurrentTime();

			pOutInfo->endLine();
			pOutInfo->initialize(pTrainingData);

			if (pTestData)
				pOutInfo->initialize(pTestData);

			// The baseline hypothesis is only used for the row written above;
			// it used to be leaked when this scope ended.
			delete pConstantWeakHypothesis;
		}

		// reload the previously found weak learners if -resume is set. 
		// otherwise just return 0
		int startingIteration = resumeWeakLearners(pTrainingData);

		Serialization ss(_shypFileName, _isShypCompressed );
		// NOTE(review): the header is written after resumeWeakLearners() and
		// before resumeProcess(); presumably the previous file must be read
		// before it is rewritten - confirm.
		ss.writeHeader(_baseLearnerName);

		// perform the resuming if necessary. If not it will just return
		resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

		if (_verbose == 1)
			cout << "Learning in progress..." << endl;

		///////////////////////////////////////////////////////////////////////
		// Starting the AdaBoost main loop
		///////////////////////////////////////////////////////////////////////
		for (int t = startingIteration; t < _numIterations; ++t)
		{
			if (_verbose > 1)
				cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

			// The requested sample size grows logarithmically with the iteration.
			filter( pTrainingData, (int)(_Cn * log(t+2.0)) );
			if ( pTrainingData->getNumExamples() < 2 ) 
			{
				// NOTE(review): presumably retries with the alternative filtering
				// mode when almost nothing was kept - confirm against filter().
				filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
			}

			if (_verbose > 1)
			{
				cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
			}

			BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
			pWeakHypothesis->initLearningOptions(args);
			pWeakHypothesis->setTrainingData(pTrainingData);
			float energy = pWeakHypothesis->run();

			// Only created when the user asked for the constant learner check.
			BaseLearner* pConstantWeakHypothesis = NULL;
			if (_withConstantLearner)
			{
				pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
				pConstantWeakHypothesis->initLearningOptions(args);
				pConstantWeakHypothesis->setTrainingData(pTrainingData);
				pConstantWeakHypothesis->run();
			}

			// estimate edge (on a freshly filtered sample)
			filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
			float edge = pWeakHypothesis->getEdge() / 2.0;

			if (_withConstantLearner)
			{
				// keep whichever of the two hypotheses has the larger edge
				float constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
				if ( constantEdge > edge )
				{
					delete pWeakHypothesis;
					pWeakHypothesis = pConstantWeakHypothesis;
					edge = constantEdge;
				} else {
					delete pConstantWeakHypothesis;
				}
			}

			// calculate alpha from the (halved) edge
			const float alpha = 0.5 * log( ( 0.5 + edge ) / ( 0.5 - edge ) );
			pWeakHypothesis->setAlpha( alpha );

			if (_verbose > 1)
				cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
			// Output the step-by-step information
			pTrainingData->clearIndexSet();
			printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

			// Updates the weights and returns the edge
			float gamma = updateWeights(pTrainingData, pWeakHypothesis);

			if (_verbose > 1)
			{
				cout << setprecision(5)
					<< "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
					<< "--> Edge  = " << gamma << endl
					<< "--> Energy  = " << energy << endl
					;
			}

			// update the margins
			updateMargins( pTrainingData, pWeakHypothesis );

			// append the current weak learner to strong hypothesis file,
			// that is, serialize it.
			ss.appendHypothesis(t, pWeakHypothesis);

			// Add it to the internal list of weak hypotheses
			// NOTE(review): the pointer stored here is deleted at the end of this
			// iteration, so _foundHypotheses ends up holding dangling pointers
			// (except after the time-limit break below, which skips the delete).
			// Confirm nothing dereferences _foundHypotheses after training before
			// changing the ownership.
			_foundHypotheses.push_back(pWeakHypothesis); 

			// check if the time limit has been reached
			if (_maxTime > 0)
			{
				time( &currentTime );
				float diff = difftime(currentTime, startTime); // difftime is in seconds
				diff /= 60; // = minutes

				if (diff > _maxTime)
				{
					if (_verbose > 0)
						cout << "Time limit of " << _maxTime 
						<< " minutes has been reached!" << endl;
					break;     
				}
			} // check for maxtime
			delete pWeakHypothesis;
		}  // loop on iterations
		/////////////////////////////////////////////////////////

		// write the footer of the strong hypothesis file
		ss.writeFooter();

		// Free the two input data objects and the output object
		// (deleting a null pointer is a no-op, so no checks are needed)
		delete pTrainingData;
		delete pTestData;
		delete pOutInfo;

		if (_verbose > 0)
			cout << "Learning completed." << endl;
	}

Beispiel #9

Datei anzeigen

Datei: TreeLearnerUCT.cpp Projekt: junjiek/cmu-exp

/**
 * Expands a tree node: splits the node's examples by the sign of the node
 * classifier's output and trains one child learner on each side.
 *
 * Examples with a positive output form the left child's index set, examples
 * with a negative output the right child's; examples with output 0 go to
 * neither. If one side is empty the node is flagged as not extended, but the
 * children are still built (the early return was disabled in the original
 * code).
 *
 * For each side, a copy of _baseLearners[0] is trained when the subset
 * contains more than one class, otherwise a "ConstantLearner" is used. The
 * resulting learner, its edge and the index set are stored in bLearner.
 *
 * \param bLearner   The node to expand; its _extended flag, child pointers,
 *                   child edges and child index sets are written.
 * \param depthIndex Forwarded to FeaturewiseLearner::run(int).
 */
void TreeLearnerUCT::calculateChildrenAndEnergies( NodePointUCT& bLearner, int depthIndex ) {
    bLearner._extended = true;
    _pTrainingData->loadIndexSet( bLearner._learnerIdxSet );

    // separate the dataset by the sign of the node classifier
    set< int > idxPos, idxNeg;
    float phix;

    for (int i = 0; i < _pTrainingData->getNumExamples(); ++i) {
        // this returns the phi value of classifier
        phix = bLearner._learner->classify(_pTrainingData,i,0);
        if ( phix <  0 )
            idxNeg.insert( _pTrainingData->getRawIndex( i ) );
        else if ( phix > 0 ) { // have to redo the multiplications, haven't been tested
            idxPos.insert( _pTrainingData->getRawIndex( i ) );
        }
    }

    if ( idxPos.empty() || idxNeg.empty() ) {
        // degenerate split: mark it, but keep going as the original code did
        bLearner._extended = false;
    }

    // ---------------------------------------------------------------- left child
    _pTrainingData->loadIndexSet( idxPos );

    if ( ! _pTrainingData->isSamplesFromOneClass() ) {
        BaseLearner* posLearner = _baseLearners[0]->copyState();

        // Guard the downcast: a failed dynamic_cast yields a null pointer, which
        // was previously dereferenced. Fall back to the generic run().
        FeaturewiseLearner* pFeaturewise = dynamic_cast<FeaturewiseLearner*>(posLearner);
        if ( pFeaturewise )
            pFeaturewise->run( depthIndex );
        else
            posLearner->run();

        posLearner->setTrainingData( _pTrainingData );
        bLearner._leftEdge = posLearner->getEdge();
        bLearner._leftChild = posLearner;
        bLearner._leftChildIdxSet = idxPos;
    } else {
        BaseLearner* pConstantWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

        BaseLearner* posLearner = pConstantWeakHypothesisSource->create();
        posLearner->setTrainingData(_pTrainingData);

        FeaturewiseLearner* pFeaturewise = dynamic_cast<FeaturewiseLearner*>(posLearner);
        if ( pFeaturewise )
            pFeaturewise->run( depthIndex );
        else
            posLearner->run();

        posLearner->setTrainingData( _pTrainingData );
        bLearner._leftEdge = posLearner->getEdge();
        bLearner._leftChild = posLearner;
        bLearner._leftChildIdxSet = idxPos;
    }

    // --------------------------------------------------------------- right child
    _pTrainingData->loadIndexSet( idxNeg );

    if ( ! _pTrainingData->isSamplesFromOneClass() ) {
        BaseLearner* negLearner = _baseLearners[0]->copyState();

        FeaturewiseLearner* pFeaturewise = dynamic_cast<FeaturewiseLearner*>(negLearner);
        if ( pFeaturewise )
            pFeaturewise->run( depthIndex );
        else
            negLearner->run();

        negLearner->setTrainingData( _pTrainingData );
        bLearner._rightEdge = negLearner->getEdge();
        bLearner._rightChild = negLearner;
        bLearner._rightChildIdxSet = idxNeg;
    } else {
        BaseLearner* pConstantWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

        BaseLearner* negLearner =  pConstantWeakHypothesisSource->create();
        negLearner->setTrainingData(_pTrainingData);

        FeaturewiseLearner* pFeaturewise = dynamic_cast<FeaturewiseLearner*>(negLearner);
        if ( pFeaturewise )
            pFeaturewise->run( depthIndex );
        else
            negLearner->run();

        // This branch used to assign _rightChild twice and never set the edge;
        // record the edge like the three other branches do.
        bLearner._rightEdge = negLearner->getEdge();
        bLearner._rightChild = negLearner;
        bLearner._rightChildIdxSet = idxNeg;
    }
}

Beispiel #10

Datei anzeigen

Datei: AdaBoostMHLearner.cpp Projekt: busarobi/MDDAG2

	// -------------------------------------------------------------------------
	/**
	 * Runs the AdaBoost.MH loop on an already loaded training set and collects
	 * the weak hypothesis selected at every iteration.
	 *
	 * At each iteration a weak hypothesis of type baseLearnerName is trained.
	 * If _withConstantLearner is set, or the returned energy is NaN, a
	 * "ConstantLearner" is trained as well and replaces the weak hypothesis when
	 * its energy is not larger (or when the weak learner's energy is NaN).
	 * The weights are then updated with the retained hypothesis.
	 *
	 * Nothing is serialized and the loop never stops early: an edge that is not
	 * above _theta is only reported.
	 *
	 * \param args            The parsed command-line arguments.
	 * \param pTrainingData   The training data handed to the weak learners and
	 *                        to updateWeights().
	 * \param baseLearnerName Name under which the weak learner is registered.
	 * \param numIterations   Number of boosting iterations to perform.
	 * \param foundHypotheses Receives one hypothesis per iteration. The pointers
	 *                        are not deleted by this function.
	 */
	void AdaBoostMHLearner::run( const nor_utils::Args& args, InputData* pTrainingData, const string baseLearnerName, const int numIterations, vector<BaseLearner*>& foundHypotheses )
	{
		
		// get the registered weak learner (type from name)
		BaseLearner* pWeakHypothesisSource = 
		BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		// initialize learning options; normally it's done in the strong loop
		// also, here we do it for Product learners, so input data can be created
		pWeakHypothesisSource->initLearningOptions(args);
		
		BaseLearner* pConstantWeakHypothesisSource = 
		BaseLearner::RegisteredLearners().getLearner("ConstantLearner");
		
		if (_verbose == 1)
			cout << "Learning in progress..." << endl;
		
		///////////////////////////////////////////////////////////////////////
		// Starting the AdaBoost main loop
		///////////////////////////////////////////////////////////////////////
		for (int t = 0; t < numIterations; ++t)
		{
			if ((_verbose > 0)&&((t%100)==0))
				cout << "--------------[ Boosting iteration " << (t+1) << " ]--------------" << endl;				
			
			BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
			pWeakHypothesis->initLearningOptions(args);
			pWeakHypothesis->setTrainingData(pTrainingData);
			
			AlphaReal energy = pWeakHypothesis->run();
			
			// "energy != energy" is true only for NaN: in that case the weak
			// learner failed and the constant learner is used unconditionally.
			if ( (_withConstantLearner) || ( energy != energy ) )
			{
				BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
				pConstantWeakHypothesis->initLearningOptions(args);
				pConstantWeakHypothesis->setTrainingData(pTrainingData);
				AlphaReal constantEnergy = pConstantWeakHypothesis->run();
				
				if ( (constantEnergy <= energy) || ( energy != energy ) ) {
					delete pWeakHypothesis;
					pWeakHypothesis = pConstantWeakHypothesis;
				} else {
					// The constant learner lost the comparison: release it.
					// It used to be leaked on this path.
					delete pConstantWeakHypothesis;
				}
			}
			
			if (_verbose > 1)
				cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
			
			// Updates the weights and returns the edge
			AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);
			
			if (_verbose > 1)
			{
				// note: "Energy" is the energy of the originally trained weak
				// learner, even when the constant learner was selected
				cout << setprecision(5)
				<< "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
				<< "--> Edge  = " << gamma << endl
				<< "--> Energy  = " << energy << endl
				;
			}
			
			// If gamma <= theta the weak learner is no better than the required
			// offset. Stopping is deliberately disabled here: the situation is
			// only reported and the hypothesis is still kept.
			if (gamma <= _theta)
			{
				if (_verbose > 0)
				{
					cout << "Can't train any further: edge = " << gamma 
					<< " (with and edge offset (theta)=" << _theta << ")" << endl;
				}
			}
						
			// Hand the hypothesis over to the caller's list
			foundHypotheses.push_back(pWeakHypothesis); 
			
		}  // loop on iterations
		/////////////////////////////////////////////////////////
		
		if (_verbose > 0)
			cout << "--------------[ AdaBoost Learning completed. ]--------------" << endl;
	}

Beispiel #11

Datei anzeigen

Datei: AdaBoostMHLearner.cpp Projekt: busarobi/MDDAG2

	/**
	 * Trains an AdaBoost.MH strong classifier as configured by the command line.
	 *
	 * Steps, in order:
	 *  - read the options (getArgs) and look up the weak learner registered under
	 *    _baseLearnerName, plus the "ConstantLearner";
	 *  - load the training set (_trainFileName) and, if _testFileName is not
	 *    empty, the test set;
	 *  - if _outputInfoFile is not empty, write a baseline row (iteration -1)
	 *    obtained with the constant learner;
	 *  - resume from previously saved weak learners (resumeWeakLearners /
	 *    resumeProcess);
	 *  - for each remaining iteration: train a weak hypothesis, optionally
	 *    replace it by the constant learner, report, update the weights and
	 *    append the hypothesis to the strong hypothesis file (_shypFileName);
	 *  - write the file footer, print the instance weights (printOutWeights) and
	 *    free the data sets and the output object.
	 *
	 * The loop stops early only when _maxTime (in minutes) is positive and
	 * exceeded; an edge that is not above _theta is merely reported.
	 *
	 * \param args The parsed command-line arguments.
	 */
	void AdaBoostMHLearner::run(const nor_utils::Args& args)
	{
		// load the arguments
		this->getArgs(args);

		// get the registered weak learner (type from name)
		BaseLearner* pWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
		// initialize learning options; normally it's done in the strong loop
		// also, here we do it for Product learners, so input data can be created
		pWeakHypothesisSource->initLearningOptions(args);

		BaseLearner* pConstantWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

		// get the training input data, and load it
		InputData* pTrainingData = pWeakHypothesisSource->createInputData();
		pTrainingData->initOptions(args);
		pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);
		
		// get the testing input data, and load it
		InputData* pTestData = NULL;
		if ( !_testFileName.empty() )
		{
			pTestData = pWeakHypothesisSource->createInputData();
			pTestData->initOptions(args);
			pTestData->load(_testFileName, IT_TEST, _verbose);
		}

		// The output information object
		OutputInfo* pOutInfo = NULL;

		if ( !_outputInfoFile.empty() ) 
		{
			// Baseline: constant classifier - goes into 0th iteration
			BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
			pConstantWeakHypothesis->initLearningOptions(args);
			pConstantWeakHypothesis->setTrainingData(pTrainingData);
			pConstantWeakHypothesis->run(); // the returned energy is not needed here

			pOutInfo = new OutputInfo(args);
			pOutInfo->initialize(pTrainingData);

			if (pTestData)
				pOutInfo->initialize(pTestData);
			pOutInfo->outputHeader(pTrainingData->getClassMap());

			pOutInfo->outputIteration(-1);
			pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

			if (pTestData != NULL)
			{
				pOutInfo->separator();
				pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);   
			}

			pOutInfo->outputCurrentTime();

			pOutInfo->endLine(); 
			pOutInfo->initialize(pTrainingData);

			if (pTestData)
				pOutInfo->initialize(pTestData);

			// The baseline hypothesis is only used for the row written above;
			// it used to be leaked when this scope ended.
			delete pConstantWeakHypothesis;
		}

		// reload the previously found weak learners if -resume is set. 
		// otherwise just return 0
		int startingIteration = resumeWeakLearners(pTrainingData);

		Serialization ss(_shypFileName, _isShypCompressed );
		// NOTE(review): the header is written after resumeWeakLearners() and
		// before resumeProcess(); presumably the previous file must be read
		// before it is rewritten - confirm.
		ss.writeHeader(_baseLearnerName);

		// perform the resuming if necessary. If not it will just return
		resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

		if (_verbose == 1)
			cout << "Learning in progress..." << endl;

		// The clock starts here, after the (possibly slow) reload of a saved
		// model, so that _maxTime only bounds the training loop.
		time_t startTime, currentTime;
		time(&startTime);

		///////////////////////////////////////////////////////////////////////
		// Starting the AdaBoost main loop
		///////////////////////////////////////////////////////////////////////
		for (int t = startingIteration; t < _numIterations; ++t)
		{
			if (_verbose > 1)
				cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

			BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
			pWeakHypothesis->initLearningOptions(args);
			pWeakHypothesis->setTrainingData(pTrainingData);
			
			AlphaReal energy = pWeakHypothesis->run();

			// "energy != energy" is true only for NaN: in that case the weak
			// learner failed and the constant learner is used unconditionally.
			if ( (_withConstantLearner) || ( energy != energy ) )
			{
				BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
				pConstantWeakHypothesis->initLearningOptions(args);
				pConstantWeakHypothesis->setTrainingData(pTrainingData);
				AlphaReal constantEnergy = pConstantWeakHypothesis->run();

				if ( (constantEnergy <= energy) || ( energy != energy ) ) {
					delete pWeakHypothesis;
					pWeakHypothesis = pConstantWeakHypothesis;
				} else {
					// The constant learner lost the comparison: release it.
					// It used to be leaked on this path.
					delete pConstantWeakHypothesis;
				}
			}

			if (_verbose > 1)
				cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
			// Output the step-by-step information
			printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

			// Updates the weights and returns the edge
			AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

			if (_verbose > 1)
			{
				// note: "Energy" is the energy of the originally trained weak
				// learner, even when the constant learner was selected
				cout << setprecision(5)
					<< "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
					<< "--> Edge  = " << gamma << endl
					<< "--> Energy  = " << energy << endl
					;
			}

			// If gamma <= theta the weak learner is no better than the required
			// offset. Stopping is deliberately disabled here: the situation is
			// only reported and the hypothesis is still kept.
			if (gamma <= _theta)
			{
				if (_verbose > 0)
				{
					cout << "Can't train any further: edge = " << gamma 
						<< " (with and edge offset (theta)=" << _theta << ")" << endl;
				}
			}

			// append the current weak learner to strong hypothesis file,
			// that is, serialize it.
			ss.appendHypothesis(t, pWeakHypothesis);

			// Add it to the internal list of weak hypotheses
			// NOTE(review): the pointer stored here is deleted at the end of this
			// iteration, so _foundHypotheses ends up holding dangling pointers
			// (except after the time-limit break below, which skips the delete).
			// Confirm nothing dereferences _foundHypotheses after training before
			// changing the ownership.
			_foundHypotheses.push_back(pWeakHypothesis); 

			// check if the time limit has been reached
			if (_maxTime > 0)
			{
				time( &currentTime );
				float diff = difftime(currentTime, startTime); // difftime is in seconds
				diff /= 60; // = minutes

				if (diff > _maxTime)
				{
					if (_verbose > 0)
						cout << "Time limit of " << _maxTime 
						<< " minutes has been reached!" << endl;
					break;     
				}
			} // check for maxtime
			delete pWeakHypothesis;
		}  // loop on iterations
		/////////////////////////////////////////////////////////

		// write the footer of the strong hypothesis file
		ss.writeFooter();

		// write the weights of the instances if the name of weights file isn't empty
		printOutWeights( pTrainingData );

		// Free the two input data objects and the output object
		// (deleting a null pointer is a no-op, so no checks are needed)
		delete pTrainingData;
		delete pTestData;
		delete pOutInfo;

		if (_verbose > 0)
			cout << "Learning completed." << endl;
	}

Beispiel #12

Datei anzeigen

Datei: BanditTreeLearner.cpp Projekt: ShenWei/src

	/**
	 * Expands a tree node: partitions the node's examples by the sign of the
	 * node classifier's output and attaches one child learner per side.
	 *
	 * Positive outputs feed the left child, negative outputs the right child;
	 * examples with output 0 are assigned to neither. The node is flagged as not
	 * extended when either side is empty, but both children are still built.
	 *
	 * On each side a copy of _baseLearners[0] is trained on the arms in
	 * _armsForPulling when the subset holds more than one class. If that is not
	 * the case, or the training returns NaN, a "ConstantLearner" is trained
	 * instead.
	 *
	 * \param bLearner The node to expand; its _extended flag, child pointers,
	 *                 child edges and child index sets are written.
	 */
	void BanditTreeLearner::calculateChildrenAndEnergies( NodePoint& bLearner ) {
		bLearner._extended = true;
		_pTrainingData->loadIndexSet( bLearner._learnerIdxSet );

		// Raw indices of the examples falling on each side of the split
		set< int > positiveIdx;
		set< int > negativeIdx;

		for (int ex = 0; ex < _pTrainingData->getNumExamples(); ++ex) {
			// phi value of the node's classifier for this example
			const float phi = bLearner._learner->classify(_pTrainingData, ex, 0);
			if ( phi > 0 )
				positiveIdx.insert( _pTrainingData->getRawIndex( ex ) );
			else if ( phi < 0 )
				negativeIdx.insert( _pTrainingData->getRawIndex( ex ) );
		}

		if ( positiveIdx.empty() || negativeIdx.empty() )
			bLearner._extended = false;

		// ------------------------------------------------------------ left child
		_pTrainingData->loadIndexSet( positiveIdx );

		// NaN means "no usable learner yet"; it is detected with x != x below
		float childEnergy = numeric_limits<float>::signaling_NaN();

		if ( ! _pTrainingData->isSamplesFromOneClass() ) {
			ScalarLearner* candidate = dynamic_cast<ScalarLearner* >(_baseLearners[0]->copyState());

			childEnergy = dynamic_cast<FeaturewiseLearner* >(candidate)->run( _armsForPulling );
			if ( childEnergy != childEnergy ) {
				// training failed: discard the copy, the fallback below takes over
				delete candidate;
			} else {
				bLearner._leftEdge = candidate->getEdge();
				bLearner._leftChild = candidate;
				bLearner._leftChildIdxSet = positiveIdx;
			}
		}

		if ( childEnergy != childEnergy ) {
			// no column was found (this can occur with sparse data) or the
			// subset contains a single class: use the constant learner
			BaseLearner* constantSource = 
				BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

			ScalarLearner* fallback = dynamic_cast<ScalarLearner* >(constantSource->create());
			fallback->setTrainingData(_pTrainingData);
			fallback->run();

			bLearner._leftEdge = fallback->getEdge();
			bLearner._leftChild = fallback;
			bLearner._leftChildIdxSet = positiveIdx;
		}

		// ----------------------------------------------------------- right child
		_pTrainingData->loadIndexSet( negativeIdx );
		childEnergy = numeric_limits<float>::signaling_NaN();

		if ( ! _pTrainingData->isSamplesFromOneClass() ) {
			ScalarLearner* candidate = dynamic_cast<ScalarLearner* >(_baseLearners[0]->copyState());

			childEnergy = dynamic_cast< FeaturewiseLearner* >(candidate)->run( _armsForPulling );
			if ( childEnergy != childEnergy ) {
				delete candidate;
			} else {
				bLearner._rightEdge = candidate->getEdge();
				bLearner._rightChild = candidate;
				bLearner._rightChildIdxSet = negativeIdx;
			}
		}

		if ( childEnergy != childEnergy ) {
			BaseLearner* constantSource = 
				BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

			ScalarLearner* fallback = dynamic_cast<ScalarLearner* >(constantSource->create());
			fallback->setTrainingData(_pTrainingData);
			fallback->run();

			bLearner._rightEdge = fallback->getEdge();
			bLearner._rightChild = fallback;
			bLearner._rightChildIdxSet = negativeIdx;
		}
	}

Beispiel #13

Datei anzeigen

Datei: BanditProductLearner.cpp Projekt: ShenWei/src

	float BanditProductLearner::run()
	{
		if ( ! this->_banditAlgo->isInitialized() ) {
			init();
		}
		// the bandit algorithm selects the subset the tree learner is allowed to use
		// the armindexes will be stored in _armsForPulling
		getArms();

		const int numClasses = _pTrainingData->getNumClasses();
		const int numExamples = _pTrainingData->getNumExamples();

		// Backup original labels
		for (int i = 0; i < numExamples; ++i) {
			const vector<Label>& labels = _pTrainingData->getLabels(i);
			vector<char> exampleLabels;
			for (int l = 0; l < numClasses; ++l)
				exampleLabels.push_back(labels[l].y);
			_savedLabels.push_back(exampleLabels);
		}

		for(int ib = 0; ib < _numBaseLearners; ++ib)
			_baseLearners[ib]->setTrainingData(_pTrainingData);

		float energy = numeric_limits<float>::max();
		float previousEnergy, hx, previousAlpha;
		BaseLearner* pPreviousBaseLearner = 0;

		bool firstLoop = true;
		int ib = -1;
		while (1) {
			ib += 1;
			if (ib >= _numBaseLearners) {
				ib = 0;
				firstLoop = false;
			}
			previousEnergy = energy;
			previousAlpha = _alpha;
			if (pPreviousBaseLearner)
				delete pPreviousBaseLearner;
			if ( !firstLoop ) {
				// take the old learner off the labels
				for (int i = 0; i < numExamples; ++i) {
					vector<Label>& labels = _pTrainingData->getLabels(i);
					for (int l = 0; l < numClasses; ++l) {
						// Here we could have the option of using confidence rated setting so the
						// real valued output of classify instead of its sign
						hx = _baseLearners[ib]->classify(_pTrainingData,i,l);
						if ( hx < 0 )
							labels[l].y *= -1;
						else if ( hx == 0 ) { // have to redo the multiplications, haven't been tested
							for(int ib1 = 0; ib1 < _numBaseLearners && labels[l].y != 0; ++ib1) {
								if (ib != ib1) {
									hx = _baseLearners[ib1]->classify(_pTrainingData,i,l);
									if (hx < 0)
										labels[l].y *= -1;
									else if (hx == 0)
										labels[l].y = 0;
								}
							}
						}
					}
				}
			}
			pPreviousBaseLearner = _baseLearners[ib]->copyState();
			energy = dynamic_cast< FeaturewiseLearner* >(_baseLearners[ib])->run(_armsForPulling );
			// check if it is signailing_nan
			if ( energy != energy )
			{
				if (_verbose > 2) {
					cout << "Cannot find weak hypothesis, constant learner is used!!" << endl;
				}
				BaseLearner* pConstantWeakHypothesisSource = 
					BaseLearner::RegisteredLearners().getLearner("ConstantLearner");
				BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
				pConstantWeakHypothesis->setTrainingData( _pTrainingData );
				energy = pConstantWeakHypothesis->run();
				
				delete _baseLearners[ib];
				_baseLearners[ib] = pConstantWeakHypothesis;
				
			}
			_alpha = _baseLearners[ib]->getAlpha();
			if (_verbose > 2) {
				cout << "E[" << (ib+1) <<  "] = " << energy << endl << flush;
				cout << "alpha[" << (ib+1) <<  "] = " << _alpha << endl << flush;
			}
			for (int i = 0; i < numExamples; ++i) {
				vector<Label>& labels = _pTrainingData->getLabels(i);
				for (int l = 0; l < numClasses; ++l) {
					// Here we could have the option of using confidence rated setting so the
					// real valued output of classify instead of its sign
					if (labels[l].y != 0) { // perhaps replace it by nor_utils::is_zero(labels[l].y)
						hx = _baseLearners[ib]->classify(_pTrainingData,i,l);
						if ( hx < 0 )
							labels[l].y *= -1;
						else if ( hx == 0 )
							labels[l].y = 0;
					}
				}
			}

			// We have to do at least one full iteration. For real it's not guaranteed
			// Alternatively we could initialize all of them to constant
			//      if ( !firstLoop && energy >= previousEnergy ) {
			//	 if (energy > previousEnergy) {
			//	    _baseLearners[ib] = pPreviousBaseLearner->copyState();
			//           delete pPreviousBaseLearner;
			//	    energy = previousEnergy;
			//	    _alpha = _baseLearners[ib]->getAlpha();
			//	 }
			//	 break;
			//      }
			if ( energy >= previousEnergy ) {
				_alpha = previousAlpha;
				energy = previousEnergy;
				if (firstLoop) {
					for(int ib2 = ib; ib2 < _numBaseLearners; ++ib2)
						delete _baseLearners[ib2];
					_numBaseLearners = ib;
				}
				else {
					_baseLearners[ib] = pPreviousBaseLearner->copyState();
				}
				delete pPreviousBaseLearner;
				break;
			} 
		}

		// Restore original labels
		for (int i = 0; i < numExamples; ++i) {
			vector<Label>& labels = _pTrainingData->getLabels(i);
			for (int l = 0; l < numClasses; ++l)
				labels[l].y = _savedLabels[i][l];
		}

		_id = _baseLearners[0]->getId();
		for(int ib = 1; ib < _numBaseLearners; ++ib)
			_id += "_x_" + _baseLearners[ib]->getId();

		//bandit part we calculate the reward
		_reward = getRewardFromEdge( getEdge() );
		provideRewardForBanditAlgo();

		return energy;
	}