float UCBVHaarSingleStumpLearner::run()
{
    if ( UCBVHaarSingleStumpLearner::_numOfCalling == 0 ) {
        init();
    }

    UCBVHaarSingleStumpLearner::_numOfCalling++;
    //cout << "Num of iter:\t" << UCBVHaarSingleStumpLearner::_numOfCalling << " " << this->getTthSeriesElement( UCBVHaarSingleStumpLearner::_numOfCalling ) << flush << endl;
    const int numClasses = _pTrainingData->getNumClasses();

    // set the smoothing value to avoid numerical problem
    // when theta=0.
    setSmoothingVal( 1.0 / (float)_pTrainingData->getNumExamples() * 0.01 );

    vector<sRates> mu(numClasses); // The class-wise rates. See BaseLearner::sRates for more info.
    vector<float> tmpV(numClasses); // The class-wise votes/abstentions

    float tmpThreshold;
    float tmpAlpha;

    float bestEnergy = numeric_limits<float>::max();
    float tmpEnergy;

    HaarData* pHaarData = static_cast<HaarData*>(_pTrainingData);

    // get the whole data matrix
    //const vector<int*>& intImages = pHaarData->getIntImageVector();

    // The data matrix transformed into the feature's space
    vector< pair<int, float> > processedHaarData(_pTrainingData->getNumExamples());

    // I need to prepare both type of sampling

    StumpAlgorithm<float> sAlgo(numClasses);
    sAlgo.initSearchLoop(_pTrainingData);

    float halfTheta;
    if ( _abstention == ABST_REAL || _abstention == ABST_CLASSWISE )
        halfTheta = _theta/2.0;
    else
        halfTheta = 0;

    // The declared features types
    vector<HaarFeature*>& loadedFeatures = pHaarData->getLoadedFeatures();

    // for every feature type
    vector<HaarFeature*>::iterator ftIt;
    //vector<HaarFeature*>::iterator maxftIt;

    vector<float> maxV( loadedFeatures.size() );
    vector<int> maxKey( loadedFeatures.size() );
    vector<int> maxNum( loadedFeatures.size() );

    //claculate the Bk,s,t of the randomly chosen features
    int key = getKeyOfMaximalElement();
    int featureIdx = (int) (key / 10);
    int featureType = (key % 10);

    //for (i = 0, ftIt = loadedFeatures.begin(); ftIt != loadedFeatures.end(); i++ ++ftIt)
    //*ftIt = loadedFeatures[ featureType ];

    // just for readability
    //HaarFeature* pCurrFeature = *ftIt;
    HaarFeature* pCurrFeature = loadedFeatures[ featureType ];
    if (_samplingType != ST_NO_SAMPLING)
        pCurrFeature->setAccessType(AT_RANDOM_SAMPLING);

    // Reset the iterator on the configurations. For random sampling
    // this clear the visited list
    pCurrFeature->loadConfigByNum( featureIdx );


    if (_verbose > 1)
        cout << "Learning type " << pCurrFeature->getName() << ".." << flush;

    // transform the data from intImages to the feature's space
    pCurrFeature->fillHaarData( _pTrainingData->getExamples(), processedHaarData );
    //pCurrFeature->fillHaarData(intImages, processedHaarData);

    // sort the examples in the new space by their coordinate
    sort( processedHaarData.begin(), processedHaarData.end(),
          nor_utils::comparePair<2, int, float, less<float> >() );

    // find the optimal threshold
    tmpThreshold = sAlgo.findSingleThresholdWithInit(processedHaarData.begin(),
                   processedHaarData.end(),
                   _pTrainingData, halfTheta, &mu, &tmpV);

    tmpEnergy = getEnergy(mu, tmpAlpha, tmpV);


    // Store it in the current weak hypothesis.
    // note: I don't really like having so many temp variables
    // but the alternative would be a structure, which would need
    // to be inheritable to make things more consistent. But this would
    // make it less flexible. Therefore, I am still undecided. This
    // might change!
    _alpha = tmpAlpha;
    _v = tmpV;

    // I need to save the configuration because it changes within the object
    _selectedConfig = pCurrFeature->getCurrentConfig();
    // I save the object because it contains the informations about the type,
    // the name, etc..
    _pSelectedFeature = pCurrFeature;
    _threshold = tmpThreshold;

    bestEnergy = tmpEnergy;

    float edge = 0.0;
    for( vector<sRates>::iterator itR = mu.begin(); itR != mu.end(); itR++ ) edge += ( itR->rPls - itR->rMin );
    //need to set the X value
    updateKeys( key, edge * edge );

    if (!_pSelectedFeature)
    {
        cerr << "ERROR: No Haar Feature found. Something must be wrong!" << endl;
        exit(1);
    }
    else
    {
        if (_verbose > 1)
            cout << "Selected type: " << _pSelectedFeature->getName() << endl;
    }

    return bestEnergy;
}
示例#2
0
	AlphaReal HaarMultiStumpLearner::run()
	{
		const int numClasses = _pTrainingData->getNumClasses();
		
		// set the smoothing value to avoid numerical problem
		// when theta=0.
		setSmoothingVal( 1.0 / (AlphaReal)_pTrainingData->getNumExamples() * 0.01 );
		
		vector<sRates> mu(numClasses); // The class-wise rates. See BaseLearner::sRates for more info.
		vector<AlphaReal> tmpV(numClasses); // The class-wise votes/abstentions
		
		vector<FeatureReal> tmpThresholds(numClasses);
		AlphaReal tmpAlpha;
		
		AlphaReal bestEnergy = numeric_limits<AlphaReal>::max();
		AlphaReal tmpEnergy;
		
		HaarData* pHaarData = static_cast<HaarData*>(_pTrainingData);
		
		// get the whole data matrix
		//   const vector<int*>& intImages = pHaarData->getIntImageVector();
		
		// The data matrix transformed into the feature's space
		vector< pair<int, FeatureReal> > processedHaarData(_pTrainingData->getNumExamples());
		
		// I need to prepare both type of sampling
		int numConf; // for ST_NUM
		time_t startTime, currentTime; // for ST_TIME
		
		long numProcessed;
		bool quitConfiguration;
		
		StumpAlgorithm<FeatureReal> sAlgo(numClasses);
		sAlgo.initSearchLoop(_pTrainingData);
		
		// The declared features types
		vector<HaarFeature*>& loadedFeatures = pHaarData->getLoadedFeatures();
		
		// for every feature type
		vector<HaarFeature*>::iterator ftIt;
		for (ftIt = loadedFeatures.begin(); ftIt != loadedFeatures.end(); ++ftIt)
		{
			// just for readability
			HaarFeature* pCurrFeature = *ftIt;
			if (_samplingType != ST_NO_SAMPLING)
				pCurrFeature->setAccessType(AT_RANDOM_SAMPLING);
			
			// Reset the iterator on the configurations. For random sampling
			// this shuffles the configurations.
			pCurrFeature->resetConfigIterator();
			quitConfiguration = false;
			numProcessed = 0;
			
			numConf = 0;
			time( &startTime );
			
			if (_verbose > 1)
				cout << "Learning type " << pCurrFeature->getName() << ".." << flush;
			
			// While there is a configuration available
			while ( pCurrFeature->hasConfigs() ) 
			{
				// transform the data from intImages to the feature's space
				pCurrFeature->fillHaarData(_pTrainingData->getExamples(), processedHaarData);
				// sort the examples in the new space by their coordinate
				sort( processedHaarData.begin(), processedHaarData.end(), 
					 nor_utils::comparePair<2, int, float, less<float> >() );
				
				// find the optimal threshold
				sAlgo.findMultiThresholdsWithInit(processedHaarData.begin(), processedHaarData.end(), 
												  _pTrainingData, tmpThresholds, &mu, &tmpV);
				
				tmpEnergy = getEnergy(mu, tmpAlpha, tmpV);
				++numProcessed;
				
				if (tmpEnergy < bestEnergy)
				{
					// Store it in the current weak hypothesis.
					// note: I don't really like having so many temp variables
					// but the alternative would be a structure, which would need
					// to be inheritable to make things more consistent. But this would
					// make it less flexible. Therefore, I am still undecided. This
					// might change!
					_alpha = tmpAlpha;
					_v = tmpV;
					
					// I need to save the configuration because it changes within the object
					_selectedConfig = pCurrFeature->getCurrentConfig();
					// I save the object because it contains the informations about the type,
					// the name, etc..
					_pSelectedFeature = pCurrFeature;
					_thresholds = tmpThresholds;
					
					bestEnergy = tmpEnergy;
				}
				
				// Move to the next configuration
				pCurrFeature->moveToNextConfig();
				
				// check stopping criterion for random configurations
				switch (_samplingType)
				{
					case ST_NUM:
						++numConf;
						if (numConf >= _samplingVal)
							quitConfiguration = true;
						break;
					case ST_TIME:            
					{
						time( &currentTime );
						float diff = difftime(currentTime, startTime); // difftime is in seconds
						if (diff >= _samplingVal)
							quitConfiguration = true;
					}
						break;
					case ST_NO_SAMPLING:
						perror("ERROR: st no sampling... not sure what this means");
						
						break;
						
				} // end switch
				
				if (quitConfiguration)
					break;
				
			} // end while
			
			if (_verbose > 1)
			{
				time( &currentTime );
				float diff = difftime(currentTime, startTime); // difftime is in seconds
				
				cout << "done! "
				<< "(processed: " << numProcessed
				<< " - elapsed: " << diff << " sec)" 
				<< endl;
			}
			
		}
		
		if (!_pSelectedFeature)
		{
			cerr << "ERROR: No Haar Feature found. Something must be wrong!" << endl;
			exit(1);
		}
		else
		{
			if (_verbose > 1)
				cout << "Selected type: " << _pSelectedFeature->getName() << endl;
		}
		
		return bestEnergy;
	}