Esempio n. 1
0
	void VJCascadeClassifier::savePosteriors(const string& dataFileName, const string& shypFileName, 
											  const string& outFileName, int numIterations)
	{
		// loading data
		InputData* pData = loadInputData(dataFileName, shypFileName);
		const int numOfExamples = pData->getNumExamples();
		
		//get the index of positive label		
		const NameMap& namemap = pData->getClassMap();
		_positiveLabelIndex = namemap.getIdxFromName( _positiveLabelName );
		
		
		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;
		
		
		// open outfile
		ofstream outRes(outFileName.c_str());
		if (!outRes.is_open())
		{
			cout << "Cannot open outfile!!! " << outFileName << endl;
		}
				
		
		// The class that loads the weak hypotheses
		UnSerialization us;
		
		// Where to put the weak hypotheses
		vector<vector<BaseLearner*> > weakHypotheses;
        		
		// For stagewise thresholds 
		vector<AlphaReal> thresholds(0);
		// loads them
		//us.loadHypotheses(shypFileName, weakHypotheses, pData);
		us.loadCascadeHypotheses(shypFileName, weakHypotheses, thresholds, pData);
		
		// output the number of stages
		outRes << "StageNum " << weakHypotheses.size() << endl;
		
		// output original labels
		outRes << "Labels";
		for(int i=0; i<numOfExamples; ++i )
		{		
			vector<Label>& labels = pData->getLabels(i);
			if (labels[_positiveLabelIndex].y>0) // pos label				
				outRes << " 1";
			else
				outRes << " 0";
		}				
		outRes << endl;
		
		// store result
		vector<CascadeOutputInformation> cascadeData(0);
		vector<CascadeOutputInformation>::iterator it;
		
		cascadeData.resize(numOfExamples);		
		for( it=cascadeData.begin(); it != cascadeData.end(); ++it )
		{
			it->active=true;
		}										
		
		for(int stagei=0; stagei < weakHypotheses.size(); ++stagei )
		{
			// for posteriors
			vector<AlphaReal> posteriors(0);		
			
			// calculate the posteriors after stage
			VJCascadeLearner::calculatePosteriors( pData, weakHypotheses[stagei], posteriors, _positiveLabelIndex );			
			
			// update the data (posteriors, active element index etc.)
			//VJCascadeLearner::forecastOverAllCascade( pData, posteriors, activeInstances, thresholds[stagei] );
			updateCascadeData(pData, weakHypotheses, stagei, posteriors, thresholds, _positiveLabelIndex, cascadeData);
			
			
			int numberOfActiveInstance = 0;
			for( int i = 0; i < numOfExamples; ++i )
				if (cascadeData[i].active) numberOfActiveInstance++;
			
			if (_verbose > 0 )
				cout << "Number of active instances: " << numberOfActiveInstance << "(" << numOfExamples << ")" << endl;									
			
			// output stats
			outRes << "Stage " << stagei << " " << weakHypotheses[stagei].size() << endl; 

			outRes << "Forecast";
			for(int i=0; i<numOfExamples; ++i )
			{	
				outRes << " " << cascadeData[i].forecast;
			}				
			outRes << endl;

			outRes << "Active";
			for(int i=0; i<numOfExamples; ++i )
			{	
				if( cascadeData[i].active)
					outRes << " 1";
				else
					outRes << " 0";
			}				
			outRes << endl;

			outRes << "Posteriors";
			for(int i=0; i<numOfExamples; ++i )
			{	
				outRes << " " << cascadeData[i].score;
			}				
			outRes << endl;
			
		}						
		
		outRes.close();
		
		// free memory allocation
		vector<vector<BaseLearner*> >::iterator bvIt;
		for( bvIt = weakHypotheses.begin(); bvIt != weakHypotheses.end(); ++bvIt )
		{
			vector<BaseLearner* >::iterator bIt;
			for( bIt = (*bvIt).begin(); bIt != (*bvIt).end(); ++bIt )
				delete *bIt;
		}
	}
Esempio n. 2
0
	void VJCascadeClassifier::run(const string& dataFileName, const string& shypFileName, 
								   int numIterations, const string& outResFileName )
	{
		// loading data
		InputData* pData = loadInputData(dataFileName, shypFileName);
		const int numOfExamples = pData->getNumExamples();
				
		//get the index of positive label		
		const NameMap& namemap = pData->getClassMap();
		_positiveLabelIndex = namemap.getIdxFromName( _positiveLabelName );				
		
		if (_verbose > 0)
			cout << "Loading strong hypothesis..." << flush;
		
		
		
		// The class that loads the weak hypotheses
		UnSerialization us;
		
		// Where to put the weak hypotheses
		vector<vector<BaseLearner*> > weakHypotheses;
		
		// For stagewise thresholds 
		vector<AlphaReal> thresholds(0);
        
		// loads them
		//us.loadHypotheses(shypFileName, weakHypotheses, pData);
		us.loadCascadeHypotheses(shypFileName, weakHypotheses, thresholds, pData);
		

		// store result
		vector<CascadeOutputInformation> cascadeData(0);
		vector<CascadeOutputInformation>::iterator it;
		
		cascadeData.resize(numOfExamples);		
		for( it=cascadeData.begin(); it != cascadeData.end(); ++it )
		{
			it->active=true;
		}										
		
		if (!_outputInfoFile.empty())
		{
			outputHeader();
		}
		
		for(int stagei=0; stagei < weakHypotheses.size(); ++stagei )
		{
			// for posteriors
			vector<AlphaReal> posteriors(0);		
			
			// calculate the posteriors after stage
			VJCascadeLearner::calculatePosteriors( pData, weakHypotheses[stagei], posteriors, _positiveLabelIndex );			
			
			// update the data (posteriors, active element index etc.)
			updateCascadeData(pData, weakHypotheses, stagei, posteriors, thresholds, _positiveLabelIndex, cascadeData);
			
			if (!_outputInfoFile.empty())
			{
				_output << stagei + 1 << "\t";
				_output << weakHypotheses[stagei].size() << "\t";
				outputCascadeResult( pData, cascadeData );
			}
			
			int numberOfActiveInstance = 0;
			for( int i = 0; i < numOfExamples; ++i )
				if (cascadeData[i].active) numberOfActiveInstance++;
			
			if (_verbose > 0 )
				cout << "Number of active instances: " << numberOfActiveInstance << "(" << numOfExamples << ")" << endl;									
		}
				
		vector<vector<int> > confMatrix(2);
		confMatrix[0].resize(2);
		fill( confMatrix[0].begin(), confMatrix[0].end(), 0 );
		confMatrix[1].resize(2);
		fill( confMatrix[1].begin(), confMatrix[1].end(), 0 );
		
	    // print accuracy
		for(int i=0; i<numOfExamples; ++i )
		{		
			vector<Label>& labels = pData->getLabels(i);
			if (labels[_positiveLabelIndex].y>0) // pos label				
				if (cascadeData[i].forecast==1)
					confMatrix[1][1]++;
				else
					confMatrix[1][0]++;
			else // negative label
				if (cascadeData[i].forecast==0)
					confMatrix[0][0]++;
				else
					confMatrix[0][1]++;
		}			
		
		double acc = 100.0 * (confMatrix[0][0] + confMatrix[1][1]) / ((double) numOfExamples);
		// output it
		cout << endl;
		cout << "Error Summary" << endl;
		cout << "=============" << endl;
		
		cout << "Accuracy: " << setprecision(4) << acc << endl;
		cout << setw(10) << "\t" << setw(10) << namemap.getNameFromIdx(1-_positiveLabelIndex) << setw(10) << namemap.getNameFromIdx(_positiveLabelIndex) << endl;
		cout << setw(10) << namemap.getNameFromIdx(1-_positiveLabelIndex) << setw(10) << confMatrix[0][0] << setw(10) << confMatrix[0][1] << endl;
		cout << setw(10) << namemap.getNameFromIdx(_positiveLabelIndex) << setw(10) << confMatrix[1][0] << setw(10) << confMatrix[1][1] << endl;		
		
		// output forecast 
		if (!outResFileName.empty() ) outputForecast(pData, outResFileName, cascadeData );
						
		// free memory allocation
		vector<vector<BaseLearner*> >::iterator bvIt;
		for( bvIt = weakHypotheses.begin(); bvIt != weakHypotheses.end(); ++bvIt )
		{
			vector<BaseLearner* >::iterator bIt;
			for( bIt = (*bvIt).begin(); bIt != (*bvIt).end(); ++bIt )
				delete *bIt;
		}
	}
Esempio n. 3
0
string KmerTree::getTaxonomy(Sequence* thisSeq, string& simpleTax, bool& flipped){
	try {
        simpleTax = "";
        string seqName = thisSeq->getName(); string querySequence = thisSeq->getAligned(); string taxonProbabilityString = "";
        string unalignedSeq = thisSeq->getUnaligned();
        
        double logPOutlier = (querySequence.length() - kmerSize + 1) * log(1.0/(double)tree[0]->getNumUniqueKmers());
        
        vector<int> queryProfile = ripKmerProfile(unalignedSeq);	//convert to kmer vector
        
        vector<vector<double> > pXgivenKj_D_j(numLevels);
        vector<vector<int> > indices(numLevels);
        for(int i=0;i<numLevels;i++){
            if (m->getControl_pressed()) { return taxonProbabilityString; }
            pXgivenKj_D_j[i].push_back(logPOutlier);
            indices[i].push_back(-1);
        }
        
        for(int i=0;i<numTaxa;i++){
            if (m->getControl_pressed()) { return taxonProbabilityString; }
            pXgivenKj_D_j[tree[i]->getLevel()].push_back(tree[i]->getPxGivenkj_D_j(queryProfile));
            indices[tree[i]->getLevel()].push_back(i);
        }
        
        vector<double> sumLikelihood(numLevels, 0);
        vector<double> bestPosterior(numLevels, 0);
        vector<int> maxIndex(numLevels, 0);
        int maxPosteriorIndex;
        
        //let's find the best level and taxa within that level
        for(int i=0;i<numLevels;i++){ //go across all j's - from the root to genus
            if (m->getControl_pressed()) { return taxonProbabilityString; }
            
            int numTaxaInLevel = (int)indices[i].size();
            
            vector<double> posteriors(numTaxaInLevel, 0);		
            sumLikelihood[i] = getLogExpSum(pXgivenKj_D_j[i], maxPosteriorIndex);
            
            maxPosteriorIndex = 0;
            for(int j=0;j<numTaxaInLevel;j++){
                posteriors[j] = exp(pXgivenKj_D_j[i][j] - sumLikelihood[i]);
                if(posteriors[j] > posteriors[maxPosteriorIndex]){	
                    maxPosteriorIndex = j;
                }
                
            }
            
            maxIndex[i] = getMinRiskIndexKmer(queryProfile, indices[i], posteriors);
            
            maxIndex[i] = maxPosteriorIndex;
            bestPosterior[i] = posteriors[maxIndex[i]];	
        }
        
        int saneDepth = sanityCheck(indices, maxIndex);
        
        simpleTax = "";
        int savedspot = 1;
        taxonProbabilityString = "";
        for(int i=1;i<=saneDepth;i++){
            if (m->getControl_pressed()) { return taxonProbabilityString; }
            int confidenceScore = (int) (bestPosterior[i] * 100);
            if (confidenceScore >= confidenceThreshold) {
                if(indices[i][maxIndex[i]] != -1){
                    taxonProbabilityString += tree[indices[i][maxIndex[i]]]->getName() + "(" + toString(confidenceScore) + ");";
                    simpleTax += tree[indices[i][maxIndex[i]]]->getName() + ";";
                }
                else{
                    taxonProbabilityString += "unclassified(" + toString(confidenceScore) + ");";
                    simpleTax += "unclassified;";
                }
            }else { break; }
            savedspot = i;
        }
        
        
        
        for(int i=savedspot+1;i<numLevels;i++){
            if (m->getControl_pressed()) { return taxonProbabilityString; }
            taxonProbabilityString += "unclassified(0);";
            simpleTax += "unclassified;";
        }
        
        return taxonProbabilityString;
	}
	catch(exception& e) {
		m->errorOut(e, "KmerTree", "getTaxonomy");
		exit(1);
	}
}