Example #1
0
void Spectra::sortAndPrepareSpectra(){
	
	long totskippedpeaks = 0;
	long totlen = 0;
	long totlenB = 0;

	//sort by parent mass
	sort(spectra.begin(), spectra.end(), SpectrumComparator());  //greater<Spectrum*>());
	
	//rearrange spectra
	if(se1->rearrangespectra == true){
		if(se1->writespecpos == true) writeSpectrumPositionsInMemory(se1->getOutFileName("specposfirst"));
		if(se1->outputlevel > 2) cout << "\nRearranging spectra in memory to increase performance for long-running searches. This may take seconds to minutes." << flush;
		//rearrange spectra in memory
		Spectrum *src, *destinationspectrum;
		vector<Spectrum*> spectraold;
		for(int i = 0; i < spectra.size(); i++){
			src = spectra[i];
			destinationspectrum = new Spectrum(src);
			spectra[i] = destinationspectrum;
			spectraold.push_back(src);
		}
		for(int i = 0; i < spectraold.size(); i++){
			delete spectraold[i];
		}
		spectraold.clear();
		if(se1->writespecpos == true) writeSpectrumPositionsInMemory(se1->getOutFileName("specposafter"));
	}
	
	//initialize BestMatches, index them for writing results later on
	for(int i = 0; i < spectra.size(); i++){
		spectra[i]->initializeBestMatches();
		//spectra[i]->bestmatches1->i = i;
	}
	
	//collect spectrum statistics
	for(int i = 0; i < spectra.size(); i++){
		parentmasses.push_back(spectra[i]->parentmassMH);
		totlen += spectra[i]->lengthgood;
        if (se1->outputlevel >2 ) cout << spectra[i] -> lengthgood << endl;
		totlenB += spectra[i]->lengthB;
		totskippedpeaks += spectra[i]->skippedpeaks;
		if(se1->outputlevel > 5) cout << "\nSpectra::sortSpectraAndExtractPM: pointer " << (long)spectra[i] << "  parent mass " << spectra[i]->parentmassMH;
	}

	//initialize spectrum parameters for hypergeometric score
	unreachablebins1->initialize();
	for(int i = 0; i < spectra.size(); i++){
		Spectrum *spec = spectra[i];
		spec->initializeHypergeometricNK(); //naive estimation for N and K
		
		//reestimation for N
		spec->N = spec->parentmassMHB - unreachablebins1->getTotal(se1->trypticendsrequired) - se1->blockedbins;
		
		//reestimation for K
		spec->K = unreachablebins1->estimateK(spec->binaryB, spec->parentmassMHB, se1->trypticendsrequired);
		
		//avoid exceptions in hypergeometric calculations
		if(spec->N < spec->K) spec->N = spec->K;
	}
	
	//set parent mass limits for tuple generation
	if(parentmasses.size() > 0) se1->setMinMaxPM(parentmasses[0], parentmasses[spectra.size() - 1]);
	
	//progressreport statistics
	double sf = se1->dnaAA1->scaling_factor;
	if(se1->outputlevel > 2){
		if(spectra.size() > 0){
			cout << "\nSpectra::sortSpectraAndExtractPM:";
			cout << "\nThere are " << spectra.size() << " spectra which were sorted according to their parent mass.";
			cout << "\nA spectrum contains on average " << (double)totlen / (double)spectra.size() << " correct peaks.";
			cout << "\nAfter preprocessing, the spectrum contains on average " << (double)totlenB / (double)spectra.size() << " peaks.";
			cout << "\nA spectrum contains on average " << (double)totskippedpeaks / (double) spectra.size() << " peaks of m/z < 0 or m/z > parentmass. Those were skipped.";
			cout << "\nLowest parent mass in spectra : " << parentmasses[0]/sf << "   Lowest theoretical parent mass considered: " << se1->min_monoparentmassMH / sf;
			cout << "\nHighest parent mass in spectra: " << parentmasses[spectra.size() - 1] / sf << "   Highest theoretical parent mass considered: " << se1->max_monoparentmassMH / sf;
		}else{
			cout << "\nSpectra::sortSpectraAndExtractPM: No spectra were parsed!";
		}
	}
}