void Spectra::sortAndPrepareSpectra(){ long totskippedpeaks = 0; long totlen = 0; long totlenB = 0; //sort by parent mass sort(spectra.begin(), spectra.end(), SpectrumComparator()); //greater<Spectrum*>()); //rearrange spectra if(se1->rearrangespectra == true){ if(se1->writespecpos == true) writeSpectrumPositionsInMemory(se1->getOutFileName("specposfirst")); if(se1->outputlevel > 2) cout << "\nRearranging spectra in memory to increase performance for long-running searches. This may take seconds to minutes." << flush; //rearrange spectra in memory Spectrum *src, *destinationspectrum; vector<Spectrum*> spectraold; for(int i = 0; i < spectra.size(); i++){ src = spectra[i]; destinationspectrum = new Spectrum(src); spectra[i] = destinationspectrum; spectraold.push_back(src); } for(int i = 0; i < spectraold.size(); i++){ delete spectraold[i]; } spectraold.clear(); if(se1->writespecpos == true) writeSpectrumPositionsInMemory(se1->getOutFileName("specposafter")); } //initialize BestMatches, index them for writing results later on for(int i = 0; i < spectra.size(); i++){ spectra[i]->initializeBestMatches(); //spectra[i]->bestmatches1->i = i; } //collect spectrum statistics for(int i = 0; i < spectra.size(); i++){ parentmasses.push_back(spectra[i]->parentmassMH); totlen += spectra[i]->lengthgood; if (se1->outputlevel >2 ) cout << spectra[i] -> lengthgood << endl; totlenB += spectra[i]->lengthB; totskippedpeaks += spectra[i]->skippedpeaks; if(se1->outputlevel > 5) cout << "\nSpectra::sortSpectraAndExtractPM: pointer " << (long)spectra[i] << " parent mass " << spectra[i]->parentmassMH; } //initialize spectrum parameters for hypergeometric score unreachablebins1->initialize(); for(int i = 0; i < spectra.size(); i++){ Spectrum *spec = spectra[i]; spec->initializeHypergeometricNK(); //naive estimation for N and K //reestimation for N spec->N = spec->parentmassMHB - unreachablebins1->getTotal(se1->trypticendsrequired) - se1->blockedbins; //reestimation for K spec->K = unreachablebins1->estimateK(spec->binaryB, spec->parentmassMHB, se1->trypticendsrequired); //avoid exceptions in hypergeometric calculations if(spec->N < spec->K) spec->N = spec->K; } //set parent mass limits for tuple generation if(parentmasses.size() > 0) se1->setMinMaxPM(parentmasses[0], parentmasses[spectra.size() - 1]); //progressreport statistics double sf = se1->dnaAA1->scaling_factor; if(se1->outputlevel > 2){ if(spectra.size() > 0){ cout << "\nSpectra::sortSpectraAndExtractPM:"; cout << "\nThere are " << spectra.size() << " spectra which were sorted according to their parent mass."; cout << "\nA spectrum contains on average " << (double)totlen / (double)spectra.size() << " correct peaks."; cout << "\nAfter preprocessing, the spectrum contains on average " << (double)totlenB / (double)spectra.size() << " peaks."; cout << "\nA spectrum contains on average " << (double)totskippedpeaks / (double) spectra.size() << " peaks of m/z < 0 or m/z > parentmass. Those were skipped."; cout << "\nLowest parent mass in spectra : " << parentmasses[0]/sf << " Lowest theoretical parent mass considered: " << se1->min_monoparentmassMH / sf; cout << "\nHighest parent mass in spectra: " << parentmasses[spectra.size() - 1] / sf << " Highest theoretical parent mass considered: " << se1->max_monoparentmassMH / sf; }else{ cout << "\nSpectra::sortSpectraAndExtractPM: No spectra were parsed!"; } } }