//Binary search to quickly find the nearest peak int CNoiseReduction::NearestPeak(Spectrum& sp, double mz){ int pivot=0; int width=0; int lastWidth=0; int best=0; double dif=9999999.9; double d; pivot=sp.size()/2; width=(int)(pivot/2.0+0.5); while(width!=lastWidth && pivot<sp.size() && pivot>-1){ d=fabs(sp.at(pivot).mz-mz); if(d<dif){ dif=d; best=pivot; } if(sp.at(pivot).mz==mz){ return pivot; } else if(sp.at(pivot).mz > mz){ pivot-=width; lastWidth=width; width=(int)(width/2.0+0.5); } else { pivot+=width; lastWidth=width; width=(int)(width/2.0+0.5); } } if(pivot<sp.size() && pivot>-1){ d=fabs(sp.at(pivot).mz-mz); if(d<dif) best=pivot; } return best; }
//#ifdef _MSC_VER //#ifndef MSSINGLESCAN_MAIN //int mssinglescan_main(int argc, char * argv[] ) { //#else //int main(int argc, char *argv[]){ //#endif //#else int main(int argc, char *argv[]){ //#endif //Here are all the variable we are going to need MSReader r; Spectrum s; int j; if(argc==1){ printf("DESCRIPTION: Reads an MS/MS spectrum from any MSToolkit supported file type and outputs to screen in MS2 format.\n\n"); printf("USAGE: MSSingleScan [scan number] [file]\n"); exit(0); }; r.readFile(argv[2],s,atoi(argv[1])); if(s.getScanNumber()==0) exit(-1); printf("S\t%d\t%d\t%.*f\n",s.getScanNumber(),s.getScanNumber(),2,s.getMZ()); if(s.getRTime()>0) printf("I\tRTime\t%.*f\n",4,s.getRTime()); for(j=0;j<s.sizeZ();j++){ printf("Z\t%d\t%.*f\n",s.atZ(j).z,2,s.atZ(j).mz); }; for(j=0;j<s.size();j++){ printf("%.4f %.4f\n",s.at(j).mz,s.at(j).intensity); }; return 0; };
//Estimates a point-spacing parameter from local intensity maxima.
//  sp  - spectrum to inspect.
//  tot - number of maxima to use; scanning stops once this many are found.
//For each point that is strictly more intense than both of its neighbors,
//the difference (1/mz of that point) - (1/mz of the next point) is
//accumulated. Returns the mean of those differences, or 0.0 when no
//maximum was found.
double CNoiseReduction::CParam(Spectrum& sp, int tot){
  //Spectra with fewer than 10 points are scanned from the second point;
  //larger ones are scanned from the midpoint onward.
  int pos=(sp.size()<10) ? 1 : sp.size()/2;
  int peakCount=0;
  double sum=0.0;

  for( ; pos<sp.size()-1; pos++){
    const bool isApex = sp.at(pos).intensity > sp.at(pos-1).intensity
                     && sp.at(pos).intensity > sp.at(pos+1).intensity;
    if(!isApex) continue;

    sum += ((1/sp.at(pos).mz) - (1/sp.at(pos+1).mz));
    peakCount++;
    if(peakCount==tot) break;
  }

  return (peakCount==0) ? 0.0 : sum/peakCount;
}
bool CNoiseReduction::ScanAverage(Spectrum& sp, char* file, int width, float cutoff){ Spectrum ts; Spectrum ps=sp; MSReader r; int i; int j; int k; int widthCount=0; int numScans=1; double dif; double prec; double dt; double c=CParam(ps,3); bool bLeft=true; int posLeft=ps.getScanNumber()-1; int posRight=ps.getScanNumber()+1; char cFilter1[256]; char cFilter2[256]; ps.getRawFilter(cFilter1,256); while(widthCount<(width*2)){ //Alternate looking left and right if(bLeft){ bLeft=false; widthCount++; while(posLeft>0){ r.readFile(file,ts,posLeft); if(ts.getScanNumber()==0) break; ts.getRawFilter(cFilter2,256); if(strcmp(cFilter1,cFilter2)==0) break; posLeft--; } } else { bLeft=true; widthCount++; while(true){ r.readFile(file,ts,posRight); if(ts.getScanNumber()==0) break; ts.getRawFilter(cFilter2,256); if(strcmp(cFilter1,cFilter2)==0) break; posRight++; } } if(ts.getScanNumber()==0) continue; numScans++; //Match peaks between pivot scan and temp scan k=0; for(i=0;i<ps.size();i++){ dif=100000.0; prec = c * ps.at(i).mz * ps.at(i).mz / 2; for(j=k;j<ts.size();j++){ dt=fabs(ps.at(i).mz-ts.at(j).mz); if(dt<=dif) { if(dt<prec) { ps.at(i).intensity+=ts.at(j).intensity; ts.at(j).mz=-1.0; k=j+1; break; } dif=dt; } else { k=j-1; break; } } } //Add unmatched peaks from temp scan for(i=0;i<ts.size();i++){ if(ts.at(i).mz>-1.0) ps.add(ts.at(i)); } //Sort pivot scan peaks for fast traversal against next temp scan ps.sortMZ(); } //Average points and apply cutoff sp.clear(); for(i=0;i<ps.size();i++) { ps.at(i).intensity/=numScans; if(ps.at(i).intensity>=cutoff) sp.add(ps.at(i)); } sp.setScanNumber(ps.getScanNumber()); sp.setScanNumber(ps.getScanNumber(true),true); sp.setRTime(ps.getRTime()); return true; }
//First derivative method taken from CSpecAnalyze, returns base peak intensity of the set void CNoiseReduction::FirstDerivativePeaks(Spectrum& sp, int winSize){ int i,j; float maxIntensity; int bestPeak; bool bLastPos; Spectrum gp; int nextBest; double FWHM; Peak_T centroid; bLastPos=false; for(i=0;i<sp.size()-winSize;i++){ if(sp.at(i).intensity<sp.at(i+winSize).intensity) { bLastPos=true; continue; } else { if(bLastPos){ bLastPos=false; //find max and add peak maxIntensity=0; for(j=i;j<i+winSize;j++){ if (sp.at(j).intensity>maxIntensity){ maxIntensity=sp.at(j).intensity; bestPeak = j; } } //Best estimate of Gaussian centroid //Get 2nd highest point of peak if(bestPeak==sp.size()-1){ nextBest=bestPeak-1; } else if(sp.at(bestPeak-1).intensity > sp.at(bestPeak+1).intensity){ nextBest=bestPeak-1; } else { nextBest=bestPeak+1; } //Get FWHM FWHM = calcFWHM(sp.at(bestPeak).mz); //Calc centroid MZ (in three lines for easy reading) centroid.mz = pow(FWHM,2)*log(sp.at(bestPeak).intensity/sp.at(nextBest).intensity); centroid.mz /= GC*(sp.at(bestPeak).mz-sp.at(nextBest).mz); centroid.mz += (sp.at(bestPeak).mz+sp.at(nextBest).mz)/2; //Calc centroid intensity centroid.intensity=(float)(sp.at(bestPeak).intensity/exp(-pow((sp.at(bestPeak).mz-centroid.mz)/FWHM,2)*GC)); //some peaks are funny shaped and have bad gaussian fit. //if error is more than 10%, keep existing intensity if( fabs((sp.at(bestPeak).intensity - centroid.intensity) / centroid.intensity * 100) > 10 || //not a good check for infinity centroid.intensity>999999999999.9 || centroid.intensity < 0 ) { centroid.intensity=sp.at(bestPeak).intensity; } //Hack until I put in mass ranges if(centroid.mz<0 || centroid.mz>2000) { //do nothing if invalid mz } else { gp.add(centroid); } i+=winSize-1; } } } int scanNumber=sp.getScanNumber(); int scanNumber2=sp.getScanNumber(false); float rTime=sp.getRTime(); sp = gp; sp.setRTime(rTime); sp.setScanNumber(scanNumber); sp.setScanNumber(scanNumber2,true); }
//Averages a pivot spectrum with neighboring spectra held in a buffer (bs),
//zeroing points that were not seen in enough scans.
//  sp      - cleared on entry; receives the averaged spectrum.
//  file    - when not NULL, the buffer is reset and the pivot is read from
//            this file; when NULL, the pivot is the next spectrum already in
//            the buffer.
//  width   - number of neighbor search passes per side; passes alternate
//            left/right for width*2 passes in total.
//  cutoff  - not applied by the active code (the filtering line is disabled).
//  scanNum - when > 0 and file is given, the scan to read as the pivot;
//            otherwise readFile() is called without a scan number.
//Returns false if the pivot could not be read (scan number 0) or the buffer
//has no further spectrum; true otherwise.
//NOTE(review): bs, r, posA, lastFile and cs are not declared in this
//function - presumably class members; confirm in the class declaration.
bool CNoiseReduction::ScanAveragePlusDeNoise(Spectrum& sp, char* file, int width, float cutoff, int scanNum){
  Spectrum ts;
  Spectrum ps;
  //MSReader r;

  vector<int> v;      //buffer indexes of the neighbors selected for averaging
  vector<int> vPos;   //per-neighbor resume position for the peak matching

  int i;
  int j;
  int k;
  int widthCount=0;
  int numScans=1;     //the pivot itself counts as one scan
  int match;
  double dif;
  double prec;
  double dt;
  double c;

  bool bLeft=true;
  //These two initial values are overwritten with posA before they are used.
  int posLeft=ps.getScanNumber()-1;
  int posRight=ps.getScanNumber()+1;
  int index;
  char cFilter1[256];
  //char cFilter2[256];

  sp.clear();

  //if file is not null, create new buffer
  if(file!=NULL){
    //NOTE(review): unbounded copy - the size of lastFile is not visible here.
    strcpy(lastFile,file);
    bs.clear();
    if(scanNum>0) r->readFile(file,ts,scanNum);
    else r->readFile(file,ts);
    if(ts.getScanNumber()==0) return false;
    bs.push_back(ts);
    ps=bs[0];
    c=CParam(ps,3);
    posA=0;
  } else {
    posA++;
    //cout << "ER: " << posA << " " << bs.size() << endl;
    if(posA>=(int)bs.size()) return false; //end of buffer, no more data
    ps=bs[posA];
    c=CParam(ps,3);
  }

  //set our pivot spectrum
  //ps=bs[posA];
  ps.getRawFilter(cFilter1,256);
  //cout << "Averaging: " << ps.getScanNumber() << endl;

  posLeft=posA;
  posRight=posA;
  while(widthCount<(width*2)){

    //-1 means "no neighbor found on this pass"
    index=-1;

    //Alternate looking left and right
    if(bLeft){
      bLeft=false;
      widthCount++;

      while(true){
        posLeft--;
        //cout << posLeft << endl;
        if(posLeft<0) { //buffer is too short on left, add spectra
          //Walk scan numbers downward from the first buffered scan until a
          //scan is read successfully or scan number 0 is reached.
          i=bs[0].getScanNumber();
          while(true){
            i--;
            //cout << "I: " << i << endl;
            if(i==0) break;
            r->readFile(lastFile,ts,i);
            if(ts.getScanNumber()==0) continue;
            else break;
          }
          if(i==0) break;
          bs.push_front(ts);
          //Everything already in the buffer moved one slot to the right,
          //so shift the stored neighbor indexes and positions to match.
          for(i=0;i<(int)v.size();i++)v[i]++;
          posA++;
          posRight++;
          posLeft=0;
          //ts.getRawFilter(cFilter2,256);
          //Neighbors are accepted by MS level (the raw-filter comparison
          //is disabled).
          if(ts.getMsLevel()==cs.msLevel) {
            index=posLeft;
            break;
          }
        } else {
          //bs[posLeft].getRawFilter(cFilter2,256);
          if(bs[posLeft].getMsLevel()==cs.msLevel) {
            index=posLeft;
            break;
          }
        }
      }

    } else {
      bLeft=true;
      widthCount++;

      while(true){
        posRight++;
        if(posRight>=(int)bs.size()) { //buffer is too short on right, add spectra
          //Re-read the last buffered scan, then read with a NULL file name
          //to fetch the scan that follows it.
          r->readFile(lastFile,ts,bs[bs.size()-1].getScanNumber());
          r->readFile(NULL,ts);
          if(ts.getScanNumber()==0) {
            //Nothing more to read; undo the step so posRight stays in range.
            posRight--;
            break;
          }
          bs.push_back(ts);
          //ts.getRawFilter(cFilter2,256);
          if(ts.getMsLevel()==cs.msLevel) {
            index=posRight;
            break;
          }
        } else {
          //bs[posRight].getRawFilter(cFilter2,256);
          if(bs[posRight].getMsLevel()==cs.msLevel) {
            index=posRight;
            break;
          }
        }
      }
    }

    if(index==-1) continue;

    //ts=bs[index];
    v.push_back(index);
    numScans++;
  }

  //cout << "Still Averaging: " << ps.getScanNumber() << endl;
  //cout << " with: ";
  //for(i=0;i<v.size();i++) cout << bs[v[i]].getScanNumber() << " ";
  //cout << endl;
  //cout << numScans << " " << v.size() << endl;

  //Match peaks between pivot scan and neighbors
  for(i=0;i<(int)v.size();i++) vPos.push_back(0);
  for(i=0;i<(int)ps.size();i++){ //iterate all points
    //Matching tolerance grows with the square of the m/z.
    prec = c * ps.at(i).mz * ps.at(i).mz / 2;
    match=1;   //the pivot point itself counts as one observation

    for(k=0;k<(int)v.size();k++){ //iterate all neighbors
      dif=100000.0;
      //cout << "Checking " << bs[v[k]].getScanNumber() << " pos " << vPos[k] << endl;

      for(j=vPos[k];j<bs[v[k]].size();j++){ //check if point is a match
        dt=fabs(ps.at(i).mz-bs[v[k]].at(j).mz);
        if(dt<=dif) {
          if(dt<prec) {
            ps.at(i).intensity+=bs[v[k]].at(j).intensity;
            vPos[k]=j+1;
            match++;
            break;
          }
          dif=dt;
        } else {
          //Distance started growing: resume one step back for the next
          //pivot point.
          vPos[k]=j-1;
          break;
        }
      }

    }

    //if data point was not visible across enough scans, set it to 0
    if(match<cs.boxcarFilter && match<(int)v.size()) ps.at(i).intensity=0.0;
  }

  //Average points and apply cutoff
  //(the cutoff test is disabled; every point is kept)
  for(i=0;i<ps.size();i++) {
    ps.at(i).intensity/=numScans;
    sp.add(ps.at(i));
    //if(ps.at(i).intensity>=cutoff) sp.add(ps.at(i));
  }
  sp.setScanNumber(ps.getScanNumber());
  sp.setScanNumber(ps.getScanNumber(true),true);
  sp.setRTime(ps.getRTime());
  sp.setRawFilter(cFilter1);

  //clear unused buffer
  //Drops the spectra in front of the leftmost position reached and keeps
  //posA pointing at the same pivot.
  if(posLeft>0){
    while(posLeft>0){
      bs.pop_front();
      posLeft--;
      posA--;
    }
  }

  //cout << "Done averaging" << endl;

  return true;
}
// Function reads in the spectra from the data stored for each PSMClass object void PepXMLClass::readInSpectra() { string curSpectrumFilePath; deque<PSMClass>::iterator curPSM; SpecStruct *spec = NULL; bool status; string spectrumFileName; int scanNum; int ctr = 0; int N = (signed) PSMvec->size(); if(g_ext == "mgf") { parseMGF(); return; } // mstoolkit to read MS2 spectra MSReader *reader = new MSReader(); reader->setFilter(MS2); // mstoolkit spectrum object Spectrum *S = NULL; // Extract from each PSM, it's parent spectrum file and scan number for(curPSM = PSMvec->begin(); curPSM != PSMvec->end(); curPSM++) { spectrumFileName = curPSM->getSpectrumFileName(); scanNum = curPSM->getScanNumber(); // code to get the full path to the input spectrum file filesystem::path curFile( spectrumFileName.c_str() ); filesystem::path spectral_dir( g_srcDir.c_str() ); filesystem::path curFilePath( spectral_dir/curFile ); curSpectrumFilePath = curFilePath.file_string(); // if the source file is not located, drop the spectrum if( !boost::filesystem::exists(curSpectrumFilePath) ) { curPSM = PSMvec->erase(curPSM); continue; } // Read the spectrum for 'scanNum' into 'S' S = new Spectrum(); status = reader->readFile(curSpectrumFilePath.c_str(), *S, scanNum); if(S->getScanNumber() == 0) { cerr << "Failed to get " << curSpectrumFilePath.c_str() << " scan#: " << scanNum << endl; exit(0); } spec = new SpecStruct(); for(int j = 0; j < S->size(); j++) { spec->mz.push_back( S->at(j).mz ); spec->intensity.push_back( S->at(j).intensity ); } delete(S); S = NULL; // Assign the data in 'S' to curPSM curPSM->recordSpectrum(*spec); delete(spec); spec = NULL; ctr++; printProgress("Reading in spectra (please be patient)...", ctr, N); } delete(reader); reader = NULL; cerr << endl; // prettier stderr }