// lists of peptide hits in "maps" will be sorted void MapAlignmentAlgorithmIdentification::getRetentionTimes_( MSExperiment<> & experiment, SeqToList & rt_data) { for (MSExperiment<>::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) { getRetentionTimes_(exp_it->getPeptideIdentifications(), rt_data); } // duplicates should not be possible -> no need to remove them }
/// Tool entry point: connects to the database and then either creates the
/// database tables ("init" flag set) or loads the mzML input file and stores
/// it in the database.
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  const bool init = getFlag_("init");

  // the input file is only queried when there is something to import
  String in;
  if (!init)
  {
    in = getStringOption_("in");
  }

  String db = getStringOption_("db");
  String user = getStringOption_("user");
  String password = getStringOption_("password");
  String host = getStringOption_("host");
  Int port = getIntOption_("port");

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  DBConnection con;
  con.connect(db, user, password, host, port);
  DBAdapter adapter(con);

  if (init)
  {
    adapter.createDB();
    return EXECUTION_OK;
  }

  // load input file data
  MSExperiment<Peak1D> experiment;
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  mzml_file.load(in, experiment);

  // annotate output with data processing info
  addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

  // store data and report the id under which it was persisted
  adapter.storeExperiment(experiment);
  writeLog_(String(" written file to DB (id: ") + static_cast<double>(experiment.getPersistenceId()) + ")");

  return EXECUTION_OK;
}
/// Tool entry point: loads an mzML file, applies the BernNorm filter with the
/// parameters found under "algorithm:", and writes the result as mzML.
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  // input/output files
  String in = getStringOption_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MSExperiment<> experiment;
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  mzml_file.load(in, experiment);

  //-------------------------------------------------------------
  // if meta data arrays are present, remove them and warn
  //-------------------------------------------------------------
  const bool had_meta_data_arrays = experiment.clearMetaDataArrays();
  if (had_meta_data_arrays)
  {
    writeLog_("Warning: Spectrum meta data arrays cannot be sorted. They are deleted.");
  }

  //-------------------------------------------------------------
  // filter
  //-------------------------------------------------------------
  Param filter_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used filter parameters", filter_param, 3);

  BernNorm bern_norm;
  bern_norm.setParameters(filter_param);
  bern_norm.filterPeakMap(experiment);

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  // annotate output with data processing info
  addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::FILTERING));
  mzml_file.store(out, experiment);

  return EXECUTION_OK;
}
bool IDEvaluationBase::addSearchFile(const String& file_name) { MSSpectrum<> points; if (!loadCurve(file_name, points)) return false; data_.addSpectrum(points); MSExperiment<>* exp = new MSExperiment<>(); exp->addSpectrum(points); spec_1d_->canvas()->addLayer(SpectrumCanvas::ExperimentSharedPtrType(exp)); spec_1d_->canvas()->setLayerName(spec_1d_->canvas()->getLayerCount() - 1, points.getMetaValue("search_engine")); // set intensity mode (after spectrum has been added!) setIntensityMode((int) SpectrumCanvas::IM_SNAP); return true; }
void TOFCalibration::matchMasses_(MSExperiment<> & calib_peaks, std::vector<std::vector<unsigned int> > & monoiso_peaks, std::vector<unsigned int> & obs_masses, std::vector<double> & exp_masses, unsigned int idx) { for (unsigned int i = 0; i < monoiso_peaks[idx].size(); ++i) { for (unsigned int j = 0; j < exp_masses_.size(); ++j) { if (fabs(((calib_peaks.begin() + idx)->begin() + (monoiso_peaks[idx])[i])->getMZ() - exp_masses_[j]) < 1) { obs_masses.push_back((monoiso_peaks[idx])[i]); exp_masses.push_back(exp_masses_[j]); break; } } } #ifdef DEBUG_CALIBRATION std::cout << "\n\n---------\nmatching monoisotopic peaks\n"; for (unsigned int i = 0; i < obs_masses.size(); ++i) { std::cout << ((calib_peaks_ft_.begin() + idx)->begin() + obs_masses[i])->getMZ() << "\t" << exp_masses[i] << std::endl; } #endif }
/**
  @brief Sums the intensities of consecutive isotopic peaks in one direction.

  Starting one @p isotope_offset away from @p theoretical_mz, the peak of
  @p precursor nearest to the current position is looked up. If its m/z lies
  within max_precursor_isotope_deviation_ of the position, its intensity is
  added and the position advances by another @p isotope_offset. The walk ends
  at the first missing peak or as soon as the position is no longer strictly
  between @p lower_mz_bound and @p upper_mz_bound.

  @return The summed intensity of the peaks found (0 if none).
*/
DoubleReal IsobaricChannelExtractor::sumPotentialIsotopePeaks_(const MSExperiment<Peak1D>::ConstIterator& precursor,
                                                               const Peak1D::CoordinateType& lower_mz_bound,
                                                               const Peak1D::CoordinateType& upper_mz_bound,
                                                               Peak1D::CoordinateType theoretical_mz,
                                                               const Peak1D::CoordinateType isotope_offset) const
{
  DoubleReal summed_intensity = 0.0;

  // begin at the first potential isotopic peak and keep stepping while inside the isolation window
  for (theoretical_mz += isotope_offset;
       theoretical_mz > lower_mz_bound && theoretical_mz < upper_mz_bound;
       theoretical_mz += isotope_offset)
  {
    const Size nearest_idx = precursor->findNearest(theoretical_mz);
    const bool is_isotopic = fabs(theoretical_mz - (*precursor)[nearest_idx].getMZ()) < max_precursor_isotope_deviation_;

    if (!is_isotopic)
    {
      // a missing peak ends the series
      break;
    }

    summed_intensity += (*precursor)[nearest_idx].getIntensity();
  }

  return summed_intensity;
}
/**
  @brief Prepares a spectrum look-up for resolving Mascot spectrum titles.

  The spectra of @p exp are read into @p lookup. If @p scan_regex is
  non-empty, it is registered as the only reference format. Otherwise the
  default formats are registered: the scan-number based ones only if the
  look-up is non-empty after reading the spectra, and the "MZ_RT" title
  format in any case.
*/
void MascotXMLFile::initializeLookup(SpectrumMetaDataLookup& lookup, const MSExperiment<>& exp, const String& scan_regex)
{
  // load spectra and extract scan numbers from the native IDs
  // (expected format: "... scan=#"):
  lookup.readSpectra(exp.getSpectra());

  if (!scan_regex.empty())
  {
    // a user-defined format replaces all defaults
    lookup.addReferenceFormat(scan_regex);
    return;
  }

  // no user-defined format -> use default formats
  const bool raw_data_given = !lookup.empty();
  if (raw_data_given) // spectrum look-up possible
  {
    // possible formats and resulting scan numbers:
    // - Mascot 2.3 (?):
    // <pep_scan_title>scan=818</pep_scan_title> -> 818
    // - ProteomeDiscoverer/Mascot 2.3 or 2.4:
    // <pep_scan_title>Spectrum136 scans:712,</pep_scan_title> -> 712
    // - other variants:
    // <pep_scan_title>Spectrum3411 scans: 2975,</pep_scan_title> -> 2975
    // <...>File773 Spectrum198145 scans: 6094</...> -> 6094
    // <...>6860: Scan 10668 (rt=5380.57)</...> -> 10668
    // <pep_scan_title>Scan Number: 1460</pep_scan_title> -> 1460
    lookup.addReferenceFormat("[Ss]can( [Nn]umber)?s?[=:]? *(?<SCAN>\\d+)");

    // - with .dta input to Mascot:
    // <...>/path/to/FTAC05_13.673.673.2.dta</...> -> 673
    lookup.addReferenceFormat("\\.(?<SCAN>\\d+)\\.\\d+\\.(?<CHARGE>\\d+)(\\.dta)?");
  }

  // title containing RT and MZ instead of scan number:
  // <...>575.848571777344_5018.0811_controllerType=0 controllerNumber=1 scan=11515_EcoliMS2small</...>
  lookup.addReferenceFormat("^(?<MZ>\\d+(\\.\\d+)?)_(?<RT>\\d+(\\.\\d+)?)");
}
void SeedListGenerator::generateSeedList(const MSExperiment<>& experiment, SeedList& seeds) { seeds.clear(); for (MSExperiment<>::ConstIterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) { if (exp_it->getMSLevel() == 2) // MS2 spectrum -> look for precursor { MSExperiment<>::ConstIterator prec_it = experiment.getPrecursorSpectrum(exp_it); const vector<Precursor>& precursors = exp_it->getPrecursors(); DPosition<2> point(prec_it->getRT(), precursors[0].getMZ()); seeds.push_back(point); } } }
/**
  @brief Applies a transformation to all retention times of a peak map.

  The RT of every spectrum and of every chromatogram data point in @p msexp
  is replaced by trafo.apply(RT). Ranges are cleared before and updated after
  the transformation.
*/
void MapAlignmentTransformer::transformSinglePeakMap(MSExperiment<> & msexp, const TransformationDescription & trafo)
{
  msexp.clearRanges();

  // spectra
  for (MSExperiment<>::iterator spectrum = msexp.begin(); spectrum != msexp.end(); ++spectrum)
  {
    spectrum->setRT(trafo.apply(spectrum->getRT()));
  }

  // chromatograms: transform a copy, then write the copy back
  std::vector<MSChromatogram<ChromatogramPeak> > transformed = msexp.getChromatograms();
  for (Size c = 0; c < transformed.size(); ++c)
  {
    MSChromatogram<ChromatogramPeak> & chromatogram = transformed[c];
    for (Size p = 0; p < chromatogram.size(); ++p)
    {
      const DoubleReal original_rt = chromatogram[p].getRT();
      chromatogram[p].setRT(trafo.apply(original_rt));
    }
  }
  msexp.setChromatograms(transformed);

  msexp.updateRanges();
}
int main(int argc, const char** argv) { if (argc < 2) return 1; // the path to the data should be given on the command line String tutorial_data_path(argv[1]); MSExperiment spectra; MzMLFile f; // load mzML from code examples folder f.load(tutorial_data_path + "/data/Tutorial_GaussFilter.mzML", spectra); // iterate over map and output MS2 precursor information for (auto s_it = spectra.begin(); s_it != spectra.end(); ++s_it) { // we are only interested in MS2 spectra so we skip all other levels if (s_it->getMSLevel() != 2) continue; // get a reference to the precursor information const MSSpectrum& spectrum = *s_it; const vector<Precursor>& precursors = spectrum.getPrecursors(); // size check & throw exception if needed if (precursors.empty()) throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, precursors.size()); // get m/z and intensity of precursor double precursor_mz = precursors[0].getMZ(); float precursor_int = precursors[0].getIntensity(); // retrieve the precursor spectrum (the most recent MS1 spectrum) PeakMap::ConstIterator precursor_spectrum = spectra.getPrecursorSpectrum(s_it); double precursor_rt = precursor_spectrum->getRT(); // output precursor information std::cout << " precusor m/z: " << precursor_mz << " intensity: " << precursor_int << " retention time (sec.): " << precursor_rt << std::endl; } return 0; } // end of main
void PeakPickerSH::pickExperiment(const MSExperiment<> & input, MSExperiment<> & output) { // make sure that output is clear output.clear(true); // copy experimental settings static_cast<ExperimentalSettings &>(output) = input; // resize output with respect to input output.resize(input.size()); std::cout << "Before loop, input size = " << input.size() << std::endl; Size progress = 0; for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx) { output[scan_idx].clear(true); output[scan_idx].SpectrumSettings::operator=(input[scan_idx]); output[scan_idx].MetaInfoInterface::operator=(input[scan_idx]); output[scan_idx].setRT(input[scan_idx].getRT()); output[scan_idx].setMSLevel(input[scan_idx].getMSLevel()); output[scan_idx].setName(input[scan_idx].getName()); output[scan_idx].setType(SpectrumSettings::PEAKS); if (input[scan_idx].getMSLevel() != 1) { // When not considering MS2 data (MS2 fragment mass tracing=0), Lukas leaves out // the entire scan (instead of just copying it to the output as seen in // another plugin). // pick(input[scan_idx], output[scan_idx], 4.0); } else { // TODO: Read value 4.0 from parameters # PeakPickerSH.cpp pick(input[scan_idx], output[scan_idx], 5.0); } setProgress(++progress); } std::cout << "After loop" << std::endl; endProgress(); }
/**
  @brief Applies a transformation to all retention times of a peak map.

  The RT of every spectrum and every chromatogram data point in @p msexp is
  replaced by trafo.apply(RT). Ranges are cleared before and updated after.

  @param msexp             Experiment to transform in place.
  @param trafo             Transformation to apply.
  @param store_original_rt If true, the untransformed RT of each spectrum is
                           handed to storeOriginalRT_(), and the untransformed
                           RTs of each chromatogram are stored in its
                           "original_rt" meta value unless that meta value
                           already exists.
*/
void MapAlignmentTransformer::transformRetentionTimes(
  MSExperiment<>& msexp, const TransformationDescription& trafo,
  bool store_original_rt)
{
  msexp.clearRanges();

  // spectra
  for (MSExperiment<>::iterator spectrum = msexp.begin();
       spectrum != msexp.end(); ++spectrum)
  {
    const double untransformed_rt = spectrum->getRT();
    if (store_original_rt)
    {
      storeOriginalRT_(*spectrum, untransformed_rt);
    }
    spectrum->setRT(trafo.apply(untransformed_rt));
  }

  // chromatograms
  for (Size c = 0; c < msexp.getNrChromatograms(); ++c)
  {
    MSChromatogram<ChromatogramPeak>& chrom = msexp.getChromatogram(c);

    vector<double> untransformed_rts;
    if (store_original_rt)
    {
      untransformed_rts.reserve(chrom.size());
    }

    for (Size p = 0; p < chrom.size(); ++p)
    {
      const double untransformed_rt = chrom[p].getRT();
      if (store_original_rt)
      {
        untransformed_rts.push_back(untransformed_rt);
      }
      chrom[p].setRT(trafo.apply(untransformed_rt));
    }

    // never overwrite an already stored set of original RTs
    if (!store_original_rt || chrom.metaValueExists("original_rt"))
    {
      continue;
    }
    chrom.setMetaValue("original_rt", untransformed_rts);
  }

  msexp.updateRanges();
}
/**
  @brief Registers an experiment and its labels as assays.

  For every entry of @p label a new Assay is appended to assays_, carrying
  that entry as its modifications and the experimental settings of @p exp as
  its raw file. Afterwards data_processings_ is set to the data processing of
  the first spectrum of @p exp; if @p exp contains no spectra,
  data_processings_ is left unchanged.

  @param exp   Experiment to register.
  @param label One list of (name, value) pairs per assay to create.
*/
void MSQuantifications::registerExperiment(MSExperiment<Peak1D> & exp, std::vector<std::vector<std::pair<String, DoubleReal> > > label)
{
  for (std::vector<std::vector<std::pair<String, DoubleReal> > >::const_iterator lit = label.begin(); lit != label.end(); ++lit)
  {
    //TODO look for existing labels
    Assay a;
    a.mods_ = (*lit);
    a.raw_files_.push_back(exp.getExperimentalSettings());
    assays_.push_back(a);
  }

  // exp[0] would be out of bounds for an experiment without spectra
  if (exp.size() > 0)
  {
    data_processings_ = exp[0].getDataProcessing(); //TODO overwrite MSExperiments inherited front method to work. [0] operator is ugly!
  }
}
/**
  @brief Computes the fraction of the isolation window intensity that belongs to the precursor.

  The isolation window is spanned around the m/z of the first precursor of
  @p ms2_spec using its lower and upper isolation window offsets. The total
  intensity is the sum over all peaks of @p precursor inside that window. The
  precursor intensity is the intensity of the peak nearest to the precursor
  m/z plus the isotopic peaks found by sumPotentialIsotopePeaks_() to the
  left and right in steps of NEUTRON_MASS_U / charge.

  @param ms2_spec  MS2 spectrum whose first precursor is evaluated.
  @param precursor Spectrum in which the isolation window is inspected.
  @return precursor intensity / total intensity; 1.0 if @p ms2_spec has no
          precursor or the precursor has no charge; 0.0 if there is no
          intensity inside the isolation window.
*/
DoubleReal IsobaricChannelExtractor::computePrecursorPurity_(const MSExperiment<Peak1D>::ConstIterator& ms2_spec, const MSExperiment<Peak1D>::ConstIterator& precursor) const
{
  // we cannot analyze spectra without precursor information or precursors without a charge
  if (ms2_spec->getPrecursors().empty() || ms2_spec->getPrecursors()[0].getCharge() == 0)
  {
    return 1.0;
  }

  // compute boundaries of the isolation window
  const Peak1D::CoordinateType precursor_mz = ms2_spec->getPrecursors()[0].getMZ();
  const Peak1D::CoordinateType lower_mz_bound = precursor_mz - ms2_spec->getPrecursors()[0].getIsolationWindowLowerOffset();
  const Peak1D::CoordinateType upper_mz_bound = precursor_mz + ms2_spec->getPrecursors()[0].getIsolationWindowUpperOffset();

  const MSExperiment<>::SpectrumType::ConstIterator isolation_lower_mz = precursor->MZBegin(lower_mz_bound);
  const MSExperiment<>::SpectrumType::ConstIterator isolation_upper_mz = precursor->MZEnd(upper_mz_bound);

  // get total intensity inside the window
  Peak1D::IntensityType total_intensity = 0;
  for (MSExperiment<>::SpectrumType::ConstIterator isolation_it = isolation_lower_mz;
       isolation_it != isolation_upper_mz;
       ++isolation_it)
  {
    total_intensity += isolation_it->getIntensity();
  }

  // Without any signal in the window (this includes an empty spectrum) the
  // ratio is undefined and no precursor signal can be attributed.
  if (total_intensity <= 0)
  {
    return 0.0;
  }

  // now get the intensity of the precursor .. we assume everything in the distance of 1/c to belong to the precursor
  // for c == charge of precursor

  // precursor mz
  const Size precursor_peak_idx = precursor->findNearest(precursor_mz);
  const Peak1D precursor_peak = (*precursor)[precursor_peak_idx];
  Peak1D::IntensityType precursor_intensity = precursor_peak.getIntensity();

  // compute the m/z distance between neighbouring isotopic peaks
  const double charge_dist = Constants::NEUTRON_MASS_U / static_cast<double>(ms2_spec->getPrecursors()[0].getCharge());

  // The window limits themselves are passed on. The boundary iterators must
  // not be dereferenced for this: either may be the end iterator, and the
  // upper one points behind the window.

  // search left of precursor for isotopic peaks
  precursor_intensity += sumPotentialIsotopePeaks_(precursor, lower_mz_bound, upper_mz_bound, precursor_peak.getMZ(), -1 * charge_dist);
  // search right of precursor for isotopic peaks
  precursor_intensity += sumPotentialIsotopePeaks_(precursor, lower_mz_bound, upper_mz_bound, precursor_peak.getMZ(), charge_dist);

  return precursor_intensity / total_intensity;
}
/// Tool entry point: loads an mzML file, resamples every spectrum with a
/// LinearResampler whose "spacing" is the "sampling_rate" option, and stores
/// the result as mzML.
ExitCodes main_(int, const char **)
{
  //----------------------------------------------------------------
  // load data
  //----------------------------------------------------------------
  String in = getStringOption_("in");
  String out = getStringOption_("out");

  MSExperiment<> experiment;
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  mzml_file.load(in, experiment);

  // configure the resampler
  DoubleReal sampling_rate = getDoubleOption_("sampling_rate");
  Param resampler_param;
  resampler_param.setValue("spacing", sampling_rate);

  LinearResampler lin_resampler;
  lin_resampler.setParameters(resampler_param);

  // resample every scan
  for (MSExperiment<>::Iterator scan = experiment.begin(); scan != experiment.end(); ++scan)
  {
    lin_resampler.raster(*scan);
  }

  // clear meta data because they are no longer meaningful
  experiment.clearMetaDataArrays();

  // annotate output with data processing info
  addDataProcessing_(experiment,
                     getProcessingInfo_(DataProcessing::DATA_PROCESSING));

  // store output
  mzml_file.store(out, experiment);

  return EXECUTION_OK;
}
/// Appends synthetic spectra to @p exp: optionally one MS1 spectrum with a
/// single peak (m/z 100, intensity 200), followed by @p nr_swathes MS2
/// spectra. MS2 spectrum i has one precursor with isolation window offsets
/// of 12.5 on both sides, centered at 400 + i*25 + 12.5, and a single peak
/// (m/z 101 + i, intensity 201 + i).
void getSwathFile(MSExperiment<>& exp, int nr_swathes=32, bool ms1=true)
{
  if (ms1)
  {
    Peak1D ms1_peak;
    ms1_peak.setMZ(100);
    ms1_peak.setIntensity(200);

    MSSpectrum<> ms1_spectrum;
    ms1_spectrum.setMSLevel(1);
    ms1_spectrum.push_back(ms1_peak);
    exp.addSpectrum(ms1_spectrum);
  }

  int i = 0;
  while (i < nr_swathes)
  {
    // isolation window of this swath
    Precursor window;
    window.setIsolationWindowLowerOffset(12.5);
    window.setIsolationWindowUpperOffset(12.5);
    window.setMZ(400 + i*25 + 12.5);

    Peak1D swath_peak;
    swath_peak.setMZ(101 + i);
    swath_peak.setIntensity(201 + i);

    MSSpectrum<> swath_spectrum;
    swath_spectrum.setMSLevel(2);
    swath_spectrum.setPrecursors(std::vector<Precursor>(1, window));
    swath_spectrum.push_back(swath_peak);
    exp.addSpectrum(swath_spectrum);

    ++i;
  }
}
int main() { // create a peak map containing 4 dummy spectra and peaks MSExperiment exp; // The following examples creates a MSExperiment containing four MSSpectrum instances. for (Size i = 0; i < 4; ++i) { MSSpectrum spectrum; spectrum.setRT(i); spectrum.setMSLevel(1); for (float mz = 500.0; mz <= 900; mz += 100.0) { Peak1D peak; peak.setMZ(mz + i); spectrum.push_back(peak); } exp.addSpectrum(spectrum); } // Iteration over the RT range (2,3) and the m/z range (603,802) and print the peak positions. for (auto it = exp.areaBegin(2.0, 3.0, 603.0, 802.0); it != exp.areaEnd(); ++it) { cout << it.getRT() << " - " << it->getMZ() << endl; } // Iteration over all peaks in the experiment. // Output: RT, m/z, and intensity // Note that the retention time is stored in the spectrum (not in the peak object) for (auto s_it = exp.begin(); s_it != exp.end(); ++s_it) { for (auto p_it = s_it->begin(); p_it != s_it->end(); ++p_it) { cout << s_it->getRT() << " - " << p_it->getMZ() << " " << p_it->getIntensity() << endl; } } // We could store the spectra to a mzML file with: // MzMLFile mzml; // mzml.store(filename, exp); // And load it with // mzml.load(filename, exp); // If we wanted to load only the MS2 spectra we could speed up reading by setting: // mzml.getOptions().addMSLevel(2); // before executing: mzml.load(filename, exp); return 0; } //end of main
TEST_EQUAL(file.getNrSpectra(), 2) } END_SECTION START_SECTION(( size_t getNrChromatograms() const )) { IndexedMzMLFile file(OPENMS_GET_TEST_DATA_PATH("IndexedmzMLFile_1.mzML")); TEST_EQUAL(file.getNrChromatograms(), 1) } END_SECTION START_SECTION(( OpenMS::Interfaces::SpectrumPtr getSpectrumById(int id) )) { IndexedMzMLFile file(OPENMS_GET_TEST_DATA_PATH("IndexedmzMLFile_1.mzML")); MSExperiment<> exp; MzMLFile().load(OPENMS_GET_TEST_DATA_PATH("IndexedmzMLFile_1.mzML"),exp); TEST_EQUAL(file.getNrSpectra(), exp.getSpectra().size()) OpenMS::Interfaces::SpectrumPtr spec = file.getSpectrumById(0); TEST_EQUAL(spec->getMZArray()->data.size(), exp.getSpectra()[0].size() ) TEST_EQUAL(spec->getIntensityArray()->data.size(), exp.getSpectra()[0].size() ) // Test Exceptions TEST_EXCEPTION(Exception::IllegalArgument,file.getSpectrumById(-1)); TEST_EXCEPTION(Exception::IllegalArgument,file.getSpectrumById( file.getNrSpectra()+1)); { IndexedMzMLFile file(OPENMS_GET_TEST_DATA_PATH("fileDoesNotExist")); TEST_EQUAL(file.getParsingSuccess(), false)
ExitCodes main_(int, const char**) { vector<ProteinIdentification> prot_ids; vector<PeptideIdentification> pep_ids; ProteinHit temp_protein_hit; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String inputfile_id = getStringOption_("id"); String inputfile_feature = getStringOption_("feature"); String inputfile_consensus = getStringOption_("consensus"); String inputfile_raw = getStringOption_("in"); String outputfile_name = getStringOption_("out"); //~ bool Ms1(getFlag_("MS1")); //~ bool Ms2(getFlag_("MS2")); bool remove_duplicate_features(getFlag_("remove_duplicate_features")); //------------------------------------------------------------- // fetch vocabularies //------------------------------------------------------------ ControlledVocabulary cv; cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo")); cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo")); QcMLFile qcmlfile; //------------------------------------------------------------- // MS aqiusition //------------------------------------------------------------ String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName(); cout << "Reading mzML file..." 
<< endl; MzMLFile mz_data_file; MSExperiment<Peak1D> exp; MzMLFile().load(inputfile_raw, exp); //---prep input exp.sortSpectra(); UInt min_mz = std::numeric_limits<UInt>::max(); UInt max_mz = 0; std::map<Size, UInt> mslevelcounts; qcmlfile.registerRun(base_name,base_name); //TODO use UIDs //---base MS aquisition qp String msaq_ref = base_name + "_msaq"; QcMLFile::QualityParameter qp; qp.id = msaq_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000004"; try { //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details"); //~ cout << test.name << test.id << endl; const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004"); qp.name = term.name; ///< Name } catch (...) { qp.name = "mzML file"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //---file origin qp qp = QcMLFile::QualityParameter(); qp.name = "mzML file"; ///< Name qp.id = base_name + "_run_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000577"; qp.value = base_name; qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "instrument model"; ///< Name qp.id = base_name + "_instrument_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000031"; qp.value = exp.getInstrument().getName(); qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "completion time"; ///< Name qp.id = base_name + "_date"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000747"; qp.value = exp.getDateTime().getDate(); qcmlfile.addRunQualityParameter(base_name, qp); //---precursors at QcMLFile::Attachment at; at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000044"; at.qualityRef = msaq_ref; at.id = base_name + "_precursors"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch 
(...) { at.name = "precursors"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); //RT at.colTypes.push_back("MS:1000040"); //MZ for (Size i = 0; i < exp.size(); ++i) { mslevelcounts[exp[i].getMSLevel()]++; if (exp[i].getMSLevel() == 2) { if (exp[i].getPrecursors().front().getMZ() < min_mz) { min_mz = exp[i].getPrecursors().front().getMZ(); } if (exp[i].getPrecursors().front().getMZ() > max_mz) { max_mz = exp[i].getPrecursors().front().getMZ(); } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(exp[i].getPrecursors().front().getMZ()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); //---aquisition results qp qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms1aquisition"; ///< Identifier qp.value = String(mslevelcounts[1]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms1 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms2aquisition"; ///< Identifier qp.value = String(mslevelcounts[2]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms2 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results" qp.id = base_name + "_Chromaquisition"; ///< Identifier qp.value = String(exp.getChromatograms().size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of chromatograms"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000009"; at.qualityRef = msaq_ref; at.id = base_name + "_mzrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS MZ aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000010"); //MZ at.colTypes.push_back("QC:0000011"); //MZ std::vector<String> rowmz; rowmz.push_back(String(min_mz)); rowmz.push_back(String(max_mz)); at.tableRows.push_back(rowmz); qcmlfile.addRunAttachment(base_name, at); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000012"; at.qualityRef = msaq_ref; at.id = base_name + "_rtrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS RT aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000013"); //MZ at.colTypes.push_back("QC:0000014"); //MZ std::vector<String> rowrt; rowrt.push_back(String(exp.begin()->getRT())); rowrt.push_back(String(exp.getSpectra().back().getRT())); at.tableRows.push_back(rowrt); qcmlfile.addRunAttachment(base_name, at); //---ion current stability ( & tic ) qp at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000022"; at.qualityRef = msaq_ref; at.id = base_name + "_tics"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "MS TICs"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); at.colTypes.push_back("MS:1000285"); UInt max = 0; Size below_10k = 0; for (Size i = 0; i < exp.size(); ++i) { if (exp[i].getMSLevel() == 1) { UInt sum = 0; for (Size j = 0; j < exp[i].size(); ++j) { sum += exp[i][j].getIntensity(); } if (sum > max) { max = sum; } if (sum < 10000) { ++below_10k; } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(sum); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.id = base_name + "_ticslump"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000023"; qp.value = String((100 / exp.size()) * below_10k); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "percentage of tic slumps"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //------------------------------------------------------------- // MS id //------------------------------------------------------------ if (inputfile_id != "") { IdXMLFile().load(inputfile_id, prot_ids, pep_ids); cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl; ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters(); vector<String> var_mods = params.variable_modifications; //~ boost::regex re("(?<=[KR])(?=[^P])"); String msid_ref = base_name + "_msid"; QcMLFile::QualityParameter qp; qp.id = msid_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000025"; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "MS identification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000026"; at.qualityRef = msid_ref; at.id = base_name + "_idsetting"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS id settings"; ///< Name } at.colTypes.push_back("MS:1001013"); //MS:1001013 db name MS:1001016 version MS:1001020 taxonomy at.colTypes.push_back("MS:1001016"); at.colTypes.push_back("MS:1001020"); std::vector<String> row; row.push_back(String(prot_ids.front().getSearchParameters().db)); row.push_back(String(prot_ids.front().getSearchParameters().db_version)); row.push_back(String(prot_ids.front().getSearchParameters().taxonomy)); at.tableRows.push_back(row); qcmlfile.addRunAttachment(base_name, at); UInt spectrum_count = 0; Size peptide_hit_count = 0; UInt runs_count = 0; Size protein_hit_count = 0; set<String> peptides; set<String> proteins; Size missedcleavages = 0; for (Size i = 0; i < pep_ids.size(); ++i) { if (!pep_ids[i].empty()) { ++spectrum_count; peptide_hit_count += pep_ids[i].getHits().size(); const vector<PeptideHit>& temp_hits = pep_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { peptides.insert(temp_hits[j].getSequence().toString()); } } } for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it) { for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st) { if (*st == 'K' || *st == 'R') { ++missedcleavages; } } } for (Size i = 0; i < prot_ids.size(); ++i) { ++runs_count; protein_hit_count += prot_ids[i].getHits().size(); const vector<ProteinHit>& temp_hits = prot_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { proteins.insert(temp_hits[j].getAccession()); } } qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000037"; ///< cv accession qp.id = base_name 
+ "_misscleave"; ///< Identifier qp.value = missedcleavages; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of missed cleavages"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000032"; ///< cv accession qp.id = base_name + "_totprot"; ///< Identifier qp.value = protein_hit_count; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000033"; ///< cv accession qp.id = base_name + "_totuniqprot"; ///< Identifier qp.value = String(proteins.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000029"; ///< cv accession qp.id = base_name + "_psms"; ///< Identifier qp.value = String(spectrum_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of PSM"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000030"; ///< cv accession qp.id = base_name + "_totpeps"; ///< Identifier qp.value = String(peptide_hit_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "total number of identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000031"; ///< cv accession qp.id = base_name + "_totuniqpeps"; ///< Identifier qp.value = String(peptides.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000038"; at.qualityRef = msid_ref; at.id = base_name + "_massacc"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "delta ppm tables"; } //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M) at.colTypes.push_back("RT"); at.colTypes.push_back("MZ"); at.colTypes.push_back("Score"); at.colTypes.push_back("PeptideSequence"); at.colTypes.push_back("Charge"); at.colTypes.push_back("TheoreticalWeight"); at.colTypes.push_back("delta_ppm"); for (UInt w = 0; w < var_mods.size(); ++w) { at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_')); } std::vector<double> deltas; //~ prot_ids[0].getSearchParameters(); for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it) { if (it->getHits().size() > 0) { std::vector<String> row; row.push_back(it->getRT()); row.push_back(it->getMZ()); PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort vector<UInt> pep_mods; for (UInt w = 0; w < var_mods.size(); ++w) { pep_mods.push_back(0); } for (AASequence::ConstIterator z = tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z) { Residue res = *z; String temp; if 
(res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl") { temp = res.getModification() + " (" + res.getOneLetterCode() + ")"; //cout<<res.getModification()<<endl; for (UInt w = 0; w < var_mods.size(); ++w) { if (temp == var_mods[w]) { //cout<<temp; pep_mods[w] += 1; } } } } row.push_back(tmp.getScore()); row.push_back(tmp.getSequence().toString().removeWhitespaces()); row.push_back(tmp.getCharge()); row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge())); double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true)); row.push_back(String(dppm)); deltas.push_back(dppm); for (UInt w = 0; w < var_mods.size(); ++w) { row.push_back(pep_mods[w]); } at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000040"; ///< cv accession qp.id = base_name + "_mean_delta"; ///< Identifier qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end())); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "mean delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000041"; ///< cv accession qp.id = base_name + "_median_delta"; ///< Identifier qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false)); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "median delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000035"; ///< cv accession qp.id = base_name + "_ratio_id"; ///< Identifier qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2])); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "id ratio"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } //------------------------------------------------------------- // MS quantitation //------------------------------------------------------------ FeatureMap map; String msqu_ref = base_name + "_msqu"; if (inputfile_feature != "") { FeatureXMLFile f; f.load(inputfile_feature, map); cout << "Read featureXML file..." << endl; //~ UInt fiter = 0; map.sortByRT(); map.updateRanges(); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000045"; ///< cv accession qp.id = msqu_ref; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "MS quantification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000046"; ///< cv accession qp.id = base_name + "_feature_count"; ///< Identifier qp.value = String(map.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of features"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } if (inputfile_feature != "" && !remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); at.colTypes.push_back("Quality"); at.colTypes.push_back("FWHM"); at.colTypes.push_back("IDs"); UInt fiter = 0; map.sortByRT(); //ofstream out(outputfile_name.c_str()); while (fiter < map.size()) { std::vector<String> row; row.push_back(map[fiter].getMZ()); row.push_back(map[fiter].getRT()); row.push_back(map[fiter].getIntensity()); row.push_back(map[fiter].getCharge()); row.push_back(map[fiter].getOverallQuality()); row.push_back(map[fiter].getWidth()); row.push_back(map[fiter].getPeptideIdentifications().size()); fiter++; at.tableRows.push_back(row); } qcmlfile.addRunAttachment(base_name, at); } else if (inputfile_feature != "" && remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); FeatureMap map, map_out; FeatureXMLFile f; f.load(inputfile_feature, map); UInt fiter = 0; map.sortByRT(); while (fiter < map.size()) { FeatureMap map_tmp; for (UInt k = fiter; k <= map.size(); ++k) { if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1) { //~ cout << fiter << endl; map_tmp.push_back(map[k]); } else { fiter = k; break; } } map_tmp.sortByMZ(); UInt retif = 1; map_out.push_back(map_tmp[0]); while (retif < map_tmp.size()) { if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01) { cout << "equal RT, but mass different" << endl; map_out.push_back(map_tmp[retif]); } retif++; } } qcmlfile.addRunAttachment(base_name, at); } if (inputfile_consensus != "") { cout << "Reading consensusXML file..." << endl; ConsensusXMLFile f; ConsensusMap map; f.load(inputfile_consensus, map); //~ String CONSENSUS_NAME = "_consensus.tsv"; //~ String combined_out = outputfile_name + CONSENSUS_NAME; //~ ofstream out(combined_out.c_str()); at = QcMLFile::Attachment(); qp.name = "consensuspoints"; ///< Name //~ qp.id = base_name + "_consensuses"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results" at.colTypes.push_back("Native_spectrum_ID"); at.colTypes.push_back("DECON_RT_(sec)"); at.colTypes.push_back("DECON_MZ_(Th)"); at.colTypes.push_back("DECON_Intensity"); at.colTypes.push_back("Feature_RT_(sec)"); at.colTypes.push_back("Feature_MZ_(Th)"); at.colTypes.push_back("Feature_Intensity"); at.colTypes.push_back("Feature_Charge"); for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit) { const ConsensusFeature& CF = *cmit; for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit) { std::vector<String> row; FeatureHandle FH = *cfit; row.push_back(CF.getMetaValue("spectrum_native_id")); 
row.push_back(CF.getRT()); row.push_back(CF.getMZ()); row.push_back(CF.getIntensity()); row.push_back(FH.getRT()); row.push_back(FH.getMZ()); row.push_back(FH.getCharge()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); } //------------------------------------------------------------- // finalize //------------------------------------------------------------ qcmlfile.store(outputfile_name); return EXECUTION_OK; }
START_SECTION(const PeakFileOptions& getOptions() const) MzMLFile file; TEST_EQUAL(file.getOptions().hasMSLevels(),false) END_SECTION START_SECTION(PeakFileOptions& getOptions()) MzMLFile file; file.getOptions().addMSLevel(1); TEST_EQUAL(file.getOptions().hasMSLevels(),true); END_SECTION */ TOLERANCE_ABSOLUTE(0.01) START_SECTION((template <typename MapType> void load(const String& filename, MapType& map))) MzMLFile file; MSExperiment<> exp; file.load(OPENMS_GET_TEST_DATA_PATH("MzMLFile_1.mzML"),exp); //test DocumentIdentifier addition TEST_STRING_EQUAL(exp.getLoadedFilePath(), OPENMS_GET_TEST_DATA_PATH("MzMLFile_1.mzML")); TEST_STRING_EQUAL(FileTypes::typeToName(exp.getLoadedFileType()),"mzML"); //-------------------------- general information -------------------------- TEST_EQUAL(exp.size(),4) //run TEST_EQUAL(exp.getIdentifier(),"document_accession") TEST_EQUAL(exp.getFractionIdentifier(),"the_best_fraction_ever") TEST_EQUAL(exp.getDateTime().get(),"2007-06-27 15:23:45") //contacts TEST_EQUAL(exp.getContacts().size(),2)
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- // file list StringList file_list = getStringList_("in"); // file type FileHandler file_handler; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = file_handler.getType(file_list[0]); } // output file names and types String out_file = getStringOption_("out"); bool annotate_file_origin = getFlag_("annotate_file_origin"); rt_gap_ = getDoubleOption_("rt_concat:gap"); vector<String> trafo_out = getStringList_("rt_concat:trafo_out"); if (trafo_out.empty()) { // resize now so we don't have to worry about indexing out of bounds: trafo_out.resize(file_list.size()); } else if (trafo_out.size() != file_list.size()) { writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // calculations //------------------------------------------------------------- if (force_type == FileTypes::FEATUREXML) { FeatureMap out; FeatureXMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == 
FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); // skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; TraMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); fh.store(out_file, out); } else // raw data input (e.g. 
mzML) { // RT bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (!custom_rts.empty()) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!"); return ILLEGAL_PARAMETERS; } } // MS level Int ms_level = getIntOption_("raw:ms_level"); MSExperiment<> out; UInt rt_auto = 0; UInt native_id = 0; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; // load file force_type = file_handler.getType(file_list[i]); MSExperiment<> in; file_handler.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); // warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More than one scan in file '") + filename + "'! 
All scans will have the same retention time!"); } // handle special raw data options: for (MSExperiment<>::iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { float rt_final = spec_it->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { static const boost::regex re("rt(\\d+(\\.\\d+)?)"); boost::smatch match; bool found = boost::regex_search(filename, match, re); if (found) { rt_final = String(match[1]).toFloat(); } else { writeLog_("Warning: could not extract retention time from filename '" + filename + "'"); } } // none of the rt methods were successful if (rt_final < 0) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } spec_it->setRT(rt_final); spec_it->setNativeID("spectrum=" + String(native_id)); if (ms_level > 0) { spec_it->setMSLevel(ms_level); } ++native_id; } // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { in[0].setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways) } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(in, trafo_out[i], i == 0); } // add spectra to output for (MSExperiment<>::const_iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { out.addSpectrum(*spec_it); } // also add the chromatograms for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator chrom_it = in.getChromatograms().begin(); chrom_it != in.getChromatograms().end(); ++chrom_it) { out.addChromatogram(*chrom_it); } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), 
in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
MSDataTransformingConsumer* transforming_consumer_nullPointer = 0; START_SECTION((MSDataTransformingConsumer())) transforming_consumer_ptr = new MSDataTransformingConsumer(); TEST_NOT_EQUAL(transforming_consumer_ptr, transforming_consumer_nullPointer) END_SECTION START_SECTION((~MSDataTransformingConsumer())) delete transforming_consumer_ptr; END_SECTION START_SECTION((void consumeSpectrum(SpectrumType & s))) { MSDataTransformingConsumer * transforming_consumer = new MSDataTransformingConsumer(); MSExperiment<> exp; MzMLFile().load(OPENMS_GET_TEST_DATA_PATH("MzMLFile_1.mzML"), exp); TEST_EQUAL(exp.getNrSpectra() > 0, true) MSSpectrum<> first_spectrum = exp.getSpectrum(0); transforming_consumer->setExpectedSize(2,0); transforming_consumer->consumeSpectrum(exp.getSpectrum(0)); TEST_EQUAL(first_spectrum == exp.getSpectrum(0), true) // nothing happened delete transforming_consumer; } END_SECTION START_SECTION((void consumeChromatogram(ChromatogramType & c))) {
/**
  @brief Fits one quadratic calibration function per calibrant spectrum.

  A copy of the incoming spectra is stored in calib_peaks_ft_ before
  applyTOFConversion_() is applied to @p calib_spectra. For every spectrum
  with at least three matched reference masses, a quadratic regression of the
  stored values against the expected masses is computed; its three
  coefficients are appended to coeff_quad_fit_ and the per-mass deviations to
  errors_. Finally averageErrors_() and averageCoefficients_() are called.

  @param calib_spectra Calibrant spectra; converted in place.
  @throws Exception::UnableToCalibrate if no spectrum yielded a fit.
*/
void TOFCalibration::calculateCalibCoeffs_(MSExperiment<> & calib_spectra)
{
  // keep the untouched input: its peak positions are the regression's x values
  calib_peaks_ft_ = calib_spectra;

  // from here on calib_spectra holds converted values
  applyTOFConversion_(calib_spectra);

  std::vector<std::vector<unsigned int> > monoiso_peaks;
  getMonoisotopicPeaks_(calib_spectra, monoiso_peaks);

  startProgress(0, calib_spectra.size(), "quadratic fitting of calibrant spectra");

  // each calibrant spectrum gets its own fit
  for (unsigned int spec = 0; spec < calib_spectra.size(); ++spec)
  {
    std::vector<unsigned int> matched_peaks;
    std::vector<double> expected;

    // pair observed peaks with the expected reference masses
    matchMasses_(calib_spectra, monoiso_peaks, matched_peaks, expected, spec);

    const Size n_matches = expected.size();
    if (n_matches >= 3) // a quadratic fit is only attempted with at least three points
    {
      // observations (x); the expected masses themselves serve as y
      std::vector<double> observed;
      observed.reserve(n_matches);
      for (Size k = 0; k < n_matches; ++k)
      {
        observed.push_back(calib_peaks_ft_[spec][matched_peaks[k]].getMZ());
      }

      Math::QuadraticRegression fit;
      fit.computeRegression(observed.begin(), observed.end(), expected.begin());

#ifdef DEBUG_CALIBRATION
      std::cout << "chi^2: " << fit.getChiSquared() << std::endl; //DEBUG
      std::cout << "a: " << fit.getA() << "b: " << fit.getB() << "c: " << fit.getC() << std::endl; //DEBUG
#endif

      // coefficients are stored as consecutive (a, b, c) triples
      coeff_quad_fit_.push_back(fit.getA());
      coeff_quad_fit_.push_back(fit.getB());
      coeff_quad_fit_.push_back(fit.getC());

      // record the deviation of every matched mass
      // NOTE(review): mQ_ receives the spectrum index, while coefficient
      // triples are only appended for spectra that were fitted - confirm that
      // mQ_ copes with spectra skipped above.
      for (Size k = 0; k < n_matches; ++k)
      {
        const double reference = expected[k];
#ifdef DEBUG_CALIBRATION
        std::cout << reference << "\t" << mQ_(calib_peaks_ft_[spec][matched_peaks[k]].getMZ(), spec) - reference << std::endl;
#endif
        errors_[reference].push_back((mQ_(calib_peaks_ft_[spec][matched_peaks[k]].getMZ(), spec) - reference));
      }

      // progress is only advanced for spectra that were actually fitted
      setProgress(spec);
    }
  }
  endProgress();

  if (coeff_quad_fit_.empty())
  {
    String mess = String("Data can't be calibrated, not enough reference masses found: ") + coeff_quad_fit_.size() / 3;
    throw Exception::UnableToCalibrate(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToCalibrate", mess.c_str());
  }

  averageErrors_();
  averageCoefficients_();
}
void TOFCalibration::getMonoisotopicPeaks_(MSExperiment<> & calib_peaks, std::vector<std::vector<unsigned int> > & monoiso_peaks) { MSExperiment<>::iterator spec_iter = calib_peaks.begin(); MSExperiment<>::SpectrumType::iterator peak_iter, help_iter; #ifdef DEBUG_CALIBRATION spec_iter = calib_peaks.begin(); std::cout << "\n\nbefore---------\n\n"; // iterate through all spectra for (; spec_iter != calib_peaks.end(); ++spec_iter) { peak_iter = spec_iter->begin(); // go through current scan for (; peak_iter != spec_iter->end(); ++peak_iter) { std::cout << peak_iter->getMZ() << std::endl; } } #endif spec_iter = calib_peaks.begin(); // iterate through all spectra for (; spec_iter != calib_peaks.end(); ++spec_iter) { peak_iter = spec_iter->begin(); help_iter = peak_iter; std::vector<unsigned int> vec; // go through current scan while (peak_iter < spec_iter->end()) { while (peak_iter + 1 < spec_iter->end() && ((peak_iter + 1)->getMZ() - peak_iter->getMZ() < 1.2)) { ++peak_iter; } vec.push_back(distance(spec_iter->begin(), help_iter)); help_iter = peak_iter + 1; ++peak_iter; } monoiso_peaks.push_back(vec); } #ifdef DEBUG_CALIBRATION std::cout << "\n\nafter---------\n\n"; for (unsigned int i = 0; i < monoiso_peaks.size(); ++i) { for (unsigned int j = 0; j < monoiso_peaks[i].size(); ++j) { std::cout << i << "\t" << ((calib_peaks.begin() + i)->begin() + (monoiso_peaks[i])[j])->getMZ() << std::endl; } std::cout << "--------------\n"; } std::cout << "--------------\n\n\n"; #endif }
/**
  @brief Re-scores all peptide hits of an idXML file with AScore.

  Loads the identifications ('id') and the MS2 spectra of the peak file ('in'),
  looks up the spectrum of each peptide identification by retention time,
  runs AScore::compute() on every hit and stores the re-scored
  identifications (score type "PhosphoScore") in 'out'.

  @return EXECUTION_OK
*/
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  const String peak_file(getStringOption_("in"));
  const String id_file(getStringOption_("id"));
  const String out_file(getStringOption_("out"));
  const double fragment_mass_tolerance(getDoubleOption_("fragment_mass_tolerance"));
  // every unit other than "Da" is interpreted as ppm
  const bool fragment_mass_unit_ppm = !(getStringOption_("fragment_mass_unit") == "Da");
  const Size max_peptide_len = getIntOption_("max_peptide_length");
  const Size max_num_perm = getIntOption_("max_num_perm");

  AScore ascore;

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  vector<PeptideIdentification> pep_ids;
  vector<ProteinIdentification> prot_ids;
  vector<PeptideIdentification> pep_out;
  IdXMLFile().load(id_file, prot_ids, pep_ids);

  // only MS level 2 spectra are read from the peak file
  PeakFileOptions ms2_only;
  ms2_only.clearMSLevels();
  ms2_only.addMSLevel(2);

  MSExperiment<> exp;
  MzMLFile mzml;
  mzml.setLogType(log_type_);
  mzml.getOptions() = ms2_only;
  mzml.load(peak_file, exp);
  exp.sortSpectra(true);

  SpectrumLookup lookup;
  lookup.readSpectra(exp.getSpectra());

  //-------------------------------------------------------------
  // scoring
  //-------------------------------------------------------------
  for (Size i = 0; i < pep_ids.size(); ++i)
  {
    const PeptideIdentification& current_id = pep_ids[i];

    // spectrum belonging to this identification (matched via RT)
    const Size scan_index = lookup.findByRT(current_id.getRT());
    PeakSpectrum& spectrum = exp.getSpectrum(scan_index);

    const vector<PeptideHit>& hits = current_id.getHits();
    vector<PeptideHit> rescored_hits;
    for (Size h = 0; h < hits.size(); ++h)
    {
      PeptideHit backup_hit = hits[h];
      addScoreToMetaValues_(backup_hit, current_id.getScoreType()); // backup score value

      LOG_DEBUG << "starting to compute AScore RT=" << current_id.getRT() << " SEQUENCE: " << backup_hit.getSequence().toString() << std::endl;

      rescored_hits.push_back(ascore.compute(backup_hit, spectrum, fragment_mass_tolerance, fragment_mass_unit_ppm, max_peptide_len, max_num_perm));
    }

    // copy of the identification carrying the new scores
    PeptideIdentification rescored_id(current_id);
    rescored_id.setScoreType("PhosphoScore");
    rescored_id.setHigherScoreBetter(true);
    rescored_id.setHits(rescored_hits);
    pep_out.push_back(rescored_id);
  }

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  IdXMLFile().store(out_file, prot_ids, pep_out);

  return EXECUTION_OK;
}
/**
  @brief Runs the SILAC analysis pipeline on one mzML file.

  Steps, in this order: parameter handling, analyzer initialisation, loading
  and MS-level filtering of the input, optional mzQuantML assay registration,
  peak width estimation, filtering (or loading of stored filter results),
  clustering, and finally writing of every requested output ('out_debug',
  'out', 'out_mzq', 'out_clusters', 'out_features').

  @return EXECUTION_OK on success; ILLEGAL_PARAMETERS if 'out' is requested
          without labels; INCOMPATIBLE_INPUT_DATA if the peak width cannot be
          estimated.
*/
ExitCodes main_(int, const char **)
{
  typedef std::pair<String, DoubleReal> LabelMass;
  typedef std::vector<LabelMass> LabelSet;
  typedef vector<Clustering *>::const_iterator ClusteringIt;

  // data handed from one pipeline stage to the next
  vector<vector<SILACPattern> > data;
  MSQuantifications msq;
  vector<Clustering *> cluster_data;

  //--------------------------------------------------
  // parameter handling
  //--------------------------------------------------
  // mass shift of each label (e.g. "Arg6" => 6.0201290268)
  map<String, DoubleReal> label_identifiers;

  handleParameters_sample();
  handleParameters_algorithm();
  handleParameters_labels(label_identifiers);
  handleParameters();

  // 'out' needs at least one label
  if (selected_labels.empty() && !out.empty())
  {
    writeLog_("Error: The 'out' parameter cannot be used without a label (parameter 'sample:labels'). Use 'out_features' instead.");
    return ILLEGAL_PARAMETERS;
  }

  //--------------------------------------------------
  // set up the analyzer
  //--------------------------------------------------
  SILACAnalyzer analyzer;
  analyzer.setLogType(log_type_);
  analyzer.initialize(
    // section "sample"
    selected_labels,
    charge_min,
    charge_max,
    missed_cleavages,
    isotopes_per_peptide_min,
    isotopes_per_peptide_max,
    // section "algorithm"
    rt_threshold,
    rt_min,
    intensity_cutoff,
    intensity_correlation,
    model_deviation,
    allow_missing_peaks,
    // labels
    label_identifiers);

  //--------------------------------------------------
  // loading input from .mzML
  //--------------------------------------------------
  MzMLFile file;
  MSExperiment<Peak1D> exp;

  LOG_DEBUG << "Loading input..." << endl;
  file.setLogType(log_type_);
  file.load(in, exp);

  // set size of input map
  exp.updateRanges();

  // MS-level filtering is done after loading (erase-remove with an InMSLevelRange predicate)
  typedef InMSLevelRange<MSExperiment<Peak1D>::SpectrumType> LevelPredicate;
  exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), LevelPredicate(IntList::create("1"), true)), exp.end());

  // sort according to RT and MZ
  exp.sortSpectra();

  if (!(out_mzq == ""))
  {
    // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => silac_labels[0][1]="Arg6"
    // (the labels MUST be in weight order)
    vector<vector<String> > silac_labels = analyzer.getSILAClabels();

    std::vector<LabelSet> labels;
    // the unlabelled channel comes first
    labels.push_back(LabelSet(1, LabelMass(String("none"), DoubleReal(0))));
    for (Size i = 0; i < silac_labels.size(); ++i)
    {
      LabelSet one_label;
      for (UInt j = 0; j < silac_labels[i].size(); ++j)
      {
        // dereferencing relies on every label having been validated before
        one_label.push_back(*(label_identifiers.find(silac_labels[i][j])));
      }
      labels.push_back(one_label);
    }
    msq.registerExperiment(exp, labels); // add assays
    msq.assignUIDs();
  }
  MSQuantifications::QUANT_TYPES quant_type = MSQuantifications::MS1LABEL;
  msq.setAnalysisSummaryQuantType(quant_type); // add analysis summary

  //--------------------------------------------------
  // estimate peak width
  //--------------------------------------------------
  LOG_DEBUG << "Estimating peak width..." << endl;
  PeakWidthEstimator::Result peak_width;
  try
  {
    peak_width = analyzer.estimatePeakWidth(exp);
  }
  catch (Exception::InvalidSize &)
  {
    writeLog_("Error: Unable to estimate peak width of input data.");
    return INCOMPATIBLE_INPUT_DATA;
  }

  //--------------------------------------------------
  // filter input data, or reuse stored filter results
  //--------------------------------------------------
  if (!(in_filters == ""))
  {
    LOG_DEBUG << "Loading filtering results..." << endl;
    ConsensusMap filter_map;
    analyzer.readConsensus(in_filters, filter_map);
    analyzer.readFilterConsensusByPattern(filter_map, data);
  }
  else
  {
    LOG_DEBUG << "Filtering input data..." << endl;
    analyzer.filterData(exp, peak_width, data);

    // optionally store the filter results
    if (!(out_filters == ""))
    {
      LOG_DEBUG << "Storing filtering results..." << endl;
      ConsensusMap filter_map;
      for (Size p = 0; p < data.size(); ++p)
      {
        analyzer.generateFilterConsensusByPattern(filter_map, data[p]);
      }
      analyzer.writeConsensus(out_filters, filter_map);
    }
  }

  //--------------------------------------------------
  // clustering
  //--------------------------------------------------
  LOG_DEBUG << "Clustering data..." << endl;
  analyzer.clusterData(exp, peak_width, cluster_data, data);

  //--------------------------------------------------
  // write output: debug table
  //--------------------------------------------------
  if (!(out_debug == ""))
  {
    LOG_DEBUG << "Writing debug output file..." << endl;

    std::ofstream debug_out((out_debug + ".clusters.csv").c_str());

    vector<vector<DoubleReal> > massShifts = analyzer.getMassShifts(); // list of mass shifts
    // NOTE(review): massShifts[0] is read without an emptiness check - presumably guaranteed by the analyzer; verify.
    const Size n_shifts = massShifts[0].size();

    // header
    debug_out << std::fixed << std::setprecision(8) << "ID,RT,MZ_PEAK,CHARGE";
    for (UInt i = 1; i <= n_shifts; ++i)
    {
      debug_out << ",DELTA_MASS_" << i + 1;
    }
    // three column families share the same "<prefix><peptide>_<isotope>" layout;
    // the single ",MZ_RAW" column sits between the first and the second family
    const char * const column_prefix[] = {",INT_PEAK_", ",INT_RAW_", ",MZ_RAW_"};
    for (Size family = 0; family < 3; ++family)
    {
      if (family == 1)
      {
        debug_out << ",MZ_RAW";
      }
      for (UInt i = 0; i <= n_shifts; ++i)
      {
        for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
        {
          debug_out << column_prefix[family] << i + 1 << '_' << j;
        }
      }
    }
    debug_out << '\n';

    // data rows; the cluster id keeps counting across all clusterings
    UInt cluster_id = 0;
    for (ClusteringIt it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterDebug(debug_out, **it, cluster_id);
    }
  }

  //--------------------------------------------------
  // write output: consensus map (and mzQuantML)
  //--------------------------------------------------
  if (!(out == ""))
  {
    LOG_DEBUG << "Generating output consensus map..." << endl;
    ConsensusMap map;

    for (ClusteringIt it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterConsensusByCluster(map, **it);
    }

    LOG_DEBUG << "Adding meta data..." << endl;
    // XXX: Need a map per mass shift
    ConsensusMap::FileDescriptions & descriptions = map.getFileDescriptions();
    Size channel = 0;
    for (ConsensusMap::FileDescriptions::iterator it = descriptions.begin(); it != descriptions.end(); ++it, ++channel)
    {
      // in test mode the name is stored as given; otherwise the path is
      // stripped, since it is not cross platform and complicates verification
      if (!test_mode_)
      {
        it->second.filename = File::basename(in);
      }
      else
      {
        it->second.filename = in;
      }

      // write correct label (this would crash if used without a label!);
      // the first entry is skipped, the empty label is not listed
      if (channel > 0)
      {
        it->second.label = StringList(analyzer.getSILAClabels()[channel - 1]).concatenate("");
      }
    }

    std::set<DataProcessing::ProcessingAction> actions;
    actions.insert(DataProcessing::DATA_PROCESSING);
    actions.insert(DataProcessing::PEAK_PICKING);
    actions.insert(DataProcessing::FILTERING);
    actions.insert(DataProcessing::QUANTITATION);

    addDataProcessing_(map, getProcessingInfo_(actions));

    analyzer.writeConsensus(out, map);

    if (!(out_mzq == ""))
    {
      LOG_DEBUG << "Generating output mzQuantML file..." << endl;
      ConsensusMap ratio_map(map);

      // ratio templates: first assay versus each following assay; they do
      // not depend on the consensus feature, so they are built once
      const std::vector<MSQuantifications::Assay> & assays = msq.getAssays();
      std::vector<ConsensusFeature::Ratio> ratio_templates;
      for (std::vector<MSQuantifications::Assay>::const_iterator ait = assays.begin() + 1; ait != assays.end(); ++ait)
      {
        ConsensusFeature::Ratio ratio;
        ratio.numerator_ref_ = String(assays.begin()->uid_);
        ratio.denominator_ref_ = String(ait->uid_);
        ratio.description_.push_back("Simple ratio calc");
        ratio.description_.push_back("light to medium/.../heavy");
        ratio_templates.push_back(ratio);
      }

      // calc. ratios
      for (ConsensusMap::iterator cit = ratio_map.begin(); cit != ratio_map.end(); ++cit)
      {
        std::vector<ConsensusFeature::Ratio> ratios(ratio_templates);

        const ConsensusFeature::HandleSetType & feature_handles = cit->getFeatures();
        if (feature_handles.size() > 1)
        {
          // the first handle is the unlabeled one; every further handle fills the next ratio slot
          ConsensusFeature::HandleSetType::const_iterator fit = feature_handles.begin();
          ++fit;
          for (Size slot = 0; fit != feature_handles.end(); ++fit, ++slot)
          {
            // zero intensities are not expected here, so no division guard
            ratios[slot].ratio_value_ = feature_handles.begin()->getIntensity() / fit->getIntensity();
          }
        }

        cit->setRatios(ratios);
      }
      msq.addConsensusMap(ratio_map); // add SILACAnalyzer result

      analyzer.writeMzQuantML(out_mzq, msq);
    }
  }

  //--------------------------------------------------
  // write output: clusters
  //--------------------------------------------------
  if (!(out_clusters == ""))
  {
    LOG_DEBUG << "Generating cluster output file..." << endl;
    ConsensusMap map;
    for (ClusteringIt it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      // NOTE(review): the id is reset for every clustering here, whereas the
      // debug output above keeps one running id - confirm this is intended.
      UInt cluster_id = 0;
      analyzer.generateClusterConsensusByPattern(map, **it, cluster_id);
    }

    ConsensusMap::FileDescription & description = map.getFileDescriptions()[0];
    description.filename = in;
    description.label = "Cluster";

    analyzer.writeConsensus(out_clusters, map);
  }

  //--------------------------------------------------
  // write output: features
  //--------------------------------------------------
  if (!(out_features == ""))
  {
    LOG_DEBUG << "Generating output feature map..." << endl;
    FeatureMap<> map;
    for (ClusteringIt it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterFeatureByCluster(map, **it);
    }

    analyzer.writeFeatures(out_features, map);
  }

  return EXECUTION_OK;
}
/// @brief extracts the iTRAQ channels from the MS data and stores intensity values in a consensus map /// /// @param ms_exp_data Raw data to read /// @param consensus_map Output each MS² scan as a consensus feature /// @throws Exception::MissingInformation if no scans present or MS² scan has no precursor void ItraqChannelExtractor::run(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map) { if (ms_exp_data.empty()) { LOG_WARN << "The given file does not contain any conventional peak data, but might" " contain chromatograms. This tool currently cannot handle them, sorry."; throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!"); } MSExperiment<> ms_exp_MS2; String mode = (String) param_.getValue("select_activation"); std::cout << "Selecting scans with activation mode: " << (mode == "" ? "any" : mode) << "\n"; HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(ListUtils::create<String>(mode)); for (size_t idx = 0; idx < ms_exp_data.size(); ++idx) { if (ms_exp_data[idx].getMSLevel() == 2) { if (mode == "" || activation_predicate(ms_exp_data[idx])) { // copy only MS² scans ms_exp_MS2.addSpectrum(ms_exp_data[idx]); } else { //std::cout << "deleting spectrum # " << idx << " with RT: " << ms_exp_data[idx].getRT() << "\n"; } } } #ifdef ITRAQ_DEBUG std::cout << "we have " << ms_exp_MS2.size() << " scans left of level " << ms_exp_MS2[0].getMSLevel() << std::endl; std::cout << "run: channel_map_ has " << channel_map_.size() << " entries!" << std::endl; #endif consensus_map.clear(false); // set <mapList> header Int index_cnt = 0; for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it) { // structure of Map cm_it // first == channel-name as Int e.g. 
114 // second == ChannelInfo struct ConsensusMap::FileDescription channel_as_map; // label is the channel + description provided in the Params if (itraq_type_ != TMT_SIXPLEX) channel_as_map.label = "iTRAQ_" + String(cm_it->second.name) + "_" + String(cm_it->second.description); else channel_as_map.label = "TMT_" + String(cm_it->second.name) + "_" + String(cm_it->second.description); channel_as_map.size = ms_exp_MS2.size(); //TODO what about .filename? leave empty? // add some more MetaInfo channel_as_map.setMetaValue("channel_name", cm_it->second.name); channel_as_map.setMetaValue("channel_id", cm_it->second.id); channel_as_map.setMetaValue("channel_description", cm_it->second.description); channel_as_map.setMetaValue("channel_center", cm_it->second.center); channel_as_map.setMetaValue("channel_active", String(cm_it->second.active ? "true" : "false")); consensus_map.getFileDescriptions()[index_cnt++] = channel_as_map; } // create consensusElements Peak2D::CoordinateType allowed_deviation = (Peak2D::CoordinateType) param_.getValue("reporter_mass_shift"); // now we have picked data // --> assign peaks to channels UInt element_index(0); for (MSExperiment<>::ConstIterator it = ms_exp_MS2.begin(); it != ms_exp_MS2.end(); ++it) { // store RT&MZ of parent ion as centroid of ConsensusFeature ConsensusFeature cf; cf.setUniqueId(); cf.setRT(it->getRT()); if (it->getPrecursors().size() >= 1) { cf.setMZ(it->getPrecursors()[0].getMZ()); } else { throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + String(it->getNativeID()) + " with RT " + String(it->getRT())); } Peak2D channel_value; channel_value.setRT(it->getRT()); // for each each channel Int index = 0; Peak2D::IntensityType overall_intensity = 0; for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it) { // set mz-position of channel channel_value.setMZ(cm_it->second.center); // reset 
intensity channel_value.setIntensity(0); //add up all signals for (MSExperiment<>::SpectrumType::ConstIterator mz_it = it->MZBegin(cm_it->second.center - allowed_deviation) ; mz_it != it->MZEnd(cm_it->second.center + allowed_deviation) ; ++mz_it ) { channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity()); } overall_intensity += channel_value.getIntensity(); // add channel to ConsensusFeature cf.insert(index++, channel_value, element_index); } // ! channel_iterator // check featureHandles are not empty if (overall_intensity == 0) { cf.setMetaValue("all_empty", String("true")); } cf.setIntensity(overall_intensity); consensus_map.push_back(cf); // the tandem-scan in the order they appear in the experiment ++element_index; } // ! Experiment iterator #ifdef ITRAQ_DEBUG std::cout << "processed " << element_index << " scans" << std::endl; #endif consensus_map.setExperimentType("itraq"); return; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //file list StringList file_list = getStringList_("in"); //file type FileHandler fh; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = fh.getType(file_list[0]); } //output file names and types String out_file = getStringOption_("out"); //------------------------------------------------------------- // calculations //------------------------------------------------------------- bool annotate_file_origin = getFlag_("annotate_file_origin"); if (force_type == FileTypes::FEATUREXML) { FeatureMap<> out; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap<> map; FeatureXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); FeatureXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); //skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; ConsensusXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data 
processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); ConsensusXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; TraMLFile fh; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); TraMLFile f; f.store(out_file, out); } else { // we might want to combine different types, thus we only // query in_type (which applies to all files) // and not the suffix or content of a single file force_type = FileTypes::nameToType(getStringOption_("in_type")); //rt bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (custom_rts.size() != 0) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list must have as many elements as there are input files!"); printUsage_(); return ILLEGAL_PARAMETERS; } } //ms level bool user_ms_level = getFlag_("raw:user_ms_level"); MSExperiment<> out; out.reserve(file_list.size()); UInt rt_auto = 0; UInt native_id = 0; std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; //load file MSExperiment<> in; fh.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); //warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More 
than one scan in file '") + filename + "'! All scans will have the same retention time!"); } for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2) { //handle rt Real rt_final = it2->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { if (!filename.hasSubstring("rt")) { writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'")); } for (Size i = 0; i < filename.size(); ++i) { if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i])) { String rt; while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i]))) { rt += filename[i++]; } if (rt.size() > 0) { // remove dot from rt3892.98.dta // ^ if (rt[rt.size() - 1] == '.') { // remove last character rt.erase(rt.end() - 1); } } try { float tmp = rt.toFloat(); rt_final = tmp; } catch (Exception::ConversionError) { writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'.")); } } } } // none of the rt methods were successful if (rt_final == -1) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } out.addSpectrum(*it2); out.getSpectra().back().setRT(rt_final); out.getSpectra().back().setNativeID(native_id); if (user_ms_level) { out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level")); } ++native_id; } // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (its in the spectrum anyways) } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // 
otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } // also add the chromatograms for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2) { all_chromatograms.push_back(*it2); } } // set the chromatograms out.setChromatograms(all_chromatograms); //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
void IsobaricChannelExtractor::extractChannels(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map) { if (ms_exp_data.empty()) { LOG_WARN << "The given file does not contain any conventional peak data, but might" " contain chromatograms. This tool currently cannot handle them, sorry.\n"; throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!"); } // clear the output map consensus_map.clear(false); consensus_map.setExperimentType("labeled_MS2"); // create predicate for spectrum checking LOG_INFO << "Selecting scans with activation mode: " << (selected_activation_ == "" ? "any" : selected_activation_) << "\n"; HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(StringList::create(selected_activation_)); // now we have picked data // --> assign peaks to channels UInt64 element_index(0); // remember the current precusor spectrum MSExperiment<Peak1D>::ConstIterator prec_spec = ms_exp_data.end(); for (MSExperiment<Peak1D>::ConstIterator it = ms_exp_data.begin(); it != ms_exp_data.end(); ++it) { // remember the last MS1 spectra as we assume it to be the precursor spectrum if (it->getMSLevel() == 1) prec_spec = it; if (selected_activation_ == "" || activation_predicate(*it)) { // check if precursor is available if (it->getPrecursors().empty()) { throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + it->getNativeID() + " with RT " + String(it->getRT())); } // check precursor constraints if (!isValidPrecursor_(it->getPrecursors()[0])) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor doesn't fulfill all constraints." << std::endl; continue; } // check precursor purity if we have a valid precursor .. 
if (prec_spec != ms_exp_data.end()) { const DoubleReal purity = computePrecursorPurity_(it, prec_spec); if (purity < min_precursor_purity_) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold. [purity = " << purity << "]" << std::endl; continue; } } else { LOG_INFO << "No precursor available for spectrum: " << it->getNativeID() << std::endl; } if (!(prec_spec == ms_exp_data.end()) && computePrecursorPurity_(it, prec_spec) < min_precursor_purity_) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold." << std::endl; continue; } // store RT&MZ of parent ion as centroid of ConsensusFeature ConsensusFeature cf; cf.setUniqueId(); cf.setRT(it->getRT()); cf.setMZ(it->getPrecursors()[0].getMZ()); Peak2D channel_value; channel_value.setRT(it->getRT()); // for each each channel UInt64 map_index = 0; Peak2D::IntensityType overall_intensity = 0; for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin(); cl_it != quant_method_->getChannelInformation().end(); ++cl_it) { // set mz-position of channel channel_value.setMZ(cl_it->center); // reset intensity channel_value.setIntensity(0); // as every evaluation requires time, we cache the MZEnd iterator const MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_end = it->MZEnd(cl_it->center + reporter_mass_shift_); // add up all signals for (MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_it = it->MZBegin(cl_it->center - reporter_mass_shift_); mz_it != mz_end; ++mz_it) { channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity()); } // discard contribution of this channel as it is below the required intensity threshold if (channel_value.getIntensity() < min_reporter_intensity_) { channel_value.setIntensity(0); } overall_intensity += channel_value.getIntensity(); // add channel to ConsensusFeature cf.insert(map_index++, channel_value, element_index); } 
// ! channel_iterator // check if we keep this feature or if it contains low-intensity quantifications if (remove_low_intensity_quantifications_ && hasLowIntensityReporter_(cf)) { continue; } // check featureHandles are not empty if (overall_intensity == 0) { cf.setMetaValue("all_empty", String("true")); } cf.setIntensity(overall_intensity); consensus_map.push_back(cf); // the tandem-scan in the order they appear in the experiment ++element_index; } } // ! Experiment iterator /// add meta information to the map registerChannelsInOutputMap_(consensus_map); }
/// Rewrites the position of every peak in @p calib_spectra in place.
///
/// The current peak position (read via getMZ()) is fed into the conversion formula and the
/// result is written back with setPos(). If ml3s_ is empty the two point formula (ml1, ml2)
/// is used, otherwise the three point formula (ml1, ml2, ml3).
/// If ml1s_ holds exactly one entry, that coefficient set is used for all spectra; otherwise
/// the coefficients are looked up by spectrum index.
/// NOTE(review): the value read via getMZ() is presumably a flight time - confirm against the caller.
///
/// @param calib_spectra Spectra whose peak positions are converted
void TOFCalibration::applyTOFConversion_(MSExperiment<> & calib_spectra)
{
  const bool three_point = !ml3s_.empty();

  unsigned int spectrum_no = 0;
  for (MSExperiment<>::iterator spectrum = calib_spectra.begin(); spectrum != calib_spectra.end(); ++spectrum, ++spectrum_no)
  {
    // one shared coefficient set, or one set per spectrum
    const unsigned int coeff = (ml1s_.size() == 1) ? 0 : spectrum_no;
    const double ml1 = ml1s_[coeff];
    const double ml2 = ml2s_[coeff];

    if (!three_point)
    {
      // two point conversion
      for (MSExperiment<>::SpectrumType::iterator peak = spectrum->begin(); peak != spectrum->end(); ++peak)
      {
        double time = peak->getMZ();
        peak->setPos(ml1 / 1E12 * (time * 1000 - ml2));
      }
    }
    else
    {
      // three point conversion
      const double ml3 = ml3s_[coeff];
      for (MSExperiment<>::SpectrumType::iterator peak = spectrum->begin(); peak != spectrum->end(); ++peak)
      {
        double time = peak->getMZ();
        peak->setPos((-ml2 - (0.1E7 * (-5E5 + sqrt(0.25E12 - ml1 * ml2 * ml3 + ml1 * ml3 * time))) / (ml1 * ml3) + time) / ml3);
      }
    }
  }
}