/// Applies filterSpectrum() to every spectrum of @p exp, front to back.
void ParentPeakMower::filterPeakMap(PeakMap & exp)
{
  PeakMap::Iterator spec_it = exp.begin();
  while (spec_it != exp.end())
  {
    filterSpectrum(*spec_it);
    ++spec_it;
  }
}
/// Runs filterSpectrum() on each spectrum contained in @p exp, in map order.
void Normalizer::filterPeakMap(PeakMap & exp)
{
  PeakMap::Iterator current = exp.begin();
  while (current != exp.end())
  {
    filterSpectrum(*current);
    ++current;
  }
}
/// Creates one PeptideIdentification per spectrum of @p exp and appends it to @p pep_ids.
///
/// For every spectrum the RT and the m/z of its first precursor are copied into
/// the identification, the per-spectrum caches are reset, and getIdentification()
/// is invoked on a copy of the spectrum.
void CompNovoIdentificationCID::getIdentifications(vector<PeptideIdentification> & pep_ids, const PeakMap & exp)
{
  // TODO check if both CID and ETD is present;
  for (PeakMap::ConstIterator spec_it = exp.begin(); spec_it != exp.end(); ++spec_it)
  {
    PeptideIdentification id;
    PeakSpectrum CID_spec(*spec_it);

    id.setRT(spec_it->getRT());
    // NOTE(review): the first precursor is dereferenced without checking that
    // the precursor list is non-empty -- confirm callers guarantee this.
    id.setMZ(spec_it->getPrecursors().begin()->getMZ());

    // caches are only valid for a single spectrum
    subspec_to_sequences_.clear();
    permute_cache_.clear();
    decomp_cache_.clear();

    getIdentification(id, CID_spec);
    pep_ids.push_back(id);
  }
}
// lists of peptide hits in "maps" will be sorted bool MapAlignmentAlgorithmIdentification::getRetentionTimes_( PeakMap& experiment, SeqToList& rt_data) { for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) { getRetentionTimes_(exp_it->getPeptideIdentifications(), rt_data); } // duplicate annotations should not be possible -> no need to remove them return false; }
/// Filters every spectrum of @p exp with the window strategy selected by the
/// "movetype" parameter: "slide" uses the sliding window, any other value the
/// jumping window.
void WindowMower::filterPeakMap(PeakMap & exp)
{
  const bool use_sliding_window = ((String)param_.getValue("movetype") == "slide");

  PeakMap::Iterator spec_it = exp.begin();
  while (spec_it != exp.end())
  {
    if (!use_sliding_window)
    {
      filterPeakSpectrumForTopNInJumpingWindow(*spec_it);
    }
    else
    {
      filterPeakSpectrumForTopNInSlidingWindow(*spec_it);
    }
    ++spec_it;
  }
}
/// Tool entry point: loads spectra and identifications, maps the
/// identifications onto the spectra and trains the SVM model.
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String in_spectra = getStringOption_("in_spectra");
  String in_identifications = getStringOption_("in_identifications");
  String outfile = getStringOption_("model_output_file");
  Int precursor_charge = getIntOption_("precursor_charge");

  //-------------------------------------------------------------
  // init SvmTheoreticalSpectrumGeneratorTrainer
  //-------------------------------------------------------------
  SvmTheoreticalSpectrumGeneratorTrainer trainer;

  Param param = getParam_().copy("algorithm:", true);
  // the flag is handed to the trainer as a string parameter
  String write_files = getFlag_("write_training_files") ? "true" : "false";
  param.setValue("write_training_files", write_files);
  trainer.setParameters(param);

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  PeakMap map;
  MzMLFile().load(in_spectra, map);

  std::vector<PeptideIdentification> pep_ids;
  std::vector<ProteinIdentification> prot_ids;
  String tmp_str;
  IdXMLFile().load(in_identifications, prot_ids, pep_ids, tmp_str);

  // attach the identifications to the spectra using tight RT / m/z tolerances
  IDMapper idmapper;
  Param par;
  par.setValue("rt_tolerance", 0.001);
  par.setValue("mz_tolerance", 0.001);
  idmapper.setParameters(par);
  idmapper.annotate(map, pep_ids, prot_ids);

  // generate vector of annotations: first hit of the first identification of each spectrum
  // NOTE(review): both [0] accesses are unchecked -- confirm that every
  // spectrum is guaranteed to carry at least one identification with a hit.
  std::vector<AASequence> annotations;
  PeakMap::iterator spec_it = map.begin();
  while (spec_it != map.end())
  {
    annotations.push_back(spec_it->getPeptideIdentifications()[0].getHits()[0].getSequence());
    ++spec_it;
  }

  trainer.trainModel(map, annotations, outfile, precursor_charge);
  return EXECUTION_OK;
}
// Tail of the preceding test section (its beginning is outside this excerpt):
// with the threshold set to 10.0, filterSpectrum() is expected to leave 14 peaks.
p.setValue("threshold", 10.0);
e_ptr->setParameters(p);
e_ptr->filterSpectrum(spec);
TEST_EQUAL(spec.size(), 14)
END_SECTION

// filterPeakMap(): the spectrum loaded from Transformers_tests.dta is wrapped
// in a single-spectrum PeakMap and filtered twice.
START_SECTION((void filterPeakMap(PeakMap& exp)))
  DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

  PeakMap pm;
  pm.addSpectrum(spec);

  // the unfiltered spectrum holds 121 peaks
  TEST_EQUAL(pm.begin()->size(), 121)

  // threshold 1.0: all 121 peaks are expected to remain
  Param p(e_ptr->getParameters());
  p.setValue("threshold", 1.0);
  e_ptr->setParameters(p);
  e_ptr->filterPeakMap(pm);
  TEST_EQUAL(pm.begin()->size(), 121)

  // threshold 10.0: only 14 peaks are expected to remain
  p.setValue("threshold", 10.0);
  e_ptr->setParameters(p);
  e_ptr->filterPeakMap(pm);
  TEST_EQUAL(pm.begin()->size(), 14)
END_SECTION
/// Tool entry point: corrects the precursor m/z and charge of MS/MS spectra
/// using isotope-wavelet features detected in the preceding MS1 scan.
///
/// For every group "MS1 scan followed by MS2 scans", a +/- 3 m/z window around
/// each MS2 precursor is cut out of the MS1 scan and handed to the feature
/// finder. The most intense feature within "precursor_mass_tolerance" of the
/// recorded precursor m/z replaces the precursor position and charge.
///
/// @return EXECUTION_OK after the corrected experiment was stored
ExitCodes main_(int, const char**)
{
  // parsing parameters
  String in(getStringOption_("in"));
  String feature_in(getStringOption_("feature_in"));
  String out(getStringOption_("out"));
  double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

  // reading input
  FileHandler fh;
  FileTypes::Type in_type = fh.getType(in);

  PeakMap exp;
  fh.loadExperiment(in, exp, in_type, log_type_, false, false);
  exp.sortSpectra();

  // NOTE(review): the feature map is loaded but not used further in this function
  FeatureMap feature_map;
  if (feature_in != "")
  {
    FeatureXMLFile().load(feature_in, feature_map);
  }

  // calculations
  FeatureFinderAlgorithmIsotopeWavelet iso_ff;
  Param ff_param(iso_ff.getParameters());
  ff_param.setValue("max_charge", getIntOption_("max_charge"));
  ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
  iso_ff.setParameters(ff_param);

  FeatureFinder ff;
  ff.setLogType(ProgressLogger::NONE);

  // drop empty spectra: copy the experiment, clear its spectra and re-add the non-empty ones
  PeakMap exp2 = exp;
  exp2.clear(false);
  for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
  {
    if (it->size() != 0)
    {
      exp2.addSpectrum(*it);
    }
  }
  exp = exp2;
  exp.updateRanges();

  // TODO check MS2 and MS1 counts
  ProgressLogger progresslogger;
  progresslogger.setLogType(log_type_);
  progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
  for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
  {
    // report the number of spectra already passed, so progress counts upwards
    progresslogger.setProgress(it - exp.begin());

    if (it->getMSLevel() != 2)
    {
      continue;
    }

    // find first MS1 scan of the MS/MS scan
    PeakMap::Iterator ms1_it = it;
    while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
    {
      --ms1_it;
    }
    if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
    {
      writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
      continue;
    }
    if (ms1_it->size() == 0)
    {
      writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
      continue;
    }

    // process all MS2 scans that directly follow the MS1 scan
    PeakMap::Iterator ms2_it = ms1_it;
    ++ms2_it;
    while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
    {
      // first: error checks
      if (ms2_it->getPrecursors().empty())
      {
        writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
        ++ms2_it;
        continue;
      }
      else if (ms2_it->getPrecursors().size() > 1)
      {
        writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
      }

      Precursor prec = *ms2_it->getPrecursors().begin();
      double prec_pos = prec.getMZ();

      PeakMap new_exp;
      // now excise small region from the MS1 spec for the feature finder (isotope pattern must be covered...)
      PeakSpectrum zoom_spec;
      for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
      {
        if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3)
        {
          zoom_spec.push_back(*pit);
        }
      }
      new_exp.addSpectrum(zoom_spec);
      new_exp.updateRanges();

      FeatureMap features, seeds;
      ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
      if (features.empty())
      {
        writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
        ++ms2_it;
        continue;
      }

      // Most intense feature within the mass tolerance. The search starts at the
      // most negative double: numeric_limits<double>::min() is the smallest
      // *positive* value and would exclude features with zero intensity.
      double max_int(-numeric_limits<double>::max());
      // stays at max() unless a feature within tolerance was found
      double min_dist(numeric_limits<double>::max());
      Size max_int_feat_idx(0);
      for (Size i = 0; i != features.size(); ++i)
      {
        if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance &&
            features[i].getIntensity() > max_int)
        {
          max_int_feat_idx = i;
          max_int = features[i].getIntensity();
          min_dist = fabs(features[i].getMZ() - prec_pos);
        }
      }

      writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
      if (min_dist < precursor_mass_tolerance)
      {
        prec.setMZ(features[max_int_feat_idx].getMZ());
        prec.setCharge(features[max_int_feat_idx].getCharge());
        vector<Precursor> precs;
        precs.push_back(prec);
        ms2_it->setPrecursors(precs);
        writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
      }

      ++ms2_it;
    }

    // Continue after the last MS2 scan handled above. Never move backwards:
    // if the scans between the MS1 scan and 'it' are not all MS2 (e.g. an MS3
    // scan in between), the inner loop stops before 'it' and stepping back
    // would revisit the same spectrum forever.
    --ms2_it;
    if (it < ms2_it)
    {
      it = ms2_it;
    }
  }
  progresslogger.endProgress();

  // writing output
  fh.storeExperiment(out, exp, log_type_);

  return EXECUTION_OK;
}
/// Moves each precursor m/z onto the nearest peak of its parent MS1 spectrum.
///
/// @param exp           experiment whose MS2 precursors are corrected in place
/// @param mz_tolerance  maximal accepted distance between recorded precursor
///                      m/z and nearest MS1 peak (ppm if @p ppm is true,
///                      otherwise absolute m/z units)
/// @param ppm           selects the unit of @p mz_tolerance
/// @param deltaMZs      receives (nearest peak m/z - original m/z) per corrected precursor
/// @param mzs           receives the original precursor m/z per corrected precursor
/// @param rts           receives the MS2 retention time per corrected precursor
/// @return indices (into @p exp) of the spectra whose precursor was corrected
///
/// Precursors without a usable MS1 parent spectrum are reported and skipped.
set<Size> correctToNearestMS1Peak(PeakMap & exp, double mz_tolerance, bool ppm, vector<double> & deltaMZs, vector<double> & mzs, vector<double> & rts)
{
  set<Size> corrected_precursors;

  // load experiment and extract precursors
  vector<Precursor> precursors; // precursor
  vector<double> precursors_rt; // RT of precursor MS2 spectrum
  vector<Size> precursor_scan_index;
  getPrecursors_(exp, precursors, precursors_rt, precursor_scan_index);

  for (Size i = 0; i != precursors_rt.size(); ++i)
  {
    // get precursor rt
    const double rt = precursors_rt[i];

    // get precursor MZ
    const double mz = precursors[i].getMZ();

    // get precursor spectrum (small offset so the spectrum at exactly 'rt' is found)
    MSExperiment<Peak1D>::ConstIterator rt_it = exp.RTBegin(rt - 1e-8);

    // store index of MS2 spectrum
    const Size precursor_spectrum_idx = rt_it - exp.begin();

    // get parent (MS1) of precursor spectrum
    rt_it = exp.getPrecursorSpectrum(rt_it);

    // without a valid MS1 parent there is nothing to correct against; the
    // iterator must not be dereferenced in that case
    if (rt_it == exp.end() || rt_it->getMSLevel() != 1)
    {
      LOG_WARN << "Error: no MS1 spectrum for this precursor" << endl;
      continue;
    }

    // an empty MS1 spectrum has no nearest peak
    if (rt_it->size() == 0)
    {
      LOG_WARN << "Error: MS1 spectrum for this precursor contains no peaks" << endl;
      continue;
    }

    // find peak (index) closest to expected position
    const Size nearest_peak_idx = rt_it->findNearest(mz);

    // get actual position of closest peak
    const double nearest_peak_mz = (*rt_it)[nearest_peak_idx].getMZ();

    // calculate error between expected and actual position
    // (fabs: the floating-point absolute value, consistent with the check below)
    const double abs_error = fabs(nearest_peak_mz - mz);
    const double nearestPeakError = ppm ? abs_error / mz * 1e6 : abs_error;

    // check if error is small enough
    if (nearestPeakError < mz_tolerance)
    {
      // sanity check: do we really have the same precursor in the original and the picked spectrum
      if (fabs(exp[precursor_spectrum_idx].getPrecursors()[0].getMZ() - mz) > 0.0001)
      {
        LOG_WARN << "Error: index is referencing different precursors in original and picked spectrum." << endl;
      }

      const double deltaMZ = nearest_peak_mz - mz;
      deltaMZs.push_back(deltaMZ);
      mzs.push_back(mz);
      rts.push_back(rt);

      // correct entries
      Precursor corrected_prec = precursors[i];
      corrected_prec.setMZ(nearest_peak_mz);
      exp[precursor_spectrum_idx].getPrecursors()[0] = corrected_prec;
      corrected_precursors.insert(precursor_spectrum_idx);
    }
  }
  return corrected_precursors;
}
TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()),"dta2d"); TEST_EQUAL(e.size(), 9); ABORT_IF(e.size() != 9) TEST_STRING_EQUAL(e[0].getNativeID(),"index=0") TEST_STRING_EQUAL(e[1].getNativeID(),"index=1") TEST_STRING_EQUAL(e[2].getNativeID(),"index=2") TEST_STRING_EQUAL(e[3].getNativeID(),"index=3") TEST_STRING_EQUAL(e[4].getNativeID(),"index=4") TEST_STRING_EQUAL(e[5].getNativeID(),"index=5") TEST_STRING_EQUAL(e[6].getNativeID(),"index=6") TEST_STRING_EQUAL(e[7].getNativeID(),"index=7") TEST_STRING_EQUAL(e[8].getNativeID(),"index=8") PeakMap::const_iterator it(e.begin()); TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 230.02) TEST_REAL_SIMILAR(it->getRT(), 4711.1) TEST_REAL_SIMILAR((*it)[0].getIntensity(), 47218.89) ++it; TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 231.51) TEST_REAL_SIMILAR(it->getRT(), 4711.2) TEST_REAL_SIMILAR((*it)[0].getIntensity(), 89935.22) ++it; TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 139.42) TEST_REAL_SIMILAR(it->getRT(), 4711.3) TEST_REAL_SIMILAR((*it)[0].getIntensity(), 318.52) ++it;
void MassTraceDetection::run(const PeakMap& input_exp, std::vector<MassTrace>& found_masstraces) { // make sure the output vector is empty found_masstraces.clear(); // gather all peaks that are potential chromatographic peak apices // - use work_exp for actual work (remove peaks below noise threshold) // - store potential apices in chrom_apices PeakMap work_exp; MapIdxSortedByInt chrom_apices; Size total_peak_count(0); std::vector<Size> spec_offsets; spec_offsets.push_back(0); Size spectra_count(0); // *********************************************************** // // Step 1: Detecting potential chromatographic apices // *********************************************************** // for (PeakMap::ConstIterator it = input_exp.begin(); it != input_exp.end(); ++it) { // check if this is a MS1 survey scan if (it->getMSLevel() != 1) continue; std::vector<Size> indices_passing; for (Size peak_idx = 0; peak_idx < it->size(); ++peak_idx) { double tmp_peak_int((*it)[peak_idx].getIntensity()); if (tmp_peak_int > noise_threshold_int_) { // Assume that noise_threshold_int_ contains the noise level of the // data and we want to be chrom_peak_snr times above the noise level // --> add this peak as possible chromatographic apex if (tmp_peak_int > chrom_peak_snr_ * noise_threshold_int_) { chrom_apices.insert(std::make_pair(tmp_peak_int, std::make_pair(spectra_count, indices_passing.size()))); } indices_passing.push_back(peak_idx); ++total_peak_count; } } PeakMap::SpectrumType tmp_spec(*it); tmp_spec.select(indices_passing); work_exp.addSpectrum(tmp_spec); spec_offsets.push_back(spec_offsets.back() + tmp_spec.size()); ++spectra_count; } if (spectra_count < 3) { throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Input map consists of too few MS1 spectra (less than 3!). 
Aborting...", String(spectra_count)); } // discard last spectrum's offset spec_offsets.pop_back(); // ********************************************************************* // Step 2: start extending mass traces beginning with the apex peak (go // through all peaks in order of decreasing intensity) // ********************************************************************* run_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces); return; } // end of MassTraceDetection::run
/// Tool entry point: scales the intensities of all spectra with MS level < 2
/// so that the most intense peak of the whole map becomes 100.
///
/// If the map maximum is not positive, the intensities are left untouched
/// (dividing by it would produce NaN/inf or flip signs).
///
/// @return EXECUTION_OK after the map was stored
ExitCodes main_(int, const char **) override
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String in = getStringOption_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  PeakMap exp;
  MzMLFile f;
  f.load(in, exp);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------

  // determine maximum peak; dividing by (maximum / 100) maps it to 100
  exp.updateRanges();
  double max = exp.getMaxInt() / 100.0;

  // only a positive scaling factor yields a meaningful normalization
  if (max > 0.0)
  {
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->getMSLevel() < 2)
      {
        for (PeakMap::SpectrumType::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
        {
          it2->setIntensity(it2->getIntensity() / max);
        }
      }
    }
  }

  /// @todo add chromatogram support for normalization, e.g. for MRM stuff (Andreas)
  /*
    vector<MSChromatogram > chroms = exp.getChromatograms();
    double sum(0);
    for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
    {
      for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
      {
        sum += it2->getIntensity();
      }
    }

    for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
    {
      for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
      {
        it2->setIntensity(it2->getIntensity() / sum * 1000000.0);
      }
    }

    exp.setChromatograms(chroms);
  */

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------

  //annotate output with data processing info
  addDataProcessing_(exp, getProcessingInfo_(DataProcessing::NORMALIZATION));

  f.store(out, exp);

  return EXECUTION_OK;
}
/// Tool entry point: sends the MS2 spectra of the input file to a Mascot
/// server and stores the returned identifications as idXML.
///
/// Steps: load MS2 spectra, serialize them as MGF into memory, run the remote
/// query inside a Qt event loop, write the XML response to a temporary file,
/// parse it and store the result.
///
/// @param argc, argv  forwarded to the QCoreApplication that drives the query
/// @return EXECUTION_OK on success, EXTERNAL_PROGRAM_ERROR if the query failed
/// @throws Exception::FileEmpty        if the input contains no MS2 spectra
/// @throws Exception::IllegalArgument  if profile data is given without "force"
ExitCodes main_(int argc, const char** argv)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  //input/output files
  String in(getStringOption_("in")), out(getStringOption_("out"));
  FileHandler fh;
  FileTypes::Type in_type = fh.getType(in);

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  PeakMap exp;
  // keep only MS2 spectra
  fh.getOptions().addMSLevel(2);
  fh.loadExperiment(in, exp, in_type, log_type_);
  writeDebug_(String("Spectra loaded: ") + exp.size(), 2);

  if (exp.getSpectra().empty())
  {
    throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file.");
  }

  // determine type of spectral data (profile or centroided); judged from the first spectrum
  SpectrumSettings::SpectrumType spectrum_type = exp[0].getType();

  if (spectrum_type == SpectrumSettings::RAWDATA)
  {
    if (!getFlag_("force"))
    {
      throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag.");
    }
  }

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------

  Param mascot_param = getParam_().copy("Mascot_parameters:", true);
  MascotGenericFile mgf_file;
  // NOTE(review): 'p' is filled but never handed to mgf_file (only
  // mascot_param is) -- confirm whether these settings were meant to be applied.
  Param p;
  // TODO: switch this to mzML (much smaller)
  p.setValue("internal:format", "Mascot generic", "Sets the format type of the peak list, this should not be changed unless you write the header only.", ListUtils::create<String>("advanced"));
  p.setValue("internal:HTTP_format", "true", "Write header with MIME boundaries instead of simple key-value pairs. For HTTP submission only.", ListUtils::create<String>("advanced"));
  p.setValue("internal:content", "all", "Use parameter header + the peak lists with BEGIN IONS... or only one of them.", ListUtils::create<String>("advanced"));
  mgf_file.setParameters(mascot_param);

  // get the spectra into string stream
  writeDebug_("Writing MGF file to stream", 1);
  stringstream ss;
  mgf_file.store(ss, in, exp, true); // write in compact format

  // Usage of a QCoreApplication is overkill here (and ugly too), but we just use the
  // QEventLoop to process the signals and slots and grab the results afterwards from
  // the MascotRemotQuery instance
  char** argv2 = const_cast<char**>(argv);
  QCoreApplication event_loop(argc, argv2);
  MascotRemoteQuery* mascot_query = new MascotRemoteQuery(&event_loop);
  Param mascot_query_param = getParam_().copy("Mascot_server:", true);
  writeDebug_("Setting parameters for Mascot query", 1);
  mascot_query->setParameters(mascot_query_param);
  writeDebug_("Setting spectra for Mascot query", 1);
  mascot_query->setQuerySpectra(ss.str());

  // remove unnecessary spectra: str("") discards the buffered MGF text
  // (clear() alone only resets the stream's error flags)
  ss.str("");
  ss.clear();

  QObject::connect(mascot_query, SIGNAL(done()), &event_loop, SLOT(quit()));
  QTimer::singleShot(1000, mascot_query, SLOT(run()));
  writeDebug_("Fire off Mascot query", 1);
  event_loop.exec();
  writeDebug_("Mascot query finished", 1);

  if (mascot_query->hasError())
  {
    writeLog_("An error occurred during the query: " + mascot_query->getErrorMessage());
    delete mascot_query;
    return EXTERNAL_PROGRAM_ERROR;
  }

  // write Mascot response to file
  String mascot_tmp_file_name(File::getTempDirectory() + "/" + File::getUniqueName() + "_Mascot_response");
  QFile mascot_tmp_file(mascot_tmp_file_name.c_str());
  mascot_tmp_file.open(QIODevice::WriteOnly);
  mascot_tmp_file.write(mascot_query->getMascotXMLResponse());
  mascot_tmp_file.close();

  // clean up
  delete mascot_query;

  vector<PeptideIdentification> pep_ids;
  ProteinIdentification prot_id;

  // set up mapping between scan numbers and retention times:
  MascotXMLFile::RTMapping rt_mapping;
  MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);

  // read the response
  MascotXMLFile().load(mascot_tmp_file_name, prot_id, pep_ids, rt_mapping);
  writeDebug_("Read " + String(pep_ids.size()) + " peptide ids and " + String(prot_id.getHits().size()) + " protein identifications from Mascot", 5);

  // for debugging errors relating to unexpected response files
  if (this->debug_level_ >= 100)
  {
    writeDebug_(String("\nMascot Server Response file saved to: '") + mascot_tmp_file_name + "'. If an error occurs, send this file to the OpenMS team.\n", 100);
  }
  else
  {
    // delete file
    mascot_tmp_file.remove();
  }

  // keep or delete protein identifications?!
  vector<ProteinIdentification> prot_ids;
  if (!getFlag_("keep_protein_links"))
  {
    // remove protein links from peptides
    for (Size i = 0; i < pep_ids.size(); ++i)
    {
      std::vector<PeptideHit> hits = pep_ids[i].getHits();
      for (Size h = 0; h < hits.size(); ++h)
      {
        hits[h].setPeptideEvidences(vector<PeptideEvidence>());
      }
      pep_ids[i].setHits(hits);
    }
    // remove proteins
    std::vector<ProteinHit> p_hit;
    prot_id.setHits(p_hit);
  }
  prot_ids.push_back(prot_id);

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------

  IdXMLFile().store(out, prot_ids, pep_ids);

  return EXECUTION_OK;
}
void correct(PeakMap & exp, vector<DoubleReal> & deltaMZs, vector<DoubleReal> & mzs, vector<DoubleReal> & rts) { // load experiment and extract precursors vector<Precursor> precursors; // precursor vector<double> precursors_rt; // RT of precursor MS2 spectrum getPrecursors_(exp, precursors, precursors_rt); for (Size i = 0; i != precursors_rt.size(); ++i) { // get precursor rt DoubleReal rt = precursors_rt[i]; // get precursor MZ DoubleReal mz = precursors[i].getMZ(); //cout << rt << " " << mz << endl; // get precursor spectrum MSExperiment<Peak1D>::ConstIterator rt_it = exp.RTBegin(rt); // store index of MS2 spectrum UInt precursor_spectrum_idx = rt_it - exp.begin(); // get parent (MS1) of precursor spectrum rt_it = exp.getPrecursorSpectrum(rt_it); if (rt_it->getMSLevel() != 1) { cout << "Error: no MS1 spectrum for this precursor" << endl; } //cout << rt_it->getRT() << " " << rt_it->size() << endl; // find peak (index) closest to expected position Size nearest_peak_idx = rt_it->findNearest(mz); // get actual position of closest peak DoubleReal nearest_peak_mz = (*rt_it)[nearest_peak_idx].getMZ(); // calculate error between expected and actual position DoubleReal nearestPeakError = abs(nearest_peak_mz - mz); // check if error is small enough if (nearestPeakError < 0.1) { // sanity check: do we really have the same precursor in the original and the picked spectrum if (fabs(exp[precursor_spectrum_idx].getPrecursors()[0].getMZ() - mz) > 0.0001) { cout << "Error: index is referencing different precursors in original and picked spectrum." << endl; } // cout << mz << " -> " << nearest_peak_mz << endl; DoubleReal deltaMZ = nearest_peak_mz - mz; deltaMZs.push_back(deltaMZ); mzs.push_back(mz); rts.push_back(rt); // correct entries Precursor corrected_prec = precursors[i]; corrected_prec.setMZ(nearest_peak_mz); exp[precursor_spectrum_idx].getPrecursors()[0] = corrected_prec; } } }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // general variables and data //------------------------------------------------------------- FileHandler fh; vector<PeptideIdentification> peptide_identifications; vector<ProteinIdentification> protein_identifications; //------------------------------------------------------------- // reading input //------------------------------------------------------------- const String in = getStringOption_("in"); ProgressLogger logger; logger.setLogType(ProgressLogger::CMD); logger.startProgress(0, 1, "Loading..."); if (File::isDirectory(in)) { const String in_directory = File::absolutePath(in).ensureLastChar('/'); const String mz_file = getStringOption_("mz_file"); const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide"); UInt i = 0; FileHandler fh; FileTypes::Type type; MSExperiment<Peak1D> msexperiment; // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;) However, now String::toInt() might throw. map<Int, float> num_and_rt; vector<String> NativeID; // The mz-File (if given) if (!mz_file.empty()) { type = fh.getTypeByFileName(mz_file); fh.loadExperiment(mz_file, msexperiment, type); for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it) { String(spectra_it->getNativeID()).split('=', NativeID); try { num_and_rt[NativeID[1].toInt()] = spectra_it->getRT(); // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01 } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. 
'") + e.getMessage()); } } } // Get list of the actual Sequest .out-Files StringList in_files; if (!File::fileList(in_directory, String("*.out"), in_files)) { writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!"); } // Now get to work ... for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it) { vector<PeptideIdentification> peptide_ids_seq; ProteinIdentification protein_id_seq; vector<double> pvalues_seq; vector<String> in_file_vec; SequestOutfile sequest_outfile; writeDebug_(String("Reading file ") + *in_files_it, 3); try { sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide); in_files_it->split('.', in_file_vec); for (Size j = 0; j < peptide_ids_seq.size(); ++j) { // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files. peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i); Int scan_number = 0; if (!mz_file.empty()) { try { scan_number = in_file_vec[2].toInt(); peptide_ids_seq[j].setRT(num_and_rt[scan_number]); } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage()); } catch (exception& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.what()); } //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? 
semantics of mz const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge(); peptide_ids_seq[j].setMZ(real_mz); } writeDebug_(String("scan: ") + String(scan_number) + String(" RT: ") + String(peptide_ids_seq[j].getRT()) + " MZ: " + String(peptide_ids_seq[j].getMZ()) + " Ident: " + peptide_ids_seq[j].getIdentifier(), 4); peptide_identifications.push_back(peptide_ids_seq[j]); } protein_id_seq.setIdentifier(*in_files_it + "_" + i); protein_identifications.push_back(protein_id_seq); ++i; } catch (Exception::ParseError& pe) { writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")"); throw; } catch (...) { writeLog_(String("Error reading file: ") + *in_files_it); throw; } } writeDebug_("All files processed.", 3); } // ! directory else { FileTypes::Type in_type = fh.getType(in); if (in_type == FileTypes::PEPXML) { String exp_name = getStringOption_("mz_file"); String orig_name = getStringOption_("mz_name"); bool use_precursor_data = getFlag_("use_precursor_data"); if (exp_name.empty()) { PepXMLFile().load(in, protein_identifications, peptide_identifications, orig_name); } else { MSExperiment<> exp; fh.loadExperiment(exp_name, exp); if (!orig_name.empty()) { exp_name = orig_name; } PepXMLFile().load(in, protein_identifications, peptide_identifications, exp_name, exp, use_precursor_data); } } else if (in_type == FileTypes::IDXML) { IdXMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::MZIDENTML) { LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." 
<< endl; MzIdentMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::PROTXML) { protein_identifications.resize(1); peptide_identifications.resize(1); ProtXMLFile().load(in, protein_identifications[0], peptide_identifications[0]); } else if (in_type == FileTypes::OMSSAXML) { protein_identifications.resize(1); OMSSAXMLFile().load(in, protein_identifications[0], peptide_identifications, true); } else if (in_type == FileTypes::MASCOTXML) { String scan_regex = getStringOption_("scan_regex"); String exp_name = getStringOption_("mz_file"); MascotXMLFile::RTMapping rt_mapping; if (!exp_name.empty()) { PeakMap exp; // load only MS2 spectra: fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping); } protein_identifications.resize(1); MascotXMLFile().load(in, protein_identifications[0], peptide_identifications, rt_mapping, scan_regex); } else if (in_type == FileTypes::XML) { ProteinIdentification protein_id; XTandemXMLFile().load(in, protein_id, peptide_identifications); protein_id.setSearchEngineVersion(""); protein_id.setSearchEngine("XTandem"); protein_identifications.push_back(protein_id); String exp_name = getStringOption_("mz_file"); if (!exp_name.empty()) { PeakMap exp; fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it) { UInt id = (Int)it->getMetaValue("spectrum_id"); --id; // native IDs were written 1-based if (id < exp.size()) { it->setRT(exp[id].getRT()); double pre_mz(0.0); if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ(); it->setMZ(pre_mz); it->removeMetaValue("spectrum_id"); } else { LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" 
<< endl; } } } } else { writeLog_("Unknown input file type given. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } } logger.endProgress(); //------------------------------------------------------------- // writing output //------------------------------------------------------------- const String out = getStringOption_("out"); FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type")); if (out_type == FileTypes::UNKNOWN) { out_type = fh.getTypeByFileName(out); } if (out_type == FileTypes::UNKNOWN) { writeLog_("Error: Could not determine output file type!"); return PARSE_ERROR; } logger.startProgress(0, 1, "Storing..."); if (out_type == FileTypes::PEPXML) { bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed"); String mz_file = getStringOption_("mz_file"); String mz_name = getStringOption_("mz_name"); PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed); } else if (out_type == FileTypes::IDXML) { IdXMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::MZIDENTML) { MzIdentMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::FASTA) { Size count = 0; ofstream fasta(out.c_str(), ios::out); for (Size i = 0; i < peptide_identifications.size(); ++i) { for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l) { const PeptideHit& hit = peptide_identifications[i].getHits()[l]; fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++ << "|" << hit.getSequence().toString() << endl; String seq = hit.getSequence().toUnmodifiedString(); // FASTA files should have at most 60 characters of sequence info per line for (Size j = 0; j < seq.size(); j += 60) { Size k = min(j + 60, seq.size()); fasta << string(seq[j], seq[k]) << endl; } } } } else { writeLog_("Unsupported output file type given. 
Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } logger.endProgress(); return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String in(getStringOption_("in")); String out(getStringOption_("out")); String pair_in(getStringOption_("pair_in")); String feature_out(getStringOption_("feature_out")); double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance")); double RT_tolerance(getDoubleOption_("RT_tolerance")); double expansion_range(getDoubleOption_("expansion_range")); Size max_isotope(getIntOption_("max_isotope")); Int debug(getIntOption_("debug")); //------------------------------------------------------------- // reading input //------------------------------------------------------------- PeakMap exp; MzMLFile().load(in, exp); exp.sortSpectra(); exp.updateRanges(); // read pair file ifstream is(pair_in.c_str()); String line; vector<SILAC_pair> pairs; while (getline(is, line)) { line.trim(); if (line.empty() || line[0] == '#') { continue; } vector<String> split; line.split(' ', split); if (split.size() != 4) { cerr << "missformated line ('" << line << "') should be (space separated) 'm/z-light m/z-heavy charge rt'" << endl; } SILAC_pair p; p.mz_light = split[0].toDouble(); p.mz_heavy = split[1].toDouble(); p.charge = split[2].toInt(); p.rt = split[3].toDouble(); pairs.push_back(p); } is.close(); //------------------------------------------------------------- // calculations //------------------------------------------------------------- ConsensusMap results_map; results_map.getFileDescriptions()[0].label = "light"; results_map.getFileDescriptions()[0].filename = in; results_map.getFileDescriptions()[1].label = "heavy"; results_map.getFileDescriptions()[1].filename = in; FeatureFinderAlgorithmIsotopeWavelet iso_ff; Param ff_param(iso_ff.getParameters()); ff_param.setValue("max_charge", 3); ff_param.setValue("intensity_threshold", -1.0); iso_ff.setParameters(ff_param); FeatureFinder ff; 
ff.setLogType(ProgressLogger::NONE); vector<SILACQuantitation> quantlets; FeatureMap all_features; for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it) { if (it->size() == 0 || it->getMSLevel() != 1 || !it->getInstrumentSettings().getZoomScan()) { continue; } PeakSpectrum new_spec = *it; // get spacing from data double min_spacing(numeric_limits<double>::max()); double last_mz(0); for (PeakSpectrum::ConstIterator pit = new_spec.begin(); pit != new_spec.end(); ++pit) { if (pit->getMZ() - last_mz < min_spacing) { min_spacing = pit->getMZ() - last_mz; } last_mz = pit->getMZ(); } writeDebug_("Min-spacing=" + String(min_spacing), 1); // split the spectrum into two subspectra, by using different hypothesis of // the SILAC pairs Size idx = 0; for (vector<SILAC_pair>::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit, ++idx) { // in RT window? if (fabs(it->getRT() - pit->rt) >= RT_tolerance) { continue; } // now excise the two ranges for the pair, complete isotope distributions of both, light and heavy PeakSpectrum light_spec, heavy_spec; light_spec.setRT(it->getRT()); heavy_spec.setRT(it->getRT()); for (PeakSpectrum::ConstIterator sit = it->begin(); sit != it->end(); ++sit) { double mz(sit->getMZ()); if (mz - (pit->mz_light - precursor_mass_tolerance) > 0 && (pit->mz_light + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz > 0) { light_spec.push_back(*sit); } if (mz - (pit->mz_heavy - precursor_mass_tolerance) > 0 && (pit->mz_heavy + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz > 0) { heavy_spec.push_back(*sit); } } // expand light spectrum Peak1D p; p.setIntensity(0); if (light_spec.size() > 0) { double lower_border = light_spec.begin()->getMZ() - expansion_range; for (double pos = light_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing) { p.setMZ(pos); light_spec.insert(light_spec.begin(), p); } double upper_border = 
light_spec.begin()->getMZ() - expansion_range; for (double pos = light_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing) { p.setMZ(pos); light_spec.push_back(p); } } if (heavy_spec.size() > 0) { // expand heavy spectrum double lower_border = heavy_spec.begin()->getMZ() - expansion_range; for (double pos = heavy_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing) { p.setMZ(pos); heavy_spec.insert(heavy_spec.begin(), p); } double upper_border = heavy_spec.begin()->getMZ() - expansion_range; for (double pos = heavy_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing) { p.setMZ(pos); heavy_spec.push_back(p); } } // create experiments for feature finding PeakMap new_exp_light, new_exp_heavy; new_exp_light.addSpectrum(light_spec); new_exp_heavy.addSpectrum(heavy_spec); if (debug > 9) { MzMLFile().store(String(it->getRT()) + "_debugging_light.mzML", new_exp_light); MzMLFile().store(String(it->getRT()) + "_debugging_heavy.mzML", new_exp_heavy); } writeDebug_("Spectrum-id: " + it->getNativeID() + " @ " + String(it->getRT()) + "s", 1); new_exp_light.updateRanges(); new_exp_heavy.updateRanges(); FeatureMap feature_map_light, feature_map_heavy, seeds; if (light_spec.size() > 0) { ff.run("isotope_wavelet", new_exp_light, feature_map_light, ff_param, seeds); } writeDebug_("#light_features=" + String(feature_map_light.size()), 1); if (heavy_spec.size() > 0) { ff.run("isotope_wavelet", new_exp_heavy, feature_map_heavy, ff_param, seeds); } writeDebug_("#heavy_features=" + String(feature_map_heavy.size()), 1); // search if feature maps to m/z value of pair vector<MatchedFeature> light, heavy; for (FeatureMap::const_iterator fit = feature_map_light.begin(); fit != feature_map_light.end(); ++fit) { all_features.push_back(*fit); light.push_back(MatchedFeature(*fit, idx)); } for (FeatureMap::const_iterator fit = feature_map_heavy.begin(); fit != feature_map_heavy.end(); ++fit) { all_features.push_back(*fit); heavy.push_back(MatchedFeature(*fit, idx)); 
} if (!heavy.empty() && !light.empty()) { writeDebug_("Finding best feature pair out of " + String(light.size()) + " light and " + String(heavy.size()) + " heavy matching features.", 1); // now find "good" matches, means the pair with the smallest m/z deviation Feature best_light, best_heavy; double best_deviation(numeric_limits<double>::max()); Size best_idx(pairs.size()); for (vector<MatchedFeature>::const_iterator fit1 = light.begin(); fit1 != light.end(); ++fit1) { for (vector<MatchedFeature>::const_iterator fit2 = heavy.begin(); fit2 != heavy.end(); ++fit2) { if (fit1->idx != fit2->idx || fit1->f.getCharge() != fit2->f.getCharge() || fabs(fit1->f.getMZ() - pairs[fit1->idx].mz_light) > precursor_mass_tolerance || fabs(fit2->f.getMZ() - pairs[fit2->idx].mz_heavy) > precursor_mass_tolerance) { continue; } double deviation(0); deviation = fabs((fit1->f.getMZ() - pairs[fit1->idx].mz_light) - (fit2->f.getMZ() - pairs[fit2->idx].mz_heavy)); if (deviation < best_deviation && deviation < precursor_mass_tolerance) { best_light = fit1->f; best_heavy = fit2->f; best_idx = fit1->idx; } } } if (best_idx == pairs.size()) { continue; } writeDebug_("Ratio: " + String(best_heavy.getIntensity() / best_light.getIntensity()), 1); ConsensusFeature SILAC_feature; SILAC_feature.setMZ((best_light.getMZ() + best_heavy.getMZ()) / 2.0); SILAC_feature.setRT((best_light.getRT() + best_heavy.getRT()) / 2.0); SILAC_feature.insert(0, best_light); SILAC_feature.insert(1, best_heavy); results_map.push_back(SILAC_feature); quantlets.push_back(SILACQuantitation(best_light.getIntensity(), best_heavy.getIntensity(), best_idx)); } } } // now calculate the final quantitation values from the quantlets Map<Size, vector<SILACQuantitation> > idx_to_quantlet; for (vector<SILACQuantitation>::const_iterator it = quantlets.begin(); it != quantlets.end(); ++it) { idx_to_quantlet[it->idx].push_back(*it); } for (Map<Size, vector<SILACQuantitation> >::ConstIterator it1 = idx_to_quantlet.begin(); it1 != 
idx_to_quantlet.end(); ++it1) { SILAC_pair silac_pair = pairs[it1->first]; // simply add up all intensities and calculate the final ratio double light_sum(0), heavy_sum(0); vector<double> light_ints, heavy_ints, ratios; for (vector<SILACQuantitation>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2) { light_sum += it2->light_intensity; light_ints.push_back(it2->light_intensity); heavy_sum += it2->heavy_intensity; heavy_ints.push_back(it2->heavy_intensity); ratios.push_back(it2->heavy_intensity / it2->light_intensity * (it2->heavy_intensity + it2->light_intensity)); } double absdev_ratios = Math::absdev(ratios.begin(), ratios.begin() + (ratios.size()) / (heavy_sum + light_sum)); cout << "Ratio: " << silac_pair.mz_light << " <-> " << silac_pair.mz_heavy << " @ " << silac_pair.rt << " s, ratio(h/l) " << heavy_sum / light_sum << " +/- " << absdev_ratios << " (#scans for quantation: " << String(it1->second.size()) << " )" << endl; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- if (feature_out != "") { FeatureXMLFile().store(feature_out, all_features); } writeDebug_("Writing output", 1); ConsensusXMLFile().store(out, results_map); return EXECUTION_OK; }
// Destructor section: deletes the instance held in ptr.
START_SECTION(virtual ~MascotGenericFile())
{
  delete ptr;
}
END_SECTION

// Fresh instance for the sections below.
ptr = new MascotGenericFile();

// load(): the test input is expected to yield one spectrum with nine peaks.
START_SECTION((template < typename MapType > void load(const String &filename, MapType &exp)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);
  TEST_EQUAL(exp.size(), 1)
  TEST_EQUAL(exp.begin()->size(), 9)
}
END_SECTION

// store(): loads the same test input and configures fixed/variable modifications
// on the instance before writing to a string stream.
START_SECTION((void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact = false)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);

  // handling of modifications:
  Param params = ptr->getParameters();
  params.setValue("fixed_modifications", ListUtils::create<String>("Carbamidomethyl (C),Phospho (S)"));
  params.setValue("variable_modifications", ListUtils::create<String>("Oxidation (M),Deamidated (N),Deamidated (Q)"));
  ptr->setParameters(params);

  // output buffer for the store() call
  stringstream ss;
void FeatureFinder::run(const String& algorithm_name, PeakMap& input_map, FeatureMap& features, const Param& param, const FeatureMap& seeds) { // Nothing to do if there is no data if ((algorithm_name != "mrm" && input_map.empty()) || (algorithm_name == "mrm" && input_map.getChromatograms().empty())) { features.clear(true); return; } // check input { // We need updated ranges => check number of peaks if (algorithm_name != "mrm" && input_map.getSize() == 0) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder needs updated ranges on input map. Aborting."); } // We need MS1 data only => check levels if (algorithm_name != "mrm" && (input_map.getMSLevels().size() != 1 || input_map.getMSLevels()[0] != 1)) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data. Aborting."); } //Check if the peaks are sorted according to m/z if (!input_map.isSorted(true)) { LOG_WARN << "Input map is not sorted by RT and m/z! This is done now, before applying the algorithm!" << std::endl; input_map.sortSpectra(true); input_map.sortChromatograms(true); } for (Size s = 0; s < input_map.size(); ++s) { if (input_map[s].empty()) continue; if (input_map[s][0].getMZ() < 0) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on spectra that contain peaks with positive m/z values. Filter the data accordingly beforehand! 
Aborting."); } } } // initialize if (algorithm_name != "mrm" && algorithm_name != "centroided") { // Resize peak flag vector flags_.resize(input_map.size()); for (Size i = 0; i < input_map.size(); ++i) { flags_[i].assign(input_map[i].size(), UNUSED); } } // do the work if (algorithm_name != "none") { FeatureFinderAlgorithm* algorithm = Factory<FeatureFinderAlgorithm>::create(algorithm_name); algorithm->setParameters(param); algorithm->setData(input_map, features, *this); algorithm->setSeeds(seeds); algorithm->run(); delete(algorithm); } if (algorithm_name != "mrm") // mrm works on chromatograms; the next section is only for conventional data { //report RT apex spectrum index and native ID for each feature for (Size i = 0; i < features.size(); ++i) { //index Size spectrum_index = input_map.RTBegin(features[i].getRT()) - input_map.begin(); features[i].setMetaValue("spectrum_index", spectrum_index); //native id if (spectrum_index < input_map.size()) { String native_id = input_map[spectrum_index].getNativeID(); features[i].setMetaValue("spectrum_native_id", native_id); } else { /// @todo that happens sometimes using IsotopeWaveletFeatureFinder (Rene, Marc, Andreas, Clemens) std::cerr << "FeatureFinderAlgorithm_impl, line=" << __LINE__ << "; FixMe this cannot be, but happens" << std::endl; } } } }
// (tail of a section that starts before this excerpt)
// The peak at index 40 has intensity 37.5 before filtering and sqrt(37.5) afterwards.
TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5)

e_ptr->filterSpectrum(spec);
TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION

// filterPeakMap(): same check, with the spectrum wrapped in a PeakMap.
START_SECTION((void filterPeakMap(PeakMap& exp)))
  DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

  PeakMap pm;
  pm.addSpectrum(spec);

  TEST_REAL_SIMILAR((pm.begin()->begin() + 40)->getIntensity(), 37.5)

  e_ptr->filterPeakMap(pm);
  TEST_REAL_SIMILAR((pm.begin()->begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION

// filterPeakSpectrum(): same check on a single PeakSpectrum.
START_SECTION((void filterPeakSpectrum(PeakSpectrum& spectrum)))
  DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

  TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5)

  e_ptr->filterPeakSpectrum(spec);
  TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION
ExitCodes main_(int, const char**) { // instance specific location of settings in INI file (e.g. 'TOPP_Skeleton:1:') String ini_location; // path to the log file String logfile(getStringOption_("log")); String xtandem_executable(getStringOption_("xtandem_executable")); String inputfile_name; String outputfile_name; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- inputfile_name = getStringOption_("in"); writeDebug_(String("Input file: ") + inputfile_name, 1); if (inputfile_name == "") { writeLog_("No input file specified. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } outputfile_name = getStringOption_("out"); writeDebug_(String("Output file: ") + outputfile_name, 1); if (outputfile_name == "") { writeLog_("No output file specified. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } // write input xml file String temp_directory = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files { QDir d; d.mkpath(temp_directory.toQString()); } String input_filename(temp_directory + "_tandem_input_file.xml"); String tandem_input_filename(temp_directory + "_tandem_input_file.mzData"); String tandem_output_filename(temp_directory + "_tandem_output_file.xml"); String tandem_taxonomy_filename(temp_directory + "_tandem_taxonomy_file.xml"); //------------------------------------------------------------- // Validate user parameters //------------------------------------------------------------- if (getIntOption_("min_precursor_charge") > getIntOption_("max_precursor_charge")) { LOG_ERROR << "Given charge range is invalid: max_precursor_charge needs to be >= min_precursor_charge." 
<< std::endl; return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // reading input //------------------------------------------------------------- String db_name(getStringOption_("database")); if (!File::readable(db_name)) { String full_db_name; try { full_db_name = File::findDatabase(db_name); } catch (...) { printUsage_(); return ILLEGAL_PARAMETERS; } db_name = full_db_name; } PeakMap exp; MzMLFile mzml_file; mzml_file.getOptions().addMSLevel(2); // only load msLevel 2 mzml_file.setLogType(log_type_); mzml_file.load(inputfile_name, exp); if (exp.getSpectra().empty()) { throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file."); } // determine type of spectral data (profile or centroided) SpectrumSettings::SpectrumType spectrum_type = exp[0].getType(); if (spectrum_type == SpectrumSettings::RAWDATA) { if (!getFlag_("force")) { throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag."); } } // we need to replace the native id with a simple numbering schema, to be able to // map the IDs back to the spectra (RT, and MZ information) Size native_id(0); for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it) { it->setNativeID(++native_id); } // We store the file in mzData file format, because MGF files somehow produce in most // of the cases IDs with charge 2+. We do not use the input file directly // because XTandem sometimes stumbles over misleading substrings in the filename, // e.g. mzXML ... MzDataFile mzdata_outfile; mzdata_outfile.store(tandem_input_filename, exp); XTandemInfile infile; infile.setInputFilename(tandem_input_filename); infile.setOutputFilename(tandem_output_filename); ofstream tax_out(tandem_taxonomy_filename.c_str()); tax_out << "<?xml version=\"1.0\"?>" << "\n"; tax_out << "\t<bioml label=\"x! 
taxon-to-file matching list\">" << "\n"; tax_out << "\t\t<taxon label=\"OpenMS_dummy_taxonomy\">" << "\n"; tax_out << "\t\t\t<file format=\"peptide\" URL=\"" << db_name << "\" />" << "\n"; tax_out << "\t</taxon>" << "\n"; tax_out << "</bioml>" << "\n"; tax_out.close(); infile.setTaxonomyFilename(tandem_taxonomy_filename); if (getStringOption_("precursor_error_units") == "Da") { infile.setPrecursorMassErrorUnit(XTandemInfile::DALTONS); } else { infile.setPrecursorMassErrorUnit(XTandemInfile::PPM); } if (getStringOption_("fragment_error_units") == "Da") { infile.setFragmentMassErrorUnit(XTandemInfile::DALTONS); } else { infile.setFragmentMassErrorUnit(XTandemInfile::PPM); } if (getStringOption_("default_input_file") != "") { infile.load(getStringOption_("default_input_file")); infile.setDefaultParametersFilename(getStringOption_("default_input_file")); } else { String default_file = File::find("CHEMISTRY/XTandem_default_input.xml"); infile.load(default_file); infile.setDefaultParametersFilename(default_file); } infile.setPrecursorMassTolerancePlus(getDoubleOption_("precursor_mass_tolerance")); infile.setPrecursorMassToleranceMinus(getDoubleOption_("precursor_mass_tolerance")); infile.setFragmentMassTolerance(getDoubleOption_("fragment_mass_tolerance")); infile.setMaxPrecursorCharge(getIntOption_("max_precursor_charge")); infile.setNumberOfThreads(getIntOption_("threads")); infile.setModifications(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications"))); infile.setTaxon("OpenMS_dummy_taxonomy"); infile.setOutputResults(getStringOption_("output_results")); infile.setMaxValidEValue(getDoubleOption_("max_valid_expect")); infile.setCleavageSite(getStringOption_("cleavage_site")); infile.setNumberOfMissedCleavages(getIntOption_("missed_cleavages")); infile.setRefine(getFlag_("refinement")); infile.setSemiCleavage(getFlag_("semi_cleavage")); bool allow_isotope_error = getStringOption_("allow_isotope_error") == "yes" ? 
true : false; infile.setAllowIsotopeError(allow_isotope_error); infile.write(input_filename); //------------------------------------------------------------- // calculations //------------------------------------------------------------- int status = QProcess::execute(xtandem_executable.toQString(), QStringList(input_filename.toQString())); // does automatic escaping etc... if (status != 0) { writeLog_("XTandem problem. Aborting! Calling command was: '" + xtandem_executable + " \"" + input_filename + "\"'.\nDoes the !XTandem executable exist?"); // clean temporary files if (this->debug_level_ < 2) { File::removeDirRecursively(temp_directory); LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl; } else { LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl; } return EXTERNAL_PROGRAM_ERROR; } vector<ProteinIdentification> protein_ids; ProteinIdentification protein_id; vector<PeptideIdentification> peptide_ids; // read the output of X!Tandem and write it to idXML XTandemXMLFile tandem_output; tandem_output.setModificationDefinitionsSet(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications"))); // find the file, because XTandem extends the filename with a timestamp we do not know (exactly) StringList files; File::fileList(temp_directory, "_tandem_output_file*.xml", files); if (files.size() != 1) { throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, tandem_output_filename); } tandem_output.load(temp_directory + files[0], protein_id, peptide_ids); // now put the RTs into the peptide_ids from the spectrum ids for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it != peptide_ids.end(); ++it) { UInt id = (Int)it->getMetaValue("spectrum_id"); --id; // native IDs were written 1-based if (id < exp.size()) { it->setRT(exp[id].getRT()); double pre_mz(0.0); if 
(!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ(); it->setMZ(pre_mz); //it->removeMetaValue("spectrum_id"); } else { LOG_ERROR << "XTandemAdapter: Error: id '" << id << "' not found in peak map!" << endl; } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // handle the search parameters ProteinIdentification::SearchParameters search_parameters; search_parameters.db = getStringOption_("database"); search_parameters.charges = "+" + String(getIntOption_("min_precursor_charge")) + "-+" + String(getIntOption_("max_precursor_charge")); ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC; search_parameters.mass_type = mass_type; search_parameters.fixed_modifications = getStringList_("fixed_modifications"); search_parameters.variable_modifications = getStringList_("variable_modifications"); search_parameters.missed_cleavages = getIntOption_("missed_cleavages"); search_parameters.peak_mass_tolerance = getDoubleOption_("fragment_mass_tolerance"); search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance"); protein_id.setSearchParameters(search_parameters); protein_id.setSearchEngineVersion(""); protein_id.setSearchEngine("XTandem"); protein_ids.push_back(protein_id); IdXMLFile().store(outputfile_name, protein_ids, peptide_ids); /// Deletion of temporary files if (this->debug_level_ < 2) { File::removeDirRecursively(temp_directory); LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl; } else { LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." 
<< std::endl; } // some stats LOG_INFO << "Statistics:\n" << " identified MS2 spectra: " << peptide_ids.size() << " / " << exp.size() << " = " << int(peptide_ids.size() * 100.0 / exp.size()) << "% (with e-value < " << String(getDoubleOption_("max_valid_expect")) << ")" << std::endl; return EXECUTION_OK; }