// Prepares a spectrum meta data look-up for resolving Mascot spectrum titles.
//
// The spectra of "exp" are loaded into "lookup" first. Afterwards the
// reference formats (regular expressions) used to interpret the titles are
// registered:
//  - if "scan_regex" is non-empty, it is the only format that is registered;
//  - otherwise a set of default formats is registered (the scan-number based
//    ones only if spectra are available in the look-up).
void MascotXMLFile::initializeLookup(SpectrumMetaDataLookup& lookup, const MSExperiment<>& exp, const String& scan_regex)
{
  // load spectra and extract scan numbers from the native IDs
  // (expected format: "... scan=#"):
  lookup.readSpectra(exp.getSpectra());

  // a user-defined format replaces all defaults:
  if (!scan_regex.empty())
  {
    lookup.addReferenceFormat(scan_regex);
    return;
  }

  // Scan number in the title. Examples and resulting scan numbers:
  // - Mascot 2.3 (?):
  //   <pep_scan_title>scan=818</pep_scan_title> -> 818
  // - ProteomeDiscoverer/Mascot 2.3 or 2.4:
  //   <pep_scan_title>Spectrum136 scans:712,</pep_scan_title> -> 712
  // - other variants:
  //   <pep_scan_title>Spectrum3411 scans: 2975,</pep_scan_title> -> 2975
  //   <...>File773 Spectrum198145 scans: 6094</...> -> 6094
  //   <...>6860: Scan 10668 (rt=5380.57)</...> -> 10668
  //   <pep_scan_title>Scan Number: 1460</pep_scan_title> -> 1460
  const char* const scan_number_format = "[Ss]can( [Nn]umber)?s?[=:]? *(?<SCAN>\\d+)";

  // Title derived from a .dta file given to Mascot:
  //   <...>/path/to/FTAC05_13.673.673.2.dta</...> -> 673
  const char* const dta_file_format = "\\.(?<SCAN>\\d+)\\.\\d+\\.(?<CHARGE>\\d+)(\\.dta)?";

  // Title containing RT and MZ instead of a scan number:
  //   <...>575.848571777344_5018.0811_controllerType=0 controllerNumber=1 scan=11515_EcoliMS2small</...>
  const char* const mz_rt_format = "^(?<MZ>\\d+(\\.\\d+)?)_(?<RT>\\d+(\\.\\d+)?)";

  // scan-based formats are only registered when raw data was given,
  // i.e. when a spectrum look-up is possible:
  const bool spectra_available = !lookup.empty();
  if (spectra_available)
  {
    lookup.addReferenceFormat(scan_number_format);
    lookup.addReferenceFormat(dta_file_format);
  }

  lookup.addReferenceFormat(mz_rt_format);
}
ExitCodes main_(int, const char**) override { String tmp_dir = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files { QDir d; d.mkpath(tmp_dir.toQString()); } String logfile(getStringOption_("log")); String myrimatch_executable(getStringOption_("myrimatch_executable")); //------------------------------------------------------------- // get version of MyriMatch //------------------------------------------------------------- QProcess qp; String myrimatch_version; MyriMatchVersion myrimatch_version_i; // we invoke myrimatch w/o arguments. that yields a return code != 0. but // there is no other way for version 2.1 to get the version number qp.start(myrimatch_executable.toQString(), QStringList(), QIODevice::ReadOnly); // does automatic escaping etc... qp.waitForFinished(); String output(QString(qp.readAllStandardOutput())); vector<String> lines; vector<String> version_split; output.split('\n', lines); // the version number is expected to be in the second line if (lines.size() < 2) { writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!"); return EXTERNAL_PROGRAM_ERROR; } // the version is expected to be something like: // MyriMatch 2.1.111 (2011-12-27) lines[1].split(' ', version_split); if (version_split.size() == 3 && getVersion_(version_split[1], myrimatch_version_i)) { myrimatch_version = version_split[1].removeWhitespaces(); writeDebug_("Setting MyriMatch version to " + myrimatch_version, 1); } else { writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!"); return EXTERNAL_PROGRAM_ERROR; } if (! ( (myrimatch_version_i.myrimatch_major == 2) && // major must be 2 (myrimatch_version_i.myrimatch_minor == 1 || myrimatch_version_i.myrimatch_minor == 2) // minor .1 or .2 )) { writeLog_("Warning: unsupported MyriMatch version (" + myrimatch_version + "). Tested only for MyriMatch 2.1.x and 2.2.x." 
"\nIf you encounter parameter errors, you can try the flag 'ignoreConfigErrors', but be aware that MyriMatch might be misconfigured."); } //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String inputfile_name = File::absolutePath(getStringOption_("in")); String outputfile_name = getStringOption_("out"); String db_name = File::absolutePath(String(getStringOption_("database"))); // building parameter String StringList parameters; if (getFlag_("ignoreConfigErrors")) parameters << "-ignoreConfigErrors"; // Common Identification engine options StringList static_mod_list; StringList dynamic_mod_list; translateModifications(static_mod_list, dynamic_mod_list); if (!static_mod_list.empty()) parameters << "-StaticMods" << ListUtils::concatenate(static_mod_list, " "); if (!dynamic_mod_list.empty()) parameters << "-DynamicMods" << ListUtils::concatenate(dynamic_mod_list, " "); parameters << "-ProteinDatabase" << File::absolutePath(db_name); if (getFlag_("precursor_mass_tolerance_avg")) { parameters << "-AvgPrecursorMzTolerance"; } else { parameters << "-MonoPrecursorMzTolerance"; } String precursor_mass_tolerance_unit = getStringOption_("precursor_mass_tolerance_unit") == "Da" ? " m/z" : " ppm"; parameters << String(getDoubleOption_("precursor_mass_tolerance")) + precursor_mass_tolerance_unit; String fragment_mass_tolerance_unit = getStringOption_("fragment_mass_tolerance_unit"); if (fragment_mass_tolerance_unit == "Da") { fragment_mass_tolerance_unit = "m/z"; } parameters << "-FragmentMzTolerance" << String(getDoubleOption_("fragment_mass_tolerance")) + " " + fragment_mass_tolerance_unit; StringList slf = getStringList_("SpectrumListFilters"); if (slf.size() > 0) { if (myrimatch_version_i.myrimatch_minor <= 1) { // use quotes around the slf arguments (will be added automatically by Qt during call), i.e. 
"-SpectrumListFilters" "peakPicking false 2-" parameters << "-SpectrumListFilters" << ListUtils::concatenate(slf, ";") << ""; } else { // no quotes -- pass a single argument, i.e. "-SpectrumListFilters peakPicking false 2-" parameters << "-SpectrumListFilters " + ListUtils::concatenate(slf, ";") << ""; } } //parameters << "-ThreadCountMultiplier" << String(getIntOption_("threads")); // MyriMatch does not recognise this, even though it's in the manual. // MyriMatch specific parameters parameters << "-NumChargeStates" << getIntOption_("NumChargeStates"); parameters << "-TicCutoffPercentage" << String(getDoubleOption_("TicCutoffPercentage")); parameters << "-MaxDynamicMods" << getIntOption_("MaxDynamicMods"); parameters << "-MaxResultRank" << getIntOption_("MaxResultRank"); parameters << "-MinTerminiCleavages" << getIntOption_("MinTerminiCleavages"); parameters << "-MaxMissedCleavages" << getIntOption_("MaxMissedCleavages"); String cleavage_rule = getStringOption_("CleavageRules"); if (cleavage_rule.empty()) { cleavage_rule = "Trypsin/P"; } parameters << "-CleavageRules" << cleavage_rule; // advanced parameters parameters << "-MinPeptideMass" << getDoubleOption_("MinPeptideMass"); parameters << "-MaxPeptideMass" << getDoubleOption_("MaxPeptideMass"); parameters << "-MinPeptideLength" << getIntOption_("MinPeptideLength"); parameters << "-MaxPeptideLength" << getIntOption_("MaxPeptideLength"); parameters << "-NumIntensityClasses" << getIntOption_("NumIntensityClasses"); parameters << "-ClassSizeMultiplier" << getDoubleOption_("ClassSizeMultiplier"); parameters << "-MonoisotopeAdjustmentSet" << getStringOption_("MonoisotopeAdjustmentSet"); parameters << "-cpus" << getIntOption_("threads"); // Constant parameters // DecoyPrefix worked only when set through the config file String cfg_file = tmp_dir + "myrimatch.cfg"; ofstream f(cfg_file.c_str()); f << "DecoyPrefix=\"\"\n"; f.close(); parameters << "-cfg" << cfg_file; // path to input file must be the last parameter 
parameters << inputfile_name; //------------------------------------------------------------- // calculations //------------------------------------------------------------- QStringList qparam; writeDebug_("MyriMatch arguments:", 1); writeDebug_(String("\"") + ListUtils::concatenate(parameters, "\" \"") + "\"", 1); for (Size i = 0; i < parameters.size(); ++i) { qparam << parameters[i].toQString(); } QProcess process; // Bad style, because it breaks relative paths? process.setWorkingDirectory(tmp_dir.toQString()); process.start(myrimatch_executable.toQString(), qparam, QIODevice::ReadOnly); bool success = process.waitForFinished(-1); String myri_msg(QString(process.readAllStandardOutput())); String myri_err(QString(process.readAllStandardError())); writeDebug_(myri_msg, 1); writeDebug_(myri_err, 0); if (!success || process.exitStatus() != 0 || process.exitCode() != 0) { writeLog_("Error: MyriMatch problem! (Details can be seen in the logfile: \"" + logfile + "\")"); writeLog_("Note: This message can also be triggered if you run out of space in your tmp directory"); return EXTERNAL_PROGRAM_ERROR; } //------------------------------------------------------------- // reading MyriMatch output //------------------------------------------------------------- writeDebug_("Reading output of MyriMatch", 5); String exp_name = File::basename(inputfile_name); String pep_file = tmp_dir + File::removeExtension(exp_name) + ".pepXML"; vector<ProteinIdentification> protein_identifications; vector<PeptideIdentification> peptide_identifications; PeakMap exp; if (File::exists(pep_file)) { MzMLFile fh; fh.load(inputfile_name, exp); SpectrumMetaDataLookup lookup; lookup.readSpectra(exp.getSpectra()); PepXMLFile().load(pep_file, protein_identifications, peptide_identifications, exp_name, lookup); } else { writeLog_("Error: MyriMatch problem! 
No pepXML output file (expected as '" + pep_file + "') was generated by MyriMatch."); writeLog_("Note: This message can be triggered if no MS2 spectra were found or no identifications were made."); writeLog_(" Myrimatch expects MS2 spectra in mzML files to contain the MSn tag. MSSpectrum with MS level 2 is not sufficient. You can use FileConverter to create such an mzML file by converting from mzML --> mzXML --> mzML."); return EXTERNAL_PROGRAM_ERROR; } if (debug_level_ == 0) { QFile(pep_file.toQString()).remove(); QFile(cfg_file.toQString()).remove(); } else { writeDebug_(String("Not removing '") + pep_file + "' for debugging purposes. Please delete manually!", 1); writeDebug_(String("Not removing '") + cfg_file + "' for debugging purposes. Please delete manually!", 1); } //------------------------------------------------------------- // writing results //------------------------------------------------------------- ProteinIdentification::SearchParameters search_parameters; search_parameters.db = getStringOption_("database"); ProteinIdentification::PeakMassType mass_type = getFlag_("precursor_mass_tolerance_avg") == true ? ProteinIdentification::AVERAGE : ProteinIdentification::MONOISOTOPIC; search_parameters.mass_type = mass_type; search_parameters.fixed_modifications = getStringList_("fixed_modifications"); search_parameters.variable_modifications = getStringList_("variable_modifications"); search_parameters.missed_cleavages = getIntOption_("MaxMissedCleavages"); search_parameters.fragment_mass_tolerance = getDoubleOption_("fragment_mass_tolerance"); search_parameters.precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance"); search_parameters.precursor_mass_tolerance_ppm = getStringOption_("precursor_mass_tolerance_unit") == "ppm" ? true : false; search_parameters.fragment_mass_tolerance_ppm = getStringOption_("fragment_mass_tolerance_unit") == "ppm" ? 
true : false; protein_identifications[0].setSearchParameters(search_parameters); protein_identifications[0].setSearchEngineVersion(myrimatch_version); protein_identifications[0].setSearchEngine("MyriMatch"); if (!protein_identifications.empty()) { StringList ms_runs; exp.getPrimaryMSRunPath(ms_runs); protein_identifications[0].setPrimaryMSRunPath(ms_runs); } IdXMLFile().store(outputfile_name, protein_identifications, peptide_identifications); return EXECUTION_OK; }
void MascotXMLFile::load(const String& filename, ProteinIdentification& protein_identification, vector<PeptideIdentification>& id_data, map<String, vector<AASequence> >& peptides, const SpectrumMetaDataLookup& lookup) { //clear protein_identification = ProteinIdentification(); id_data.clear(); Internal::MascotXMLHandler handler(protein_identification, id_data, filename, peptides, lookup); parse_(filename, &handler); // since the Mascot XML can contain "peptides" without sequences, // the identifications without any real peptide hit are removed vector<PeptideIdentification> filtered_hits; filtered_hits.reserve(id_data.size()); Size missing_sequence = 0; // counter for (vector<PeptideIdentification>::iterator id_it = id_data.begin(); id_it != id_data.end(); ++id_it) { const vector<PeptideHit>& peptide_hits = id_it->getHits(); if (!peptide_hits.empty() && (peptide_hits.size() > 1 || !peptide_hits[0].getSequence().empty())) { filtered_hits.push_back(*id_it); } else if (!id_it->empty()) ++missing_sequence; } if (missing_sequence) { LOG_WARN << "Warning: Removed " << missing_sequence << " peptide identifications without sequence." << endl; } id_data.swap(filtered_hits); // check if we have (some) RT information: Size no_rt_count = 0; for (vector<PeptideIdentification>::iterator id_it = id_data.begin(); id_it != id_data.end(); ++id_it) { if (!id_it->hasRT()) ++no_rt_count; } if (no_rt_count) { LOG_WARN << "Warning: " << no_rt_count << " (of " << id_data.size() << ") peptide identifications have no retention time value." << endl; } // if we have a mapping, but couldn't find any RT values, that's an error: if (!lookup.empty() && (no_rt_count == id_data.size())) { throw Exception::MissingInformation( __FILE__, __LINE__, __PRETTY_FUNCTION__, "No retention time information for peptide identifications found"); } // argh! 
Mascot 2.2 tends to repeat the first hit (yes it appears twice), // so we delete one of them for (vector<PeptideIdentification>::iterator it = id_data.begin(); it != id_data.end(); ++it) { vector<PeptideHit> peptide_hits = it->getHits(); // check if equal, except for rank if (peptide_hits.size() > 1 && peptide_hits[0].getScore() == peptide_hits[1].getScore() && peptide_hits[0].getSequence() == peptide_hits[1].getSequence() && peptide_hits[0].getCharge() == peptide_hits[1].getCharge()) { // erase first hit peptide_hits.erase(peptide_hits.begin() + 1); it->setHits(peptide_hits); } } }
spectrum.setRT(2.0); spectrum.setMSLevel(2); Precursor prec; prec.setMZ(1000.0); prec.setCharge(2); spectrum.getPrecursors().push_back(prec); spectra.push_back(spectrum); spectrum.setNativeID("spectrum=2"); spectrum.setRT(3.0); spectrum.setMSLevel(2); prec.setMZ(500.0); prec.setCharge(3); spectrum.getPrecursors()[0] = prec; spectra.push_back(spectrum); SpectrumMetaDataLookup lookup; START_SECTION((template <typename SpectrumContainer> void readSpectra(const SpectrumContainer&, const String&, bool))) { lookup.readSpectra(spectra, SpectrumLookup::default_scan_regexp, true); TEST_EQUAL(lookup.empty(), false); } END_SECTION START_SECTION((void getSpectrumMetaData(Size, SpectrumMetaData&) const)) { SpectrumMetaDataLookup::SpectrumMetaData meta; lookup.getSpectrumMetaData(0, meta); TEST_EQUAL(meta.rt, 1.0); TEST_EQUAL(meta.ms_level, 1); TEST_EQUAL(meta.native_id, "spectrum=0");