Esempio n. 1
0
void MascotXMLFile::initializeLookup(SpectrumMetaDataLookup& lookup, const MSExperiment<>& exp, const String& scan_regex)
{
    // load spectra and extract scan numbers from the native IDs
    // (expected format: "... scan=#"):
    lookup.readSpectra(exp.getSpectra());
    if (scan_regex.empty()) // use default formats
    {
        if (!lookup.empty()) // raw data given -> spectrum look-up possible
        {
            // possible formats and resulting scan numbers:
            // - Mascot 2.3 (?):
            // <pep_scan_title>scan=818</pep_scan_title> -> 818
            // - ProteomeDiscoverer/Mascot 2.3 or 2.4:
            // <pep_scan_title>Spectrum136 scans:712,</pep_scan_title> -> 712
            // - other variants:
            // <pep_scan_title>Spectrum3411 scans: 2975,</pep_scan_title> -> 2975
            // <...>File773 Spectrum198145 scans: 6094</...> -> 6094
            // <...>6860: Scan 10668 (rt=5380.57)</...> -> 10668
            // <pep_scan_title>Scan Number: 1460</pep_scan_title> -> 1460
            lookup.addReferenceFormat("[Ss]can( [Nn]umber)?s?[=:]? *(?<SCAN>\\d+)");
            // - with .dta input to Mascot:
            // <...>/path/to/FTAC05_13.673.673.2.dta</...> -> 673
            lookup.addReferenceFormat("\\.(?<SCAN>\\d+)\\.\\d+\\.(?<CHARGE>\\d+)(\\.dta)?");
        }
        // title containing RT and MZ instead of scan number:
        // <...>575.848571777344_5018.0811_controllerType=0 controllerNumber=1 scan=11515_EcoliMS2small</...>
        lookup.addReferenceFormat("^(?<MZ>\\d+(\\.\\d+)?)_(?<RT>\\d+(\\.\\d+)?)");
    }
    else // use only user-defined format
    {
        lookup.addReferenceFormat(scan_regex);
    }
}
Esempio n. 2
0
  ExitCodes main_(int, const char**) override
  {
    String tmp_dir = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files
    {
      QDir d;
      d.mkpath(tmp_dir.toQString());
    }
    String logfile(getStringOption_("log"));
    String myrimatch_executable(getStringOption_("myrimatch_executable"));

    //-------------------------------------------------------------
    // get version of MyriMatch
    //-------------------------------------------------------------

    QProcess qp;
    String myrimatch_version;
    MyriMatchVersion myrimatch_version_i;

    // we invoke myrimatch w/o arguments. that yields a return code != 0. but
    // there is no other way for version 2.1 to get the version number
    qp.start(myrimatch_executable.toQString(), QStringList(), QIODevice::ReadOnly); // does automatic escaping etc...
    qp.waitForFinished();
    String output(QString(qp.readAllStandardOutput()));

    vector<String> lines;
    vector<String> version_split;
    output.split('\n', lines);

    // the version number is expected to be in the second line
    if (lines.size() < 2)
    {
      writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!");
      return EXTERNAL_PROGRAM_ERROR;
    }

    // the version is expected to be something like:
    // MyriMatch 2.1.111 (2011-12-27)
    lines[1].split(' ', version_split);
    if (version_split.size() == 3 && getVersion_(version_split[1], myrimatch_version_i))
    {
      myrimatch_version = version_split[1].removeWhitespaces();
      writeDebug_("Setting MyriMatch version to " + myrimatch_version, 1);
    }
    else
    {
      writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!");
      return EXTERNAL_PROGRAM_ERROR;
    }
    if (! (   (myrimatch_version_i.myrimatch_major == 2) &&  // major must be 2
              (myrimatch_version_i.myrimatch_minor == 1 || myrimatch_version_i.myrimatch_minor == 2) // minor .1 or .2
          ))
    {
      writeLog_("Warning: unsupported MyriMatch version (" + myrimatch_version + "). Tested only for MyriMatch 2.1.x and 2.2.x."
                "\nIf you encounter parameter errors, you can try the flag 'ignoreConfigErrors', but be aware that MyriMatch might be misconfigured.");
    }

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    String inputfile_name = File::absolutePath(getStringOption_("in"));
    String outputfile_name = getStringOption_("out");
    String db_name = File::absolutePath(String(getStringOption_("database")));

    // building parameter String
    StringList parameters;

    if (getFlag_("ignoreConfigErrors")) parameters << "-ignoreConfigErrors";

    // Common Identification engine options
    StringList static_mod_list;
    StringList dynamic_mod_list;
    translateModifications(static_mod_list, dynamic_mod_list);
    if (!static_mod_list.empty())
      parameters << "-StaticMods" << ListUtils::concatenate(static_mod_list, " ");
    if (!dynamic_mod_list.empty())
      parameters << "-DynamicMods" << ListUtils::concatenate(dynamic_mod_list, " ");

    parameters << "-ProteinDatabase"  << File::absolutePath(db_name);

    if (getFlag_("precursor_mass_tolerance_avg"))
    {
      parameters << "-AvgPrecursorMzTolerance";
    }
    else
    {
      parameters << "-MonoPrecursorMzTolerance";
    }
    String precursor_mass_tolerance_unit = getStringOption_("precursor_mass_tolerance_unit") == "Da" ? " m/z" : " ppm";
    parameters << String(getDoubleOption_("precursor_mass_tolerance")) + precursor_mass_tolerance_unit;

    String fragment_mass_tolerance_unit = getStringOption_("fragment_mass_tolerance_unit");
    if (fragment_mass_tolerance_unit == "Da")
    {
      fragment_mass_tolerance_unit = "m/z";
    }

    parameters << "-FragmentMzTolerance" << String(getDoubleOption_("fragment_mass_tolerance")) + " " + fragment_mass_tolerance_unit;

    StringList slf = getStringList_("SpectrumListFilters");
    if (slf.size() > 0) 
    {
      if (myrimatch_version_i.myrimatch_minor <= 1)
      { // use quotes around the slf arguments (will be added automatically by Qt during call), i.e. "-SpectrumListFilters" "peakPicking false 2-"
        parameters << "-SpectrumListFilters" << ListUtils::concatenate(slf, ";") << "";
      }
      else
      { // no quotes -- pass a single argument, i.e. "-SpectrumListFilters peakPicking false 2-"
        parameters << "-SpectrumListFilters " + ListUtils::concatenate(slf, ";") << "";
      }
      
    }
    //parameters << "-ThreadCountMultiplier" << String(getIntOption_("threads")); // MyriMatch does not recognise this, even though it's in the manual.

    // MyriMatch specific parameters
    parameters << "-NumChargeStates" << getIntOption_("NumChargeStates");
    parameters << "-TicCutoffPercentage" << String(getDoubleOption_("TicCutoffPercentage"));
    parameters << "-MaxDynamicMods" << getIntOption_("MaxDynamicMods");
    parameters << "-MaxResultRank" << getIntOption_("MaxResultRank");
    parameters << "-MinTerminiCleavages" << getIntOption_("MinTerminiCleavages");
    parameters << "-MaxMissedCleavages" << getIntOption_("MaxMissedCleavages");
    String cleavage_rule = getStringOption_("CleavageRules");
    if (cleavage_rule.empty())
    {
      cleavage_rule = "Trypsin/P";
    }
    parameters << "-CleavageRules" << cleavage_rule;

    // advanced parameters
    parameters << "-MinPeptideMass"   << getDoubleOption_("MinPeptideMass");
    parameters << "-MaxPeptideMass"   << getDoubleOption_("MaxPeptideMass");
    parameters << "-MinPeptideLength" << getIntOption_("MinPeptideLength");
    parameters << "-MaxPeptideLength" << getIntOption_("MaxPeptideLength");
    parameters << "-NumIntensityClasses" << getIntOption_("NumIntensityClasses");
    parameters << "-ClassSizeMultiplier" << getDoubleOption_("ClassSizeMultiplier");
    parameters << "-MonoisotopeAdjustmentSet" << getStringOption_("MonoisotopeAdjustmentSet");
    parameters << "-cpus" << getIntOption_("threads");


    // Constant parameters

    // DecoyPrefix worked only when set through the config file
    String cfg_file = tmp_dir + "myrimatch.cfg";
    ofstream f(cfg_file.c_str());
    f << "DecoyPrefix=\"\"\n";
    f.close();
    parameters << "-cfg" << cfg_file;

    // path to input file must be the last parameter
    parameters << inputfile_name;

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    QStringList qparam;
    writeDebug_("MyriMatch arguments:", 1);
    writeDebug_(String("\"") + ListUtils::concatenate(parameters, "\" \"") + "\"", 1);
    for (Size i = 0; i < parameters.size(); ++i)
    {
      qparam << parameters[i].toQString();
    }
    QProcess process;

    // Bad style, because it breaks relative paths?
    process.setWorkingDirectory(tmp_dir.toQString());

    process.start(myrimatch_executable.toQString(), qparam, QIODevice::ReadOnly);
    bool success = process.waitForFinished(-1);
    String myri_msg(QString(process.readAllStandardOutput()));
    String myri_err(QString(process.readAllStandardError()));
    writeDebug_(myri_msg, 1);
    writeDebug_(myri_err, 0);
    if (!success || process.exitStatus() != 0 || process.exitCode() != 0)
    {
      writeLog_("Error: MyriMatch problem! (Details can be seen in the logfile: \"" + logfile + "\")");
      writeLog_("Note: This message can also be triggered if you run out of space in your tmp directory");
      return EXTERNAL_PROGRAM_ERROR;
    }

    //-------------------------------------------------------------
    // reading MyriMatch output
    //-------------------------------------------------------------

    writeDebug_("Reading output of MyriMatch", 5);
    String exp_name = File::basename(inputfile_name);
    String pep_file = tmp_dir + File::removeExtension(exp_name) + ".pepXML";

    vector<ProteinIdentification> protein_identifications;
    vector<PeptideIdentification> peptide_identifications;

    PeakMap exp;
    if (File::exists(pep_file))
    {
      MzMLFile fh;
      fh.load(inputfile_name, exp);

      SpectrumMetaDataLookup lookup;
      lookup.readSpectra(exp.getSpectra());
      PepXMLFile().load(pep_file, protein_identifications,
                        peptide_identifications, exp_name, lookup);
    }
    else
    {
      writeLog_("Error: MyriMatch problem! No pepXML output file (expected as '" + pep_file + "') was generated by MyriMatch.");
      writeLog_("Note: This message can be triggered if no MS2 spectra were found or no identifications were made.");
      writeLog_("      Myrimatch expects MS2 spectra in mzML files to contain the MSn tag. MSSpectrum with MS level 2 is not sufficient. You can use FileConverter to create such an mzML file by converting from mzML --> mzXML --> mzML.");
      return EXTERNAL_PROGRAM_ERROR;
    }

    if (debug_level_ == 0)
    {
      QFile(pep_file.toQString()).remove();
      QFile(cfg_file.toQString()).remove();
    }
    else
    {
      writeDebug_(String("Not removing '") + pep_file + "' for debugging purposes. Please delete manually!", 1);
      writeDebug_(String("Not removing '") + cfg_file + "' for debugging purposes. Please delete manually!", 1);
    }
    //-------------------------------------------------------------
    // writing results
    //-------------------------------------------------------------
    ProteinIdentification::SearchParameters search_parameters;
    search_parameters.db = getStringOption_("database");
    ProteinIdentification::PeakMassType mass_type = getFlag_("precursor_mass_tolerance_avg") == true ? ProteinIdentification::AVERAGE : ProteinIdentification::MONOISOTOPIC;
    search_parameters.mass_type = mass_type;
    search_parameters.fixed_modifications = getStringList_("fixed_modifications");
    search_parameters.variable_modifications = getStringList_("variable_modifications");
    search_parameters.missed_cleavages = getIntOption_("MaxMissedCleavages");
    search_parameters.fragment_mass_tolerance = getDoubleOption_("fragment_mass_tolerance");
    search_parameters.precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance");
    search_parameters.precursor_mass_tolerance_ppm = getStringOption_("precursor_mass_tolerance_unit") == "ppm" ? true : false;
    search_parameters.fragment_mass_tolerance_ppm = getStringOption_("fragment_mass_tolerance_unit") == "ppm" ? true : false;
    protein_identifications[0].setSearchParameters(search_parameters);
    protein_identifications[0].setSearchEngineVersion(myrimatch_version);
    protein_identifications[0].setSearchEngine("MyriMatch");

    if (!protein_identifications.empty())
    {
      StringList ms_runs;
      exp.getPrimaryMSRunPath(ms_runs);
      protein_identifications[0].setPrimaryMSRunPath(ms_runs);
    }
    IdXMLFile().store(outputfile_name, protein_identifications, peptide_identifications);
    return EXECUTION_OK;
  }
Esempio n. 3
0
void MascotXMLFile::load(const String& filename,
                         ProteinIdentification& protein_identification,
                         vector<PeptideIdentification>& id_data,
                         map<String, vector<AASequence> >& peptides,
                         const SpectrumMetaDataLookup& lookup)
{
    //clear
    protein_identification = ProteinIdentification();
    id_data.clear();

    Internal::MascotXMLHandler handler(protein_identification, id_data,
                                       filename, peptides, lookup);
    parse_(filename, &handler);

    // since the Mascot XML can contain "peptides" without sequences,
    // the identifications without any real peptide hit are removed
    vector<PeptideIdentification> filtered_hits;
    filtered_hits.reserve(id_data.size());
    Size missing_sequence = 0; // counter

    for (vector<PeptideIdentification>::iterator id_it = id_data.begin();
            id_it != id_data.end(); ++id_it)
    {
        const vector<PeptideHit>& peptide_hits = id_it->getHits();
        if (!peptide_hits.empty() &&
                (peptide_hits.size() > 1 || !peptide_hits[0].getSequence().empty()))
        {
            filtered_hits.push_back(*id_it);
        }
        else if (!id_it->empty()) ++missing_sequence;
    }
    if (missing_sequence)
    {
        LOG_WARN << "Warning: Removed " << missing_sequence
                 << " peptide identifications without sequence." << endl;
    }
    id_data.swap(filtered_hits);

    // check if we have (some) RT information:
    Size no_rt_count = 0;
    for (vector<PeptideIdentification>::iterator id_it = id_data.begin();
            id_it != id_data.end(); ++id_it)
    {
        if (!id_it->hasRT()) ++no_rt_count;
    }
    if (no_rt_count)
    {
        LOG_WARN << "Warning: " << no_rt_count << " (of " << id_data.size()
                 << ") peptide identifications have no retention time value."
                 << endl;
    }
    // if we have a mapping, but couldn't find any RT values, that's an error:
    if (!lookup.empty() && (no_rt_count == id_data.size()))
    {
        throw Exception::MissingInformation(
            __FILE__, __LINE__, __PRETTY_FUNCTION__,
            "No retention time information for peptide identifications found");
    }

    // argh! Mascot 2.2 tends to repeat the first hit (yes it appears twice),
    // so we delete one of them
    for (vector<PeptideIdentification>::iterator it = id_data.begin();
            it != id_data.end(); ++it)
    {
        vector<PeptideHit> peptide_hits = it->getHits();
        // check if equal, except for rank
        if (peptide_hits.size() > 1 &&
                peptide_hits[0].getScore() == peptide_hits[1].getScore() &&
                peptide_hits[0].getSequence() == peptide_hits[1].getSequence() &&
                peptide_hits[0].getCharge() == peptide_hits[1].getCharge())
        {
            // erase first hit
            peptide_hits.erase(peptide_hits.begin() + 1);
            it->setHits(peptide_hits);
        }
    }
}
spectrum.setRT(2.0);
spectrum.setMSLevel(2);
Precursor prec;
prec.setMZ(1000.0);
prec.setCharge(2);
spectrum.getPrecursors().push_back(prec);
spectra.push_back(spectrum);
spectrum.setNativeID("spectrum=2");
spectrum.setRT(3.0);
spectrum.setMSLevel(2);
prec.setMZ(500.0);
prec.setCharge(3);
spectrum.getPrecursors()[0] = prec;
spectra.push_back(spectrum);

SpectrumMetaDataLookup lookup;

START_SECTION((template <typename SpectrumContainer> void readSpectra(const SpectrumContainer&, const String&, bool)))
{
  lookup.readSpectra(spectra, SpectrumLookup::default_scan_regexp, true);
  TEST_EQUAL(lookup.empty(), false);
}
END_SECTION

START_SECTION((void getSpectrumMetaData(Size, SpectrumMetaData&) const))
{
  SpectrumMetaDataLookup::SpectrumMetaData meta;
  lookup.getSpectrumMetaData(0, meta);
  TEST_EQUAL(meta.rt, 1.0);
  TEST_EQUAL(meta.ms_level, 1);
  TEST_EQUAL(meta.native_id, "spectrum=0");