ExitCodes main_(int, const char**) { // instance specific location of settings in INI file (e.g. 'TOPP_Skeleton:1:') String ini_location; // path to the log file String logfile(getStringOption_("log")); String xtandem_executable(getStringOption_("xtandem_executable")); String inputfile_name; String outputfile_name; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- inputfile_name = getStringOption_("in"); writeDebug_(String("Input file: ") + inputfile_name, 1); if (inputfile_name == "") { writeLog_("No input file specified. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } outputfile_name = getStringOption_("out"); writeDebug_(String("Output file: ") + outputfile_name, 1); if (outputfile_name == "") { writeLog_("No output file specified. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } // write input xml file String temp_directory = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files { QDir d; d.mkpath(temp_directory.toQString()); } String input_filename(temp_directory + "_tandem_input_file.xml"); String tandem_input_filename(temp_directory + "_tandem_input_file.mzData"); String tandem_output_filename(temp_directory + "_tandem_output_file.xml"); String tandem_taxonomy_filename(temp_directory + "_tandem_taxonomy_file.xml"); //------------------------------------------------------------- // Validate user parameters //------------------------------------------------------------- if (getIntOption_("min_precursor_charge") > getIntOption_("max_precursor_charge")) { LOG_ERROR << "Given charge range is invalid: max_precursor_charge needs to be >= min_precursor_charge." 
<< std::endl; return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // reading input //------------------------------------------------------------- String db_name(getStringOption_("database")); if (!File::readable(db_name)) { String full_db_name; try { full_db_name = File::findDatabase(db_name); } catch (...) { printUsage_(); return ILLEGAL_PARAMETERS; } db_name = full_db_name; } PeakMap exp; MzMLFile mzml_file; mzml_file.getOptions().addMSLevel(2); // only load msLevel 2 mzml_file.setLogType(log_type_); mzml_file.load(inputfile_name, exp); if (exp.getSpectra().empty()) { throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file."); } // determine type of spectral data (profile or centroided) SpectrumSettings::SpectrumType spectrum_type = exp[0].getType(); if (spectrum_type == SpectrumSettings::RAWDATA) { if (!getFlag_("force")) { throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag."); } } // we need to replace the native id with a simple numbering schema, to be able to // map the IDs back to the spectra (RT, and MZ information) Size native_id(0); for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it) { it->setNativeID(++native_id); } // We store the file in mzData file format, because MGF files somehow produce in most // of the cases IDs with charge 2+. We do not use the input file directly // because XTandem sometimes stumbles over misleading substrings in the filename, // e.g. mzXML ... MzDataFile mzdata_outfile; mzdata_outfile.store(tandem_input_filename, exp); XTandemInfile infile; infile.setInputFilename(tandem_input_filename); infile.setOutputFilename(tandem_output_filename); ofstream tax_out(tandem_taxonomy_filename.c_str()); tax_out << "<?xml version=\"1.0\"?>" << "\n"; tax_out << "\t<bioml label=\"x! 
taxon-to-file matching list\">" << "\n"; tax_out << "\t\t<taxon label=\"OpenMS_dummy_taxonomy\">" << "\n"; tax_out << "\t\t\t<file format=\"peptide\" URL=\"" << db_name << "\" />" << "\n"; tax_out << "\t</taxon>" << "\n"; tax_out << "</bioml>" << "\n"; tax_out.close(); infile.setTaxonomyFilename(tandem_taxonomy_filename); if (getStringOption_("precursor_error_units") == "Da") { infile.setPrecursorMassErrorUnit(XTandemInfile::DALTONS); } else { infile.setPrecursorMassErrorUnit(XTandemInfile::PPM); } if (getStringOption_("fragment_error_units") == "Da") { infile.setFragmentMassErrorUnit(XTandemInfile::DALTONS); } else { infile.setFragmentMassErrorUnit(XTandemInfile::PPM); } if (getStringOption_("default_input_file") != "") { infile.load(getStringOption_("default_input_file")); infile.setDefaultParametersFilename(getStringOption_("default_input_file")); } else { String default_file = File::find("CHEMISTRY/XTandem_default_input.xml"); infile.load(default_file); infile.setDefaultParametersFilename(default_file); } infile.setPrecursorMassTolerancePlus(getDoubleOption_("precursor_mass_tolerance")); infile.setPrecursorMassToleranceMinus(getDoubleOption_("precursor_mass_tolerance")); infile.setFragmentMassTolerance(getDoubleOption_("fragment_mass_tolerance")); infile.setMaxPrecursorCharge(getIntOption_("max_precursor_charge")); infile.setNumberOfThreads(getIntOption_("threads")); infile.setModifications(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications"))); infile.setTaxon("OpenMS_dummy_taxonomy"); infile.setOutputResults(getStringOption_("output_results")); infile.setMaxValidEValue(getDoubleOption_("max_valid_expect")); infile.setCleavageSite(getStringOption_("cleavage_site")); infile.setNumberOfMissedCleavages(getIntOption_("missed_cleavages")); infile.setRefine(getFlag_("refinement")); infile.setSemiCleavage(getFlag_("semi_cleavage")); bool allow_isotope_error = getStringOption_("allow_isotope_error") == "yes" ? 
true : false; infile.setAllowIsotopeError(allow_isotope_error); infile.write(input_filename); //------------------------------------------------------------- // calculations //------------------------------------------------------------- int status = QProcess::execute(xtandem_executable.toQString(), QStringList(input_filename.toQString())); // does automatic escaping etc... if (status != 0) { writeLog_("XTandem problem. Aborting! Calling command was: '" + xtandem_executable + " \"" + input_filename + "\"'.\nDoes the !XTandem executable exist?"); // clean temporary files if (this->debug_level_ < 2) { File::removeDirRecursively(temp_directory); LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl; } else { LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl; } return EXTERNAL_PROGRAM_ERROR; } vector<ProteinIdentification> protein_ids; ProteinIdentification protein_id; vector<PeptideIdentification> peptide_ids; // read the output of X!Tandem and write it to idXML XTandemXMLFile tandem_output; tandem_output.setModificationDefinitionsSet(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications"))); // find the file, because XTandem extends the filename with a timestamp we do not know (exactly) StringList files; File::fileList(temp_directory, "_tandem_output_file*.xml", files); if (files.size() != 1) { throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, tandem_output_filename); } tandem_output.load(temp_directory + files[0], protein_id, peptide_ids); // now put the RTs into the peptide_ids from the spectrum ids for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it != peptide_ids.end(); ++it) { UInt id = (Int)it->getMetaValue("spectrum_id"); --id; // native IDs were written 1-based if (id < exp.size()) { it->setRT(exp[id].getRT()); double pre_mz(0.0); if 
(!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ(); it->setMZ(pre_mz); //it->removeMetaValue("spectrum_id"); } else { LOG_ERROR << "XTandemAdapter: Error: id '" << id << "' not found in peak map!" << endl; } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // handle the search parameters ProteinIdentification::SearchParameters search_parameters; search_parameters.db = getStringOption_("database"); search_parameters.charges = "+" + String(getIntOption_("min_precursor_charge")) + "-+" + String(getIntOption_("max_precursor_charge")); ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC; search_parameters.mass_type = mass_type; search_parameters.fixed_modifications = getStringList_("fixed_modifications"); search_parameters.variable_modifications = getStringList_("variable_modifications"); search_parameters.missed_cleavages = getIntOption_("missed_cleavages"); search_parameters.peak_mass_tolerance = getDoubleOption_("fragment_mass_tolerance"); search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance"); protein_id.setSearchParameters(search_parameters); protein_id.setSearchEngineVersion(""); protein_id.setSearchEngine("XTandem"); protein_ids.push_back(protein_id); IdXMLFile().store(outputfile_name, protein_ids, peptide_ids); /// Deletion of temporary files if (this->debug_level_ < 2) { File::removeDirRecursively(temp_directory); LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl; } else { LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." 
<< std::endl; } // some stats LOG_INFO << "Statistics:\n" << " identified MS2 spectra: " << peptide_ids.size() << " / " << exp.size() << " = " << int(peptide_ids.size() * 100.0 / exp.size()) << "% (with e-value < " << String(getDoubleOption_("max_valid_expect")) << ")" << std::endl; return EXECUTION_OK; }
/**
  @brief Corrects precursor m/z and charge of MS2 spectra using isotope patterns found in the preceding MS1 scan.

  For every run of consecutive MS2 spectra the closest preceding MS1 spectrum is
  looked up. A +/- 3 m/z window around each precursor position is cut out of that
  MS1 spectrum and handed to the "isotope_wavelet" feature finder. Among the
  features closer to the precursor than 'precursor_mass_tolerance', the most
  intense one replaces m/z and charge of the (first) precursor.

  Empty spectra are dropped from the experiment before processing and are
  therefore also missing in the output.

  @return EXECUTION_OK
*/
ExitCodes main_(int, const char**)
{
  // parsing parameters
  String in(getStringOption_("in"));
  String feature_in(getStringOption_("feature_in"));
  String out(getStringOption_("out"));
  double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

  // reading input
  FileHandler fh;
  FileTypes::Type in_type = fh.getType(in);

  PeakMap exp;
  fh.loadExperiment(in, exp, in_type, log_type_, false, false);
  exp.sortSpectra();

  // NOTE(review): the feature map is loaded but not used anywhere below - confirm whether it is still needed
  FeatureMap feature_map;
  if (feature_in != "")
  {
    FeatureXMLFile().load(feature_in, feature_map);
  }

  // calculations
  FeatureFinderAlgorithmIsotopeWavelet iso_ff;
  Param ff_param(iso_ff.getParameters());
  ff_param.setValue("max_charge", getIntOption_("max_charge"));
  ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
  iso_ff.setParameters(ff_param);

  FeatureFinder ff;
  ff.setLogType(ProgressLogger::NONE);

  // keep only the non-empty spectra
  PeakMap exp2 = exp;
  exp2.clear(false);
  for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
  {
    if (it->size() != 0)
    {
      exp2.addSpectrum(*it);
    }
  }

  exp = exp2;
  exp.updateRanges();

  // half width (in m/z) of the MS1 region handed to the feature finder
  const double zoom_half_width = 3;

  // TODO check MS2 and MS1 counts
  ProgressLogger progresslogger;
  progresslogger.setLogType(log_type_);
  progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
  for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
  {
    // number of spectra already passed, so that progress runs from 0 up to exp.size()
    progresslogger.setProgress(it - exp.begin());
    if (it->getMSLevel() != 2)
    {
      continue;
    }

    // find first MS1 scan of the MS/MS scan
    PeakMap::Iterator ms1_it = it;
    while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
    {
      --ms1_it;
    }
    if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
    {
      writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
      continue;
    }
    if (ms1_it->size() == 0)
    {
      writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
      continue;
    }

    // Process the run of consecutive MS2 scans starting at the current spectrum.
    // Starting at 'it' (and not at the scan after the MS1) guarantees that the
    // outer iterator never moves backwards, even if spectra of other MS levels
    // are interleaved between the MS1 scan and 'it'.
    PeakMap::Iterator ms2_it = it;
    while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
    {
      // first: error checks
      if (ms2_it->getPrecursors().empty())
      {
        writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
        ++ms2_it;
        continue;
      }
      else if (ms2_it->getPrecursors().size() > 1)
      {
        writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
      }

      Precursor prec = *ms2_it->getPrecursors().begin();
      double prec_pos = prec.getMZ();

      PeakMap new_exp;
      // now excise small region from the MS1 spec for the feature finder (isotope pattern must be covered...)
      PeakSpectrum zoom_spec;
      for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
      {
        if (pit->getMZ() > prec_pos - zoom_half_width && pit->getMZ() < prec_pos + zoom_half_width)
        {
          zoom_spec.push_back(*pit);
        }
      }
      new_exp.addSpectrum(zoom_spec);
      new_exp.updateRanges();
      FeatureMap features, seeds;
      ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
      if (features.empty())
      {
        writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
        ++ms2_it;
        continue;
      }

      // pick the most intense feature within the tolerance;
      // start below every representable intensity (numeric_limits<double>::min() would be
      // the smallest POSITIVE value, not the lowest one)
      double max_int(-numeric_limits<double>::max());
      double min_dist(numeric_limits<double>::max());
      Size max_int_feat_idx(0);

      for (Size i = 0; i != features.size(); ++i)
      {
        if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance &&
            features[i].getIntensity() > max_int)
        {
          max_int_feat_idx = i;
          max_int = features[i].getIntensity();
          min_dist = fabs(features[i].getMZ() - prec_pos);
        }
      }

      writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
      // min_dist is only below the tolerance if a feature was selected above
      if (min_dist < precursor_mass_tolerance)
      {
        prec.setMZ(features[max_int_feat_idx].getMZ());
        prec.setCharge(features[max_int_feat_idx].getCharge());
        vector<Precursor> precs;
        precs.push_back(prec);
        ms2_it->setPrecursors(precs);
        writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
      }

      ++ms2_it;
    }
    // continue behind the last MS2 scan handled above (the for-loop increments once more)
    it = --ms2_it;
  }
  progresslogger.endProgress();

  // writing output
  fh.storeExperiment(out, exp, log_type_);

  return EXECUTION_OK;
}
/**
  @brief Rescales the peak intensities of all spectra with MS level < 2 and stores the result.

  Every intensity is divided by one hundredth of the maximum intensity reported
  by the experiment after updateRanges(). Spectra with MS level >= 2 are written
  unchanged. If the reported maximum is zero, no scaling is done, because the
  division would turn every intensity into NaN/inf.

  @return EXECUTION_OK
*/
ExitCodes main_(int, const char **) override
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  String in = getStringOption_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------

  PeakMap exp;
  MzMLFile f;
  f.load(in, exp);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------

  //determine maximum peak
  exp.updateRanges();
  const double percent_unit = exp.getMaxInt() / 100.0;

  // exact comparison is intended: only a divisor of exactly zero must be avoided
  if (percent_unit != 0.0)
  {
    for (auto& spectrum : exp)
    {
      if (spectrum.getMSLevel() < 2)
      {
        for (auto& peak : spectrum)
        {
          peak.setIntensity(peak.getIntensity() / percent_unit);
        }
      }
    }
  }

  /// @todo add chromatogram support for normalization, e.g. for MRM stuff (Andreas)
  /*
      vector<MSChromatogram > chroms = exp.getChromatograms();
      double sum(0);
  for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
  {
    for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
    {
              sum += it2->getIntensity();
          }
      }

      for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
      {
          for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
          {
              it2->setIntensity(it2->getIntensity() / sum * 1000000.0);
          }
      }

      exp.setChromatograms(chroms);
  */

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------

  //annotate output with data processing info
  addDataProcessing_(exp, getProcessingInfo_(DataProcessing::NORMALIZATION));

  f.store(out, exp);

  return EXECUTION_OK;
}