/// returns false on failure void translateModifications(StringList& static_mod_list, StringList& variable_mod_list) { // translating UNIMOD notation to MyriMatch notation of PTMs. ModificationDefinitionsSet mod_set(getStringList_("fixed_modifications"), getStringList_("variable_modifications")); if (!getStringList_("fixed_modifications").empty()) { set<String> mod_names = mod_set.getFixedModificationNames(); for (set<String>::const_iterator it = mod_names.begin(); it != mod_names.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(*it); String origin = String(mod.getOrigin()); String mass_diff = String(mod.getDiffMonoMass()); if (origin == "N-term") { origin = "("; } else if (origin == "C-term") { origin = ")"; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "N-term") { origin = "(" + origin; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "C-term") { origin = ")" + origin; } static_mod_list.push_back(origin + " " + mod.getDiffMonoMass()); } } if (!getStringList_("variable_modifications").empty()) { set<String> mod_names = mod_set.getVariableModificationNames(); for (set<String>::const_iterator it = mod_names.begin(); it != mod_names.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(*it); String origin = String(mod.getOrigin()); String mass_diff = String(mod.getDiffMonoMass()); if (origin == "N-term") { origin = "("; } else if (origin == "C-term") { origin = ")"; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "N-term") { origin = "(" + origin; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "C-term") { origin = ")" + origin; } variable_mod_list.push_back(origin + " * " + mass_diff); // use * for all mods (no unique-per-mod symbol should be required) } } }
/// Writes a Comet parameter file to @p os, built from the tool's options.
///
/// Sections are written in this order: header and database, mass settings,
/// search enzyme, variable modifications (always nine slots, unused ones are
/// padded), fragment ions, output, mzXML parameters, misc parameters,
/// spectral processing, fixed modifications and finally the
/// [COMET_ENZYME_INFO] table.
///
/// @param os Stream receiving the parameter file text. It is not flushed or
///           closed here.
/// @throws OpenMS::Exception::IllegalArgument if more than nine variable
///         modifications are requested, or if the origin of a non-terminal
///         fixed modification cannot be resolved to a residue.
void createParamFile_(ostream& os)
{
  os << "# comet_version " << getStringOption_("comet_version") << "\n"; // required as first line in the param file
  os << "# Comet MS/MS search engine parameters file.\n";
  os << "# Everything following the '#' symbol is treated as a comment.\n";
  os << "database_name = " << getStringOption_("database") << "\n";
  os << "decoy_search = " << 0 << "\n"; // 0=no (default), 1=concatenated search, 2=separate search
  os << "num_threads = " << getIntOption_("threads") << "\n"; // 0=poll CPU to set num threads; else specify num threads directly (max 64)

  // masses
  map<String,int> precursor_error_units;
  precursor_error_units["amu"] = 0;
  precursor_error_units["mmu"] = 1;
  precursor_error_units["ppm"] = 2;

  map<string,int> isotope_error;
  isotope_error["off"] = 0;
  isotope_error["-1/0/1/2/3"] = 1;
  isotope_error["-8/-4/0/4/8"] = 2;

  os << "peptide_mass_tolerance = " << getDoubleOption_("precursor_mass_tolerance") << "\n";
  os << "peptide_mass_units = " << precursor_error_units[getStringOption_("precursor_error_units")] << "\n"; // 0=amu, 1=mmu, 2=ppm
  os << "mass_type_parent = " << 1 << "\n"; // 0=average masses, 1=monoisotopic masses
  os << "mass_type_fragment = " << 1 << "\n"; // 0=average masses, 1=monoisotopic masses
  os << "precursor_tolerance_type = " << 0 << "\n"; // 0=MH+ (default), 1=precursor m/z; only valid for amu/mmu tolerances
  os << "isotope_error = " << isotope_error[getStringOption_("isotope_error")] << "\n"; // 0=off, 1=on -1/0/1/2/3 (standard C13 error), 2= -8/-4/0/4/8 (for +4/+8 labeling)

  // search enzyme
  String enzyme_name = getStringOption_("enzyme");
  String enzyme_number = String(ProteaseDB::getInstance()->getEnzyme(enzyme_name)->getCometID());

  map<string,int> num_enzyme_termini;
  num_enzyme_termini["semi"] = 1;
  num_enzyme_termini["fully"] = 2;
  num_enzyme_termini["C-term unspecific"] = 8;
  num_enzyme_termini["N-term unspecific"] = 9;

  os << "search_enzyme_number = " << enzyme_number << "\n"; // choose from list at end of this params file
  os << "num_enzyme_termini = " << num_enzyme_termini[getStringOption_("num_enzyme_termini")] << "\n"; // 1 (semi-digested), 2 (fully digested, default), 8 C-term unspecific , 9 N-term unspecific
  os << "allowed_missed_cleavage = " << getIntOption_("allowed_missed_cleavages") << "\n"; // maximum value is 5; for enzyme search

  // Up to 9 variable modifications are supported
  // format: <mass> <residues> <0=variable/else binary> <max_mods_per_peptide> <term_distance> <n/c-term> <required>
  // e.g. 79.966331 STY 0 3 -1 0 0
  const Size max_comet_variable_mods = 9;
  vector<String> variable_modifications_names = getStringList_("variable_modifications");
  vector<ResidueModification> variable_modifications = getModifications_(variable_modifications_names);
  if (variable_modifications.size() > max_comet_variable_mods)
  {
    throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Comet only supports 9 variable modifications. " + String(variable_modifications.size()) + " provided.");
  }

  // read once; used per modification line and for the overall limit below
  const int max_variable_mods_in_peptide = getIntOption_("max_variable_mods_in_peptide");
  Size var_mod_index = 0;

  // write out user specified modifications
  for (; var_mod_index < variable_modifications.size(); ++var_mod_index)
  {
    const ResidueModification& mod = variable_modifications[var_mod_index];
    double mass = mod.getDiffMonoMass();
    String residues = mod.getOrigin();

    //TODO support binary groups, e.g. for SILAC
    int binary_group = 0;
    //TODO support mod-specific limit (default for now is the overall max per peptide)
    int max_current_mod_per_peptide = max_variable_mods_in_peptide;
    //TODO support term-distances?
    int term_distance = -1;
    int nc_term = 0;

    //TODO support agglomeration of Modifications to same AA. Watch out for nc_term value then.
    if (mod.getTermSpecificity() == ResidueModification::C_TERM)
    {
      residues = "c";
      term_distance = 0;
      // Since users need to specify mods that apply to multiple residues/terms separately
      // 3 and -1 should be equal for now.
      nc_term = 3;
    }
    else if (mod.getTermSpecificity() == ResidueModification::N_TERM)
    {
      residues = "n";
      term_distance = 0;
      // Since users need to specify mods that apply to multiple residues/terms separately
      // 2 and -1 should be equal for now.
      nc_term = 2;
    }
    else if (mod.getTermSpecificity() == ResidueModification::PROTEIN_N_TERM) // not yet available
    {
      term_distance = 0;
      nc_term = 0;
    }
    else if (mod.getTermSpecificity() == ResidueModification::PROTEIN_C_TERM) // not yet available
    {
      term_distance = 0;
      nc_term = 1;
    }

    //TODO support required variable mods
    bool required = false;

    os << "variable_mod0" << var_mod_index+1 << " = " << mass << " " << residues << " " << binary_group << " " << max_current_mod_per_peptide << " " << term_distance << " " << nc_term << " " << required << "\n";
  }

  // fill remaining modification slots (if any) in Comet with "no modification"
  for (; var_mod_index < max_comet_variable_mods; ++var_mod_index)
  {
    os << "variable_mod0" << var_mod_index+1 << " = " << "0.0 X 0 3 -1 0 0" << "\n";
  }

  os << "max_variable_mods_in_peptide = " << max_variable_mods_in_peptide << "\n";
  os << "require_variable_mod = " << static_cast<int>(getStringOption_("require_variable_mod") == "true") << "\n";

  // fragment ion defaults
  // ion trap ms/ms: 1.0005 tolerance, 0.4 offset (mono masses), theoretical_fragment_ions = 1
  // high res ms/ms: 0.02 tolerance, 0.0 offset (mono masses), theoretical_fragment_ions = 0
  String instrument = getStringOption_("instrument");
  double bin_tol = getDoubleOption_("fragment_bin_tolerance");
  double bin_offset = getDoubleOption_("fragment_bin_offset");
  if (instrument == "low_res" && (bin_tol < 0.9 || bin_offset <= 0.2))
  {
    LOG_WARN << "Fragment bin size or tolerance is quite low for low res instruments." << "\n";
  }
  else if (instrument == "high_res" && (bin_tol > 0.2 || bin_offset > 0.1))
  {
    LOG_WARN << "Fragment bin size or tolerance is quite high for high res instruments." << "\n";
  }

  os << "fragment_bin_tol = " << bin_tol << "\n"; // binning to use on fragment ions
  os << "fragment_bin_offset = " << bin_offset << "\n"; // offset position to start the binning (0.0 to 1.0)
  os << "theoretical_fragment_ions = " << static_cast<int>(instrument == "low_res") << "\n"; // 0=use flanking bin, 1=use M bin only
  os << "use_A_ions = " << static_cast<int>(getStringOption_("use_A_ions") == "true") << "\n";
  os << "use_B_ions = " << static_cast<int>(getStringOption_("use_B_ions") == "true") << "\n";
  os << "use_C_ions = " << static_cast<int>(getStringOption_("use_C_ions") == "true") << "\n";
  os << "use_X_ions = " << static_cast<int>(getStringOption_("use_X_ions") == "true") << "\n";
  os << "use_Y_ions = " << static_cast<int>(getStringOption_("use_Y_ions") == "true") << "\n";
  os << "use_Z_ions = " << static_cast<int>(getStringOption_("use_Z_ions") == "true") << "\n";
  os << "use_NL_ions = " << static_cast<int>(getStringOption_("use_NL_ions") == "true") << "\n"; // 0=no, 1=yes to consider NH3/H2O neutral loss peaks

  // output
  os << "output_sqtstream = " << 0 << "\n"; // 0=no, 1=yes write sqt to standard output
  os << "output_sqtfile = " << 0 << "\n"; // 0=no, 1=yes write sqt file
  os << "output_txtfile = " << 0 << "\n"; // 0=no, 1=yes write tab-delimited txt file
  os << "output_pepxmlfile = " << 1 << "\n"; // 0=no, 1=yes write pep.xml file
  os << "output_percolatorfile = " << !getStringOption_("pin_out").empty() << "\n"; // 0=no, 1=yes write Percolator tab-delimited input file
  os << "output_outfiles = " << 0 << "\n"; // 0=no, 1=yes write .out files
  os << "print_expect_score = " << 1 << "\n"; // 0=no, 1=yes to replace Sp with expect in out & sqt
  os << "num_output_lines = " << getIntOption_("num_hits") << "\n"; // num peptide results to show
  os << "show_fragment_ions = " << 0 << "\n"; // 0=no, 1=yes for out files only
  os << "sample_enzyme_number = " << enzyme_number << "\n"; // Sample enzyme which is possibly different than the one applied to the search.

  // mzXML parameters
  map<string,int> override_charge;
  override_charge["keep any known"] = 0;
  override_charge["ignore known"] = 1;
  override_charge["ignore outside range"] = 2;
  override_charge["keep known search unknown"] = 3;

  int precursor_charge_min(0), precursor_charge_max(0);
  if (!parseRange_(getStringOption_("precursor_charge"), precursor_charge_min, precursor_charge_max))
  {
    LOG_INFO << "precursor_charge range not set. Defaulting to 0:0 (disable charge filtering)." << endl;
  }

  os << "scan_range = " << "0 0" << "\n"; // start and scan scan range to search; 0 as 1st entry ignores parameter
  os << "precursor_charge = " << precursor_charge_min << " " << precursor_charge_max << "\n"; // precursor charge range to analyze; does not override any existing charge; 0 as 1st entry ignores parameter
  os << "override_charge = " << override_charge[getStringOption_("override_charge")] << "\n"; // 0=no, 1=override precursor charge states, 2=ignore precursor charges outside precursor_charge range, 3=see online
  os << "ms_level = " << getIntOption_("ms_level") << "\n"; // MS level to analyze, valid are levels 2 (default) or 3
  os << "activation_method = " << getStringOption_("activation_method") << "\n"; // activation method; used if activation method set; allowed ALL, CID, ECD, ETD, PQD, HCD, IRMPD

  // misc parameters
  double digest_mass_range_min(600.0), digest_mass_range_max(5000.0);
  if (!parseRange_(getStringOption_("digest_mass_range"), digest_mass_range_min, digest_mass_range_max))
  {
    LOG_INFO << "digest_mass_range not set. Defaulting to 600.0 5000.0." << endl;
  }

  os << "digest_mass_range = " << digest_mass_range_min << " " << digest_mass_range_max << "\n"; // MH+ peptide mass range to analyze
  os << "num_results = " << 100 << "\n"; // number of search hits to store internally
  os << "skip_researching = " << 1 << "\n"; // for '.out' file output only, 0=search everything again (default), 1=don't search if .out exists
  os << "max_fragment_charge = " << getIntOption_("max_fragment_charge") << "\n"; // set maximum fragment charge state to analyze (allowed max 5)
  os << "max_precursor_charge = " << getIntOption_("max_precursor_charge") << "\n"; // set maximum precursor charge state to analyze (allowed max 9)
  os << "nucleotide_reading_frame = " << 0 << "\n"; // 0=proteinDB, 1-6, 7=forward three, 8=reverse three, 9=all six
  os << "clip_nterm_methionine = " << static_cast<int>(getStringOption_("clip_nterm_methionine") == "true") << "\n"; // 0=leave sequences as-is; 1=also consider sequence w/o N-term methionine
  os << "spectrum_batch_size = " << getIntOption_("spectrum_batch_size") << "\n"; // max. number of spectra to search at a time; 0 to search the entire scan range in one loop
  os << "decoy_prefix = " << "--decoysearch-not-used--" << "\n"; // decoy entries are denoted by this string which is pre-pended to each protein accession
  os << "output_suffix = " << "" << "\n"; // add a suffix to output base names i.e. suffix "-C" generates base-C.pep.xml from base.mzXML input
  os << "mass_offsets = " << ListUtils::concatenate(getDoubleList_("mass_offsets"), " ") << "\n"; // one or more mass offsets to search (values subtracted from deconvoluted precursor mass)

  // spectral processing
  map<string,int> remove_precursor_peak;
  remove_precursor_peak["no"] = 0;
  remove_precursor_peak["yes"] = 1;
  remove_precursor_peak["charge_reduced"] = 2;
  remove_precursor_peak["phosphate_loss"] = 3;

  double clear_mz_range_min(0.0), clear_mz_range_max(0.0);
  if (!parseRange_(getStringOption_("clear_mz_range"), clear_mz_range_min, clear_mz_range_max))
  {
    LOG_INFO << "clear_mz_range not set. Defaulting to 0:0 (disable m/z filter)." << endl;
  }

  os << "minimum_peaks = " << getIntOption_("minimum_peaks") << "\n"; // required minimum number of peaks in spectrum to search (default 10)
  os << "minimum_intensity = " << getDoubleOption_("minimum_intensity") << "\n"; // minimum intensity value to read in
  os << "remove_precursor_peak = " << remove_precursor_peak[getStringOption_("remove_precursor_peak")] << "\n"; // 0=no, 1=yes, 2=all charge reduced precursor peaks (for ETD)
  os << "remove_precursor_tolerance = " << getDoubleOption_("remove_precursor_tolerance") << "\n"; // +- Da tolerance for precursor removal
  os << "clear_mz_range = " << clear_mz_range_min << " " << clear_mz_range_max << "\n"; // for iTRAQ/TMT type data; will clear out all peaks in the specified m/z range

  // write fixed modifications - if not specified residue parameter is zero
  // Aminoacid:
  //   add_AA.OneletterCode_AA.ThreeLetterCode = xxx
  // Terminus:
  //   add_N/Cterm_peptide = xxx   protein not available yet
  vector<String> fixed_modifications_names = getStringList_("fixed_modifications");
  vector<ResidueModification> fixed_modifications = getModifications_(fixed_modifications_names);

  bool cysteine_written = false;
  for (vector<ResidueModification>::const_iterator it = fixed_modifications.begin(); it != fixed_modifications.end(); ++it)
  {
    String AA = it->getOrigin();
    if ((AA != "N-term") && (AA != "C-term"))
    {
      const Residue* r = ResidueDB::getInstance()->getResidue(AA);
      if (r == 0)
      {
        // fail with a clear message instead of dereferencing a null pointer
        throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Origin '" + AA + "' of a fixed modification is not a known residue.");
      }
      String name = r->getName();
      os << "add_" << r->getOneLetterCode() << "_" << name.toLower() << " = " << it->getDiffMonoMass() << "\n";
      if (r->getOneLetterCode() == "C")
      {
        cysteine_written = true;
      }
    }
    else
    {
      // "N-term"/"C-term" -> "Nterm"/"Cterm"
      os << "add_" << AA.erase(1,1) << "_peptide = " << it->getDiffMonoMass() << "\n";
    }
  }

  // Comet sets Carbamidomethyl (C) as modification as default even if not specified.
  // Therefore it has to be set to 0 whenever no fixed modification on cysteine was
  // written above - not only when the list of fixed modifications is empty.
  if (!cysteine_written)
  {
    os << "add_C_cysteine = 0.0000" << "\n";
  }

  //TODO register cut_before and cut_after in Enzymes.xml plus datastructures to add all our Enzymes with our names instead.
  // COMET_ENZYME_INFO _must_ be at the end of this parameters file
  os << "[COMET_ENZYME_INFO]" << "\n";
  os << "0. No_enzyme 0 - -" << "\n";
  os << "1. Trypsin 1 KR P" << "\n";
  os << "2. Trypsin/P 1 KR -" << "\n";
  os << "3. Lys_C 1 K P" << "\n";
  os << "4. Lys_N 0 K -" << "\n";
  os << "5. Arg_C 1 R P" << "\n";
  os << "6. Asp_N 0 D -" << "\n";
  os << "7. CNBr 1 M -" << "\n";
  os << "8. Glu_C 1 DE P" << "\n";
  os << "9. PepsinA 1 FL P" << "\n";
  os << "10. Chymotrypsin 1 FWYL P" << "\n";
}