int main() { // construct a AASequence object, query a residue // and output some of its properties AASequence aas = AASequence::fromString("DECIANGER"); cout << aas[2].getName() << " " << aas[2].getFormula().toString() << " " << aas[2].getModificationName() << " " << aas[2].getMonoWeight() << endl; // find a modification in ModificationsDB // and output some of its properties // getInstance() returns a pointer to a ModsDB instance ResidueModification mod = ModificationsDB::getInstance()->getModification("Carbamidomethyl (C)"); cout << mod.getOrigin() << " " << mod.getFullId() << " " << mod.getDiffMonoMass() << " " << mod.getMonoMass() << endl; // set the modification on a residue of a peptide // and output some of its properties (the formula and mass have changed) // in this case ModificationsDB is used in the background // to relate the name of the mod to its attributes aas.setModification(2, "Carbamidomethyl (C)"); cout << aas[2].getName() << " " << aas[2].getFormula().toString() << " " << aas[2].getModificationName() << " " << aas[2].getMonoWeight() << endl; return 0; } //end of main
void OpenSwathDataAccessHelper::convertTargetedPeptide(const TargetedExperiment::Peptide& pep, OpenSwath::LightPeptide & p) { OpenSwath::LightModification m; OpenMS::ModificationsDB* mod_db = OpenMS::ModificationsDB::getInstance(); p.id = pep.id; if (!pep.rts.empty()) { p.rt = pep.rts[0].getCVTerms()["MS:1000896"][0].getValue().toString().toDouble(); } p.charge = pep.getChargeState(); p.sequence = pep.sequence; p.peptide_group_label = pep.getPeptideGroupLabel(); p.protein_refs.clear(); if (!pep.protein_refs.empty()) { p.protein_refs.insert( p.protein_refs.begin(), pep.protein_refs.begin(), pep.protein_refs.end() ); } // Mapping of peptide modifications { OpenMS::AASequence aa_sequence = TargetedExperimentHelper::getAASequence(pep); if ( !aa_sequence.getNTerminalModification().empty()) { ResidueModification rmod = mod_db->getTerminalModification(aa_sequence.getNTerminalModification(), ResidueModification::N_TERM); m.location = -1; m.unimod_id = rmod.getUniModAccession(); p.modifications.push_back(m); } if ( !aa_sequence.getCTerminalModification().empty()) { ResidueModification rmod = mod_db->getTerminalModification(aa_sequence.getCTerminalModification(), ResidueModification::C_TERM); m.location = boost::numeric_cast<int>(aa_sequence.size()); m.unimod_id = rmod.getUniModAccession(); p.modifications.push_back(m); } for (Size i = 0; i != aa_sequence.size(); i++) { if (aa_sequence[i].isModified()) { // search the residue in the modification database (if the sequence is valid, we should find it) ResidueModification rmod = mod_db->getModification(aa_sequence.getResidue(i).getOneLetterCode(), aa_sequence.getResidue(i).getModification(), ResidueModification::ANYWHERE); m.location = boost::numeric_cast<int>(i); m.unimod_id = rmod.getUniModAccession(); p.modifications.push_back(m); } } } // transition_exp.peptides.push_back(p); }
// Getter has no dedicated checks in this section.
// NOTE(review): presumably covered by the matching setter section, which is
// outside the visible part of the file - confirm.
START_SECTION((double getNeutralLossMonoMass() const))
  NOT_TESTABLE
END_SECTION

// Round trip: store an average neutral loss mass on the shared test object
// and read it back through the getter.
START_SECTION((void setNeutralLossAverageMass(double average_mass)))
  ptr->setNeutralLossAverageMass(23.345678);
  TEST_REAL_SIMILAR(ptr->getNeutralLossAverageMass(), 23.345678)
END_SECTION

// Getter is exercised by the setter section directly above.
START_SECTION(double getNeutralLossAverageMass() const)
  NOT_TESTABLE
END_SECTION

// The shared test object is expected to already carry a neutral loss.
// NOTE(review): that state is presumably established by an earlier section
// not visible here - confirm.
// A default-constructed modification reports no neutral loss until a neutral
// loss diff formula is set on it.
START_SECTION((bool hasNeutralLoss() const))
  TEST_EQUAL(ptr->hasNeutralLoss(), true)
  ResidueModification mod;
  TEST_EQUAL(mod.hasNeutralLoss(), false)
  mod.setNeutralLossDiffFormula(EmpiricalFormula("H2O"));
  TEST_EQUAL(mod.hasNeutralLoss(), true)
END_SECTION

// Round trip for the full id: set it, then read it back.
START_SECTION((void setFullId(const String& full_id)))
  ptr->setFullId("blubb_new_fullid");
  TEST_STRING_EQUAL(ptr->getFullId(), "blubb_new_fullid")
END_SECTION

// Getter is exercised by the setFullId section directly above.
START_SECTION((const String& getFullId() const))
  NOT_TESTABLE
END_SECTION

// The body of the following section continues beyond this excerpt.
START_SECTION((void setUniModAccession(const String &id)))
void MassDecompositionAlgorithm::updateMembers_() { // todo add accessor to tolerance, it is called very often in CID mode Map<char, double> aa_to_weight; set<const Residue *> residues = ResidueDB::getInstance()->getResidues((String)param_.getValue("residue_set")); for (set<const Residue *>::const_iterator it = residues.begin(); it != residues.end(); ++it) { aa_to_weight[(*it)->getOneLetterCode()[0]] = (*it)->getMonoWeight(Residue::Internal); } // now handle the modifications ModificationDefinitionsSet mod_set(param_.getValue("fixed_modifications"), param_.getValue("variable_modifications")); set<ModificationDefinition> fixed_mods = mod_set.getFixedModifications(); for (set<ModificationDefinition>::const_iterator it = fixed_mods.begin(); it != fixed_mods.end(); ++it) { const ResidueModification& mod = it->getModification(); char aa = ' '; if (mod.getOrigin() == 'X') { cerr << "MassDecompositionAlgorithm: Warning: cannot handle modification " << mod.getName() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl; continue; } else { aa = mod.getOrigin(); } if (mod.getMonoMass() != 0) { aa_to_weight[aa] = mod.getMonoMass(); } else { if (mod.getDiffMonoMass() != 0) { aa_to_weight[aa] += mod.getDiffMonoMass(); } else { cerr << "MassDecompositionAlgorithm: Warning: cannot handle modification " << mod.getName() << ", because no monoisotopic mass value was found! Ignoring modification!" 
<< endl; continue; } } } const StringList mod_names(ListUtils::create<String>("a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z")); vector<String>::const_iterator actual_mod_name = mod_names.begin(); set<ModificationDefinition> var_mods = mod_set.getVariableModifications(); for (set<ModificationDefinition>::const_iterator it = var_mods.begin(); it != var_mods.end(); ++it) { ResidueModification mod = it->getModification(); //cerr << it->getModification() << " " << mod.getOrigin() << " " << mod.getId() << " " << mod.getFullId() << " " << mod.getUniModAccession() << " " << mod.getPSIMODAccession() << endl; char aa = (*actual_mod_name)[0]; char origin_aa = ' '; ++actual_mod_name; if (mod.getOrigin() == 'X') { cerr << "MassDecompositionAlgorithm: Warning: cannot handle modification " << mod.getName() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl; continue; } else { origin_aa = mod.getOrigin(); } if (mod.getMonoMass() != 0) { aa_to_weight[aa] = mod.getMonoMass(); } else { if (mod.getDiffMonoMass() != 0) { aa_to_weight[aa] = aa_to_weight[origin_aa] + mod.getDiffMonoMass(); } else { cerr << "Warning: cannot handle modification " << mod.getName() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl; continue; } } } if (alphabet_ != nullptr) { delete alphabet_; } if (decomposer_ != nullptr) { delete decomposer_; } // init mass decomposer alphabet_ = new ims::IMSAlphabet(); for (Map<char, double>::ConstIterator it = aa_to_weight.begin(); it != aa_to_weight.end(); ++it) { alphabet_->push_back(String(it->first), it->second); } // initializes weights ims::Weights weights(alphabet_->getMasses(), (double) param_.getValue("decomp_weights_precision")); // optimize alphabet by dividing by gcd weights.divideByGCD(); // decomposes real values decomposer_ = new ims::RealMassDecomposer(weights); return; }
/// returns false on failure void translateModifications(StringList& static_mod_list, StringList& variable_mod_list) { // translating UNIMOD notation to MyriMatch notation of PTMs. ModificationDefinitionsSet mod_set(getStringList_("fixed_modifications"), getStringList_("variable_modifications")); if (!getStringList_("fixed_modifications").empty()) { set<String> mod_names = mod_set.getFixedModificationNames(); for (set<String>::const_iterator it = mod_names.begin(); it != mod_names.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(*it); String origin = String(mod.getOrigin()); String mass_diff = String(mod.getDiffMonoMass()); if (origin == "N-term") { origin = "("; } else if (origin == "C-term") { origin = ")"; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "N-term") { origin = "(" + origin; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "C-term") { origin = ")" + origin; } static_mod_list.push_back(origin + " " + mod.getDiffMonoMass()); } } if (!getStringList_("variable_modifications").empty()) { set<String> mod_names = mod_set.getVariableModificationNames(); for (set<String>::const_iterator it = mod_names.begin(); it != mod_names.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(*it); String origin = String(mod.getOrigin()); String mass_diff = String(mod.getDiffMonoMass()); if (origin == "N-term") { origin = "("; } else if (origin == "C-term") { origin = ")"; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "N-term") { origin = "(" + origin; } else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "C-term") { origin = ")" + origin; } variable_mod_list.push_back(origin + " * " + mass_diff); // use * for all mods (no unique-per-mod symbol should be required) } } }
// Writes a complete Comet parameter file to @p os, built from the tool's
// options (getStringOption_ / getIntOption_ / getDoubleOption_ / ...).
//
// The order of the output matters: the "# comet_version" line has to be the
// first line and the [COMET_ENZYME_INFO] table has to be the last part.
//
// Throws Exception::IllegalArgument if more than 9 variable modifications are
// configured.
//
// NOTE(review): several options are translated through map::operator[];
// assuming these are std::map, an option value without an entry would
// silently be written as 0 - confirm the options are restricted to the keys
// registered below.
void createParamFile_(ostream& os)
{
  os << "# comet_version " << getStringOption_("comet_version") << "\n"; // required as first line in the param file
  os << "# Comet MS/MS search engine parameters file.\n";
  os << "# Everything following the '#' symbol is treated as a comment.\n";
  os << "database_name = " << getStringOption_("database") << "\n";
  os << "decoy_search = " << 0 << "\n"; // 0=no (default), 1=concatenated search, 2=separate search
  os << "num_threads = " << getIntOption_("threads") << "\n"; // 0=poll CPU to set num threads; else specify num threads directly (max 64)

  // masses
  // translation tables from option strings to the numeric codes written below
  map<String,int> precursor_error_units;
  precursor_error_units["amu"] = 0;
  precursor_error_units["mmu"] = 1;
  precursor_error_units["ppm"] = 2;

  map<string,int> isotope_error;
  isotope_error["off"] = 0;
  isotope_error["-1/0/1/2/3"] = 1;
  isotope_error["-8/-4/0/4/8"] = 2;

  os << "peptide_mass_tolerance = " << getDoubleOption_("precursor_mass_tolerance") << "\n";
  os << "peptide_mass_units = " << precursor_error_units[getStringOption_("precursor_error_units")] << "\n"; // 0=amu, 1=mmu, 2=ppm
  os << "mass_type_parent = " << 1 << "\n"; // 0=average masses, 1=monoisotopic masses
  os << "mass_type_fragment = " << 1 << "\n"; // 0=average masses, 1=monoisotopic masses
  os << "precursor_tolerance_type = " << 0 << "\n"; // 0=MH+ (default), 1=precursor m/z; only valid for amu/mmu tolerances
  os << "isotope_error = " << isotope_error[getStringOption_("isotope_error")] << "\n"; // 0=off, 1=on -1/0/1/2/3 (standard C13 error), 2= -8/-4/0/4/8 (for +4/+8 labeling)

  // search enzyme
  // the enzyme is looked up by name and written as its Comet id
  String enzyme_name = getStringOption_("enzyme");
  String enzyme_number = String(ProteaseDB::getInstance()->getEnzyme(enzyme_name)->getCometID());

  map<string,int> num_enzyme_termini;
  num_enzyme_termini["semi"] = 1;
  num_enzyme_termini["fully"] = 2;
  num_enzyme_termini["C-term unspecific"] = 8;
  num_enzyme_termini["N-term unspecific"] = 9;

  os << "search_enzyme_number = " << enzyme_number << "\n"; // choose from list at end of this params file
  os << "num_enzyme_termini = " << num_enzyme_termini[getStringOption_("num_enzyme_termini")] << "\n"; // 1 (semi-digested), 2 (fully digested, default), 8 C-term unspecific , 9 N-term unspecific
  os << "allowed_missed_cleavage = " << getIntOption_("allowed_missed_cleavages") << "\n"; // maximum value is 5; for enzyme search

  // Up to 9 variable modifications are supported
  // format: <mass> <residues> <0=variable/else binary> <max_mods_per_peptide> <term_distance> <n/c-term> <required>
  // e.g. 79.966331 STY 0 3 -1 0 0
  vector<String> variable_modifications_names = getStringList_("variable_modifications");
  vector<ResidueModification> variable_modifications = getModifications_(variable_modifications_names);
  if (variable_modifications.size() > 9)
  {
    throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Comet only supports 9 variable modifications. " + String(variable_modifications.size()) + " provided.");
  }

  int max_variable_mods_in_peptide = getIntOption_("max_variable_mods_in_peptide");
  // declared outside the loops: the second loop continues where the first one stops
  Size var_mod_index = 0;

  // write out user specified modifications
  for (; var_mod_index < variable_modifications.size(); ++var_mod_index)
  {
    const ResidueModification mod = variable_modifications[var_mod_index];
    double mass = mod.getDiffMonoMass();
    String residues = mod.getOrigin();
    //TODO support binary groups, e.g. for SILAC
    int binary_group = 0;
    //TODO support mod-specific limit (default for now is the overall max per peptide)
    int max_current_mod_per_peptide = max_variable_mods_in_peptide;
    //TODO support term-distances?
    int term_distance = -1;
    int nc_term = 0;

    //TODO support agglomeration of Modifications to same AA. Watch out for nc_term value then.
    if (mod.getTermSpecificity() == ResidueModification::C_TERM)
    {
      // peptide C-terminus: the residue field becomes "c"
      residues = "c";
      term_distance = 0;
      // Since users need to specify mods that apply to multiple residues/terms separately
      // 3 and -1 should be equal for now.
      nc_term = 3;
    }
    else if (mod.getTermSpecificity() == ResidueModification::N_TERM)
    {
      // peptide N-terminus: the residue field becomes "n"
      residues = "n";
      term_distance = 0;
      // Since users need to specify mods that apply to multiple residues/terms separately
      // 2 and -1 should be equal for now.
      nc_term = 2;
    }
    else if (mod.getTermSpecificity() == ResidueModification::PROTEIN_N_TERM) // not yet available
    {
      // the residue field keeps the origin of the modification
      term_distance = 0;
      nc_term = 0;
    }
    else if (mod.getTermSpecificity() == ResidueModification::PROTEIN_C_TERM) // not yet available
    {
      // the residue field keeps the origin of the modification
      term_distance = 0;
      nc_term = 1;
    }

    //TODO support required variable mods
    bool required = false;

    // slot numbers in the file are 1-based
    os << "variable_mod0" << var_mod_index+1 << " = " << mass << " " << residues << " " << binary_group << " " << max_current_mod_per_peptide << " " << term_distance << " " << nc_term << " " << required << "\n";
  }

  // fill remaining modification slots (if any) in Comet with "no modification"
  for (; var_mod_index < 9; ++var_mod_index)
  {
    os << "variable_mod0" << var_mod_index+1 << " = " << "0.0 X 0 3 -1 0 0" << "\n";
  }

  os << "max_variable_mods_in_peptide = " << getIntOption_("max_variable_mods_in_peptide") << "\n";
  os << "require_variable_mod = " << (int) (getStringOption_("require_variable_mod") == "true") << "\n";

  // fragment ion defaults
  // ion trap ms/ms: 1.0005 tolerance, 0.4 offset (mono masses), theoretical_fragment_ions = 1
  // high res ms/ms: 0.02 tolerance, 0.0 offset (mono masses), theoretical_fragment_ions = 0

  String instrument = getStringOption_("instrument");
  double bin_tol = getDoubleOption_("fragment_bin_tolerance");
  double bin_offset = getDoubleOption_("fragment_bin_offset");
  // implausible combinations only trigger a warning; the values are written unchanged
  if (instrument == "low_res" && (bin_tol < 0.9 || bin_offset <= 0.2))
  {
    LOG_WARN << "Fragment bin size or tolerance is quite low for low res instruments." << "\n";
  }
  else if (instrument == "high_res" && (bin_tol > 0.2 || bin_offset > 0.1))
  {
    LOG_WARN << "Fragment bin size or tolerance is quite high for high res instruments." << "\n";
  };

  os << "fragment_bin_tol = " << bin_tol << "\n"; // binning to use on fragment ions
  os << "fragment_bin_offset = " << bin_offset << "\n"; // offset position to start the binning (0.0 to 1.0)
  os << "theoretical_fragment_ions = " << (int)(instrument == "low_res") << "\n"; // 0=use flanking bin, 1=use M bin only
  // string flags are written as 1 if the option equals "true", 0 otherwise
  os << "use_A_ions = " << (int)(getStringOption_("use_A_ions")=="true") << "\n";
  os << "use_B_ions = " << (int)(getStringOption_("use_B_ions")=="true") << "\n";
  os << "use_C_ions = " << (int)(getStringOption_("use_C_ions")=="true") << "\n";
  os << "use_X_ions = " << (int)(getStringOption_("use_X_ions")=="true") << "\n";
  os << "use_Y_ions = " << (int)(getStringOption_("use_Y_ions")=="true") << "\n";
  os << "use_Z_ions = " << (int)(getStringOption_("use_Z_ions")=="true") << "\n";
  os << "use_NL_ions = " << (int)(getStringOption_("use_NL_ions")=="true") << "\n"; // 0=no, 1=yes to consider NH3/H2O neutral loss peaks

  // output
  os << "output_sqtstream = " << 0 << "\n"; // 0=no, 1=yes write sqt to standard output
  os << "output_sqtfile = " << 0 << "\n"; // 0=no, 1=yes write sqt file
  os << "output_txtfile = " << 0 << "\n"; // 0=no, 1=yes write tab-delimited txt file
  os << "output_pepxmlfile = " << 1 << "\n"; // 0=no, 1=yes write pep.xml file

  // enabled whenever the "pin_out" option is non-empty
  os << "output_percolatorfile = " << !getStringOption_("pin_out").empty() << "\n"; // 0=no, 1=yes write Percolator tab-delimited input file
  os << "output_outfiles = " << 0 << "\n"; // 0=no, 1=yes write .out files
  os << "print_expect_score = " << 1 << "\n"; // 0=no, 1=yes to replace Sp with expect in out & sqt
  os << "num_output_lines = " << getIntOption_("num_hits") << "\n"; // num peptide results to show
  os << "show_fragment_ions = " << 0 << "\n"; // 0=no, 1=yes for out files only
  os << "sample_enzyme_number = " << enzyme_number << "\n"; // Sample enzyme which is possibly different than the one applied to the search.

  // mzXML parameters
  map<string,int> override_charge;
  override_charge["keep any known"] = 0;
  override_charge["ignore known"] = 1;
  override_charge["ignore outside range"] = 2;
  override_charge["keep known search unknown"] = 3;

  // if the range cannot be parsed, the initial values (0, 0) are written
  int precursor_charge_min(0), precursor_charge_max(0);
  if (!parseRange_(getStringOption_("precursor_charge"), precursor_charge_min, precursor_charge_max))
  {
    LOG_INFO << "precursor_charge range not set. Defaulting to 0:0 (disable charge filtering)." << endl;
  }

  os << "scan_range = " << "0 0" << "\n"; // start and scan scan range to search; 0 as 1st entry ignores parameter
  os << "precursor_charge = " << precursor_charge_min << " " << precursor_charge_max << "\n"; // precursor charge range to analyze; does not override any existing charge; 0 as 1st entry ignores parameter
  os << "override_charge = " << override_charge[getStringOption_("override_charge")] << "\n"; // 0=no, 1=override precursor charge states, 2=ignore precursor charges outside precursor_charge range, 3=see online
  os << "ms_level = " << getIntOption_("ms_level") << "\n"; // MS level to analyze, valid are levels 2 (default) or 3
  os << "activation_method = " << getStringOption_("activation_method") << "\n"; // activation method; used if activation method set; allowed ALL, CID, ECD, ETD, PQD, HCD, IRMPD

  // misc parameters
  // if the range cannot be parsed, the initial values (600.0, 5000.0) are written
  double digest_mass_range_min(600.0), digest_mass_range_max(5000.0);
  if (!parseRange_(getStringOption_("digest_mass_range"), digest_mass_range_min, digest_mass_range_max))
  {
    LOG_INFO << "digest_mass_range not set. Defaulting to 600.0 5000.0." << endl;
  }

  os << "digest_mass_range = " << digest_mass_range_min << " " << digest_mass_range_max << "\n"; // MH+ peptide mass range to analyze
  os << "num_results = " << 100 << "\n"; // number of search hits to store internally
  os << "skip_researching = " << 1 << "\n"; // for '.out' file output only, 0=search everything again (default), 1=don't search if .out exists
  os << "max_fragment_charge = " << getIntOption_("max_fragment_charge") << "\n"; // set maximum fragment charge state to analyze (allowed max 5)
  os << "max_precursor_charge = " << getIntOption_("max_precursor_charge") << "\n"; // set maximum precursor charge state to analyze (allowed max 9)
  os << "nucleotide_reading_frame = " << 0 << "\n"; // 0=proteinDB, 1-6, 7=forward three, 8=reverse three, 9=all six
  os << "clip_nterm_methionine = " << (int)(getStringOption_("clip_nterm_methionine")=="true") << "\n"; // 0=leave sequences as-is; 1=also consider sequence w/o N-term methionine
  os << "spectrum_batch_size = " << getIntOption_("spectrum_batch_size") << "\n"; // max. number of spectra to search at a time; 0 to search the entire scan range in one loop
  os << "decoy_prefix = " << "--decoysearch-not-used--" << "\n"; // decoy entries are denoted by this string which is pre-pended to each protein accession
  os << "output_suffix = " << "" << "\n"; // add a suffix to output base names i.e. suffix "-C" generates base-C.pep.xml from base.mzXML input
  os << "mass_offsets = " << ListUtils::concatenate(getDoubleList_("mass_offsets"), " ") << "\n"; // one or more mass offsets to search (values subtracted from deconvoluted precursor mass)

  // spectral processing
  map<string,int> remove_precursor_peak;
  remove_precursor_peak["no"] = 0;
  remove_precursor_peak["yes"] = 1;
  remove_precursor_peak["charge_reduced"] = 2;
  remove_precursor_peak["phosphate_loss"] = 3;

  // if the range cannot be parsed, the initial values (0.0, 0.0) are written
  double clear_mz_range_min(0.0), clear_mz_range_max(0.0);
  if (!parseRange_(getStringOption_("clear_mz_range"), clear_mz_range_min, clear_mz_range_max))
  {
    LOG_INFO << "clear_mz_range not set. Defaulting to 0:0 (disable m/z filter)." << endl;
  }

  os << "minimum_peaks = " << getIntOption_("minimum_peaks") << "\n"; // required minimum number of peaks in spectrum to search (default 10)
  os << "minimum_intensity = " << getDoubleOption_("minimum_intensity") << "\n"; // minimum intensity value to read in
  os << "remove_precursor_peak = " << remove_precursor_peak[getStringOption_("remove_precursor_peak")] << "\n"; // 0=no, 1=yes, 2=all charge reduced precursor peaks (for ETD)
  os << "remove_precursor_tolerance = " << getDoubleOption_("remove_precursor_tolerance") << "\n"; // +- Da tolerance for precursor removal
  os << "clear_mz_range = " << clear_mz_range_min << " " << clear_mz_range_max << "\n"; // for iTRAQ/TMT type data; will clear out all peaks in the specified m/z range

  // write fixed modifications - if not specified residue parameter is zero
  // Amino acid:
  //   add_<one-letter code>_<residue name in lower case> = xxx
  // Terminus:
  //   add_Nterm_peptide / add_Cterm_peptide = xxx   (protein termini not available yet)
  vector<String> fixed_modifications_names = getStringList_("fixed_modifications");
  vector<ResidueModification> fixed_modifications = getModifications_(fixed_modifications_names);

  // Comet sets Carbamidomethyl (C) as modification by default even if not specified.
  // Therefore it has to be set to 0 if it was not requested.
  // NOTE(review): the reset is only written when no fixed modification at all
  // is given; with other fixed modifications but without Carbamidomethyl (C)
  // no add_C_cysteine line is emitted - confirm this is intended.
  if (fixed_modifications.empty())
  {
    os << "add_C_cysteine = 0.0000" << endl;
  }
  else
  {
    for (vector<ResidueModification>::const_iterator it = fixed_modifications.begin(); it != fixed_modifications.end(); ++it)
    {
      String AA = it->getOrigin();
      if ((AA!="N-term") && (AA!="C-term"))
      {
        // NOTE(review): r is dereferenced without a check - confirm that
        // getResidue() cannot return a null pointer for a modification origin.
        const Residue* r = ResidueDB::getInstance()->getResidue(AA);
        String name = r->getName();
        os << "add_" << r->getOneLetterCode() << "_" << name.toLower() << " = " << it->getDiffMonoMass() << endl;
      }
      else
      {
        // erase(1,1) drops the character at index 1, i.e. the '-' of "N-term" / "C-term"
        os << "add_" << AA.erase(1,1) << "_peptide = " << it->getDiffMonoMass() << endl;
      }
    }
  }

  //TODO register cut_before and cut_after in Enzymes.xml plus datastructures to add all our Enzymes with our names instead.
  // COMET_ENZYME_INFO _must_ be at the end of this parameters file
  os << "[COMET_ENZYME_INFO]" << "\n";
  os << "0. No_enzyme 0 - -" << "\n";
  os << "1. Trypsin 1 KR P" << "\n";
  os << "2. Trypsin/P 1 KR -" << "\n";
  os << "3. Lys_C 1 K P" << "\n";
  os << "4. Lys_N 0 K -" << "\n";
  os << "5. Arg_C 1 R P" << "\n";
  os << "6. Asp_N 0 D -" << "\n";
  os << "7. CNBr 1 M -" << "\n";
  os << "8. Glu_C 1 DE P" << "\n";
  os << "9. PepsinA 1 FL P" << "\n";
  os << "10. Chymotrypsin 1 FWYL P" << "\n";
}
void Residue::setModification(const String & modification) { //modification_ = modification; ModificationsDB * mod_db = ModificationsDB::getInstance(); ResidueModification mod = mod_db->getModification(one_letter_code_, modification, ResidueModification::ANYWHERE); modification_ = mod.getId(); // update all the members if (mod.getAverageMass() != 0) { average_weight_ = mod.getAverageMass(); } if (mod.getMonoMass() != 0) { mono_weight_ = mod.getMonoMass(); } bool updated_formula(false); if (!mod.getDiffFormula().isEmpty()) { updated_formula = true; setFormula(getFormula() + mod.getDiffFormula()); } if (mod.getFormula() != "" && !updated_formula) { updated_formula = true; String formula = mod.getFormula(); formula.removeWhitespaces(); formula_ = EmpiricalFormula(formula); } if (updated_formula) { average_weight_ = formula_.getAverageWeight(); mono_weight_ = formula_.getMonoWeight(); } else { if (mod.getAverageMass() != 0) { average_weight_ = mod.getAverageMass(); } if (mod.getMonoMass() != 0) { mono_weight_ = mod.getMonoMass(); } } // neutral losses loss_formulas_.clear(); loss_names_.clear(); if (mod.hasNeutralLoss()) { loss_formulas_.push_back(mod.getNeutralLossDiffFormula()); loss_names_.push_back(mod.getNeutralLossDiffFormula().toString()); } is_modified_ = true; }
void CompNovoIdentificationBase::updateMembers_() { // init residue mass table String residue_set(param_.getValue("residue_set")); set<const Residue *> residues = ResidueDB::getInstance()->getResidues(residue_set); for (set<const Residue *>::const_iterator it = residues.begin(); it != residues.end(); ++it) { aa_to_weight_[(*it)->getOneLetterCode()[0]] = (*it)->getMonoWeight(Residue::Internal); } max_number_aa_per_decomp_ = param_.getValue("max_number_aa_per_decomp"); tryptic_only_ = param_.getValue("tryptic_only").toBool(); fragment_mass_tolerance_ = (DoubleReal)param_.getValue("fragment_mass_tolerance"); max_number_pivot_ = param_.getValue("max_number_pivot"); decomp_weights_precision_ = (DoubleReal)param_.getValue("decomp_weights_precision"); min_mz_ = (DoubleReal)param_.getValue("min_mz"); max_mz_ = (DoubleReal)param_.getValue("max_mz"); max_decomp_weight_ = (DoubleReal)param_.getValue("max_decomp_weight"); max_subscore_number_ = param_.getValue("max_subscore_number"); max_isotope_ = param_.getValue("max_isotope"); name_to_residue_.clear(); residue_to_name_.clear(); // now handle the modifications ModificationDefinitionsSet mod_set((StringList)param_.getValue("fixed_modifications"), (StringList)param_.getValue("variable_modifications")); set<ModificationDefinition> fixed_mods = mod_set.getFixedModifications(); for (set<ModificationDefinition>::const_iterator it = fixed_mods.begin(); it != fixed_mods.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(it->getModification()); char aa = ' '; if (mod.getOrigin().size() != 1 || mod.getOrigin() == "X") { cerr << "Warning: cannot handle modification " << it->getModification() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" 
<< endl; continue; } else { aa = mod.getOrigin()[0]; } if (mod.getMonoMass() != 0) { aa_to_weight_[aa] = mod.getMonoMass(); } else { if (mod.getDiffMonoMass() != 0) { aa_to_weight_[aa] += mod.getDiffMonoMass(); } else { cerr << "Warning: cannot handle modification " << it->getModification() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl; continue; } } //cerr << "Setting fixed modification " << it->getModification() << " of amino acid '" << aa << "'; weight = " << aa_to_weight_[aa] << endl; const Residue * res = ResidueDB::getInstance()->getModifiedResidue(it->getModification()); name_to_residue_[aa] = res; residue_to_name_[res] = aa; } const StringList mod_names(StringList::create("a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z")); vector<String>::const_iterator actual_mod_name = mod_names.begin(); set<ModificationDefinition> var_mods = mod_set.getVariableModifications(); for (set<ModificationDefinition>::const_iterator it = var_mods.begin(); it != var_mods.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(it->getModification()); char aa = (*actual_mod_name)[0]; char origin_aa = ' '; ++actual_mod_name; if (mod.getOrigin().size() != 1 || mod.getOrigin() == "X") { cerr << "CompNovoIdentificationBase: Warning: cannot handle modification " << it->getModification() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl; continue; } else { origin_aa = mod.getOrigin()[0]; } if (mod.getMonoMass() != 0) { aa_to_weight_[aa] = mod.getMonoMass(); } else { if (mod.getDiffMonoMass() != 0) { aa_to_weight_[aa] = aa_to_weight_[origin_aa] + mod.getDiffMonoMass(); } else { cerr << "CompNovoIdentificationBase: Warning: cannot handle modification " << it->getModification() << ", because no monoisotopic mass value was found! Ignoring modification!" 
<< endl; continue; } } //cerr << "Mapping variable modification " << it->getModification() << " to letter '" << aa << "' (@" << origin_aa << "); weight = " << aa_to_weight_[aa] << endl; const Residue * res = ResidueDB::getInstance()->getModifiedResidue(it->getModification()); name_to_residue_[aa] = res; residue_to_name_[res] = aa; } /* cerr << "Following masses are used for identification: " << endl; for (Map<char, DoubleReal>::const_iterator it = aa_to_weight_.begin(); it != aa_to_weight_.end(); ++it) { cerr << it->first << " " << precisionWrapper(it->second) << endl; }*/ initIsotopeDistributions_(); Param decomp_param(mass_decomp_algorithm_.getParameters()); decomp_param.setValue("tolerance", fragment_mass_tolerance_); decomp_param.setValue("fixed_modifications", (StringList)param_.getValue("fixed_modifications")); decomp_param.setValue("variable_modifications", (StringList)param_.getValue("variable_modifications")); mass_decomp_algorithm_.setParameters(decomp_param); min_aa_weight_ = numeric_limits<DoubleReal>::max(); for (Map<char, DoubleReal>::const_iterator it = aa_to_weight_.begin(); it != aa_to_weight_.end(); ++it) { if (min_aa_weight_ > it->second) { min_aa_weight_ = it->second; } } return; }