/**
  @brief Monoisotopic weight of the residue for the requested residue type.

  mono_weight_ holds the weight of the full residue. Every other type is derived from it by
  applying the matching "...ToFull" mono weight offset: the offset is added for x- and y-ions
  and subtracted for all remaining types. The c-ion is computed inline by removing OH and
  adding NH.

  @param res_type Residue/ion type the weight is requested for.
  @return The derived mono weight. For a type without a case, a message is written to stderr
          and the weight of the full residue is returned.
*/
double Residue::getMonoWeight(ResidueType res_type) const
{
  switch (res_type)
  {
    case Full:
      return mono_weight_;

    case Internal:
      return mono_weight_ - getInternalToFullMonoWeight();

    case NTerminal:
      return mono_weight_ - getNTerminalToFullMonoWeight();

    case CTerminal:
      return mono_weight_ - getCTerminalToFullMonoWeight();

    // N-terminal fragment ion types
    case BIon:
      return mono_weight_ - getBIonToFullMonoWeight();

    case AIon:
      return mono_weight_ - getAIonToFullMonoWeight();

    case CIonMinusOne:
      return mono_weight_ - getCIonMinusOneToFullMonoWeight();

    case CIon:
      return mono_weight_ - EmpiricalFormula("OH").getMonoWeight() + EmpiricalFormula("NH").getMonoWeight();

    case CIonPlusOne:
      return mono_weight_ - getCIonPlusOneToFullMonoWeight();

    case CIonPlusTwo:
      return mono_weight_ - getCIonPlusTwoToFullMonoWeight();

    // C-terminal fragment ion types; x and y add their offset instead of subtracting it
    case XIon:
      return mono_weight_ + getXIonToFullMonoWeight();

    case YIon:
      return mono_weight_ + getYIonToFullMonoWeight();

    case ZIonMinusOne:
      return mono_weight_ - getZIonMinusOneToFullMonoWeight();

    case ZIon:
      return mono_weight_ - getZIonToFullMonoWeight();

    case ZIonPlusOne:
      return mono_weight_ - getZIonPlusOneToFullMonoWeight();

    case ZIonPlusTwo:
      return mono_weight_ - getZIonPlusTwoToFullMonoWeight();

    default:
      break;
  }

  // only reached for a residue type that has no case above
  cerr << "Residue::getMonoWeight: unknown ResidueType" << endl;
  return mono_weight_;
}
/**
  @brief Empirical formula of the residue for the requested residue type.

  formula_ is the formula of the full residue and internal_formula_ the one of the internal
  residue. All other types are derived from formula_ by applying the matching "...ToFull"
  formula: added for x- and y-ions, subtracted otherwise. The c-ion is computed inline by
  removing OH and adding NH.

  NOTE(review): CIonPlusOne and CIonPlusTwo have cases in getMonoWeight() but none here, so
  they take the "unknown ResidueType" path - confirm whether that is intended.

  @param res_type Residue/ion type the formula is requested for.
  @return The derived formula. For a type without a case, a message is written to stderr and
          the formula of the full residue is returned.
*/
EmpiricalFormula Residue::getFormula(ResidueType res_type) const
{
  switch (res_type)
  {
    case Full:
      return formula_;

    case Internal:
      return internal_formula_;

    case NTerminal:
      return formula_ - getNTerminalToFull();

    case CTerminal:
      return formula_ - getCTerminalToFull();

    // N-terminal fragment ion types
    case BIon:
      return formula_ - getBIonToFull();

    case AIon:
      return formula_ - getAIonToFull();

    case CIonMinusOne:
      return formula_ - getCIonMinusOneToFull();

    case CIon:
      return formula_ - EmpiricalFormula("OH") + EmpiricalFormula("NH");

    // C-terminal fragment ion types; x and y add their offset instead of subtracting it
    case XIon:
      return formula_ + getXIonToFull();

    case YIon:
      return formula_ + getYIonToFull();

    case ZIonMinusOne:
      return formula_ - getZIonMinusOneToFull();

    case ZIon:
      return formula_ - getZIonToFull();

    case ZIonPlusOne:
      return formula_ - getZIonPlusOneToFull();

    case ZIonPlusTwo:
      return formula_ - getZIonPlusTwoToFull();

    default:
      break;
  }

  // only reached for a residue type that has no case above
  cerr << "Residue::getFormula: unknown ResidueType" << endl;
  return formula_;
}
void Residue::setModification(const String & modification) { //modification_ = modification; ModificationsDB * mod_db = ModificationsDB::getInstance(); ResidueModification mod = mod_db->getModification(one_letter_code_, modification, ResidueModification::ANYWHERE); modification_ = mod.getId(); // update all the members if (mod.getAverageMass() != 0) { average_weight_ = mod.getAverageMass(); } if (mod.getMonoMass() != 0) { mono_weight_ = mod.getMonoMass(); } bool updated_formula(false); if (!mod.getDiffFormula().isEmpty()) { updated_formula = true; setFormula(getFormula() + mod.getDiffFormula()); } if (mod.getFormula() != "" && !updated_formula) { updated_formula = true; String formula = mod.getFormula(); formula.removeWhitespaces(); formula_ = EmpiricalFormula(formula); } if (updated_formula) { average_weight_ = formula_.getAverageWeight(); mono_weight_ = formula_.getMonoWeight(); } else { if (mod.getAverageMass() != 0) { average_weight_ = mod.getAverageMass(); } if (mod.getMonoMass() != 0) { mono_weight_ = mod.getMonoMass(); } } // neutral losses loss_formulas_.clear(); loss_names_.clear(); if (mod.hasNeutralLoss()) { loss_formulas_.push_back(mod.getNeutralLossDiffFormula()); loss_names_.push_back(mod.getNeutralLossDiffFormula().toString()); } is_modified_ = true; }
/**
  @brief Appends a minimal b/y ion ladder (singly protonated, intensity 1) for @p sequence to @p spec.

  For every cleavage site the b-ion mass is accumulated from the front of the sequence and the
  y-ion mass from the back, using aa_to_weight_. A peak is added at mass + proton mass for each
  of the two that lies strictly between min_mz_ and max_mz_. Finally the spectrum is sorted by
  position.

  @param spec     Spectrum the peaks are appended to; sorted by position on return.
  @param sequence Residue string; each character is looked up in aa_to_weight_.
  @param prefix   Mass offset added to all b-ion positions.
  @param suffix   Mass offset added (together with the mass of water) to all y-ion positions.
*/
void CompNovoIdentificationBase::getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, DoubleReal prefix, DoubleReal suffix)
{
  static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();

  Peak1D p;
  p.setIntensity(1.0f); // every generated peak has the same intensity

  DoubleReal b_pos(0.0 + prefix);
  DoubleReal y_pos(h2o_mass + suffix);

  // A sequence of n residues has n - 1 cleavage sites. The bound is written as 'i + 1 < size'
  // because 'size - 1' wraps around for an empty sequence and would run the loop out of bounds.
  for (Size i = 0; i + 1 < sequence.size(); ++i)
  {
    char aa(sequence[i]);
    b_pos += aa_to_weight_[aa];

    char aa2(sequence[sequence.size() - i - 1]);
    y_pos += aa_to_weight_[aa2];

    if (b_pos > min_mz_ && b_pos < max_mz_)
    {
      p.setPosition(b_pos + Constants::PROTON_MASS_U);
      spec.push_back(p);
    }

    if (y_pos > min_mz_ && y_pos < max_mz_)
    {
      p.setPosition(y_pos + Constants::PROTON_MASS_U);
      spec.push_back(p);
    }
  }

  spec.sortByPosition();
}
/**
  @brief Builds an empirical formula for the modelled mass from the averagine_ table.

  The mass is mean_ * charge_. For each of C, H, N, O and S the atom count is
  Int(0.5 + mass * averagine_[element]). Elements with a count of zero are left out; the
  remaining ones are written in the order C, H, N, O, S.

  @return EmpiricalFormula parsed from the assembled formula string.
*/
EmpiricalFormula IsotopeModel::getFormula()
{
  const CoordinateType total_mass = mean_ * charge_;

  // atom counts: scale by the averagine entry, add 0.5 and truncate to an integer
  const Int carbon = Int(0.5 + total_mass * averagine_[C]);
  const Int hydrogen = Int(0.5 + total_mass * averagine_[H]);
  const Int nitrogen = Int(0.5 + total_mass * averagine_[N]);
  const Int oxygen = Int(0.5 + total_mass * averagine_[O]);
  const Int sulfur = Int(0.5 + total_mass * averagine_[S]);

  // assemble "C<n>H<n>N<n>O<n>S<n>", skipping elements that do not occur
  String formula_string;
  if (carbon != 0)
  {
    formula_string.append("C");
    formula_string.append(String(carbon));
  }
  if (hydrogen != 0)
  {
    formula_string.append("H");
    formula_string.append(String(hydrogen));
  }
  if (nitrogen != 0)
  {
    formula_string.append("N");
    formula_string.append(String(nitrogen));
  }
  if (oxygen != 0)
  {
    formula_string.append("O");
    formula_string.append(String(oxygen));
  }
  if (sulfur != 0)
  {
    formula_string.append("S");
    formula_string.append(String(sulfur));
  }

  return EmpiricalFormula(formula_string);
}
/**
  @brief Creates a Residue from a flat key/value description and registers it in its residue sets.

  Each entry of @p values is dispatched on its key (by suffix or substring) to the matching
  setter of the new residue. For "LowMassIons", "Synonyms" and the pka/pkb/pkc entries a key
  ending in ':' means that nothing is defined and the entry is ignored. Unrecognised keys are
  reported on stderr. Every residue set named in a "ResidueSets" entry is added to
  residue_sets_, and the residue is inserted into residues_by_set_ for each of its sets.

  If parsing an entry throws, the partially built residue is deleted before the exception
  is passed on.

  @param values Key/value pairs describing one residue.
  @return Pointer to the newly allocated residue. It is created with new and handed to the
          caller as a raw pointer; this function also stores it in residues_by_set_.
*/
Residue* ResidueDB::parseResidue_(Map<String, String>& values)
{
  vector<EmpiricalFormula> low_mass_ions;
  Residue* res_ptr = new Residue();

  try
  {
    for (Map<String, String>::const_iterator it = values.begin(); it != values.end(); ++it)
    {
      const String& key = it->first;
      const String& value = it->second;

      if (key.hasSuffix(":Name"))
      {
        res_ptr->setName(value);
      }
      else if (key.hasSuffix(":ShortName"))
      {
        res_ptr->setShortName(value);
      }
      else if (key.hasSuffix(":ThreeLetterCode"))
      {
        res_ptr->setThreeLetterCode(value);
      }
      else if (key.hasSuffix(":OneLetterCode"))
      {
        res_ptr->setOneLetterCode(value);
      }
      else if (key.hasSuffix(":Formula"))
      {
        // parse once; formula and both weights come from the same object
        EmpiricalFormula formula(value);
        res_ptr->setFormula(formula);
        res_ptr->setAverageWeight(formula.getAverageWeight());
        res_ptr->setMonoWeight(formula.getMonoWeight());
      }
      else if (key.hasSubstring(":Losses:LossName"))
      {
        res_ptr->addLossName(value);
      }
      else if (key.hasSubstring(":Losses:LossFormula"))
      {
        res_ptr->addLossFormula(EmpiricalFormula(value));
      }
      else if (key.hasSubstring("NTermLosses:LossName"))
      {
        res_ptr->addNTermLossName(value);
      }
      else if (key.hasSubstring("NTermLosses:LossFormula"))
      {
        res_ptr->addNTermLossFormula(EmpiricalFormula(value));
      }
      else if (key.hasSubstring("LowMassIons"))
      {
        // no markers defined?
        if (!key.hasSuffix(":"))
        {
          low_mass_ions.push_back(EmpiricalFormula(value));
        }
      }
      else if (key.hasSubstring("Synonyms"))
      {
        // no synonyms defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->addSynonym(value);
        }
      }
      else if (key.hasSubstring("pka"))
      {
        // no pka defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->setPka(value.toDouble());
        }
      }
      else if (key.hasSubstring("pkb"))
      {
        // no pkb defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->setPkb(value.toDouble());
        }
      }
      else if (key.hasSubstring("pkc"))
      {
        // no pkc defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->setPkc(value.toDouble());
        }
      }
      else if (key.hasSubstring("GB_SC"))
      {
        res_ptr->setSideChainBasicity(value.toDouble());
      }
      else if (key.hasSubstring("GB_BB_L"))
      {
        res_ptr->setBackboneBasicityLeft(value.toDouble());
      }
      else if (key.hasSubstring("GB_BB_R"))
      {
        res_ptr->setBackboneBasicityRight(value.toDouble());
      }
      else if (key.hasSubstring("ResidueSets"))
      {
        StringList residue_sets = ListUtils::create<String>(value);
        for (StringList::const_iterator local_it = residue_sets.begin(); local_it != residue_sets.end(); ++local_it)
        {
          res_ptr->addResidueSet(*local_it);
          residue_sets_.insert(*local_it);
        }
      }
      else
      {
        cerr << "unknown key: " << key << ", with value: " << value << endl;
      }
    }

    if (!low_mass_ions.empty())
    {
      res_ptr->setLowMassIons(low_mass_ions);
    }
  }
  catch (...)
  {
    // nothing else refers to the residue yet, so it must be released here to avoid a leak
    delete res_ptr;
    throw;
  }

  // register the residue under every set it belongs to; begin() and end() must come from
  // the same container object, hence the single call bound to a reference
  const set<String>& member_sets = res_ptr->getResidueSets();
  for (set<String>::const_iterator it = member_sets.begin(); it != member_sets.end(); ++it)
  {
    residues_by_set_[*it].insert(res_ptr);
  }

  return res_ptr;
}
void TheoreticalSpectrumGenerator::addPrecursorPeaks(RichPeakSpectrum & spec, const AASequence & peptide, Int charge) const { RichPeak1D p; // precursor peak double mono_pos = peptide.getMonoWeight(Residue::Full, charge) / double(charge); if (add_isotopes_) { IsotopeDistribution dist = peptide.getFormula(Residue::Full, charge).getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge); p.setIntensity(pre_int_ * it->second); if (add_metainfo_) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } } else { p.setMZ(mono_pos); p.setIntensity(pre_int_); if (add_metainfo_) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } // loss peaks of the precursor //loss of water EmpiricalFormula ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("H2O"); mono_pos = ion.getMonoWeight() / double(charge); if (add_isotopes_) { IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge); p.setIntensity(pre_int_H2O_ * it->second); if (add_metainfo_) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } } else { p.setMZ(mono_pos); p.setIntensity(pre_int_H2O_); if (add_metainfo_) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } //loss of ammonia ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("NH3"); mono_pos = ion.getMonoWeight() / double(charge); if (add_isotopes_) { IsotopeDistribution dist = 
ion.getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge); p.setIntensity(pre_int_NH3_ * it->second); if (add_metainfo_) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } } else { p.setMZ(mono_pos); p.setIntensity(pre_int_NH3_); if (add_metainfo_) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } spec.sortByPosition(); }
void TheoreticalSpectrumGenerator::addLosses_(RichPeakSpectrum & spectrum, const AASequence & ion, double intensity, Residue::ResidueType res_type, int charge) const { RichPeak1D p; set<String> losses; for (AASequence::ConstIterator it = ion.begin(); it != ion.end(); ++it) { if (it->hasNeutralLoss()) { vector<EmpiricalFormula> loss_formulas = it->getLossFormulas(); for (Size i = 0; i != loss_formulas.size(); ++i) { losses.insert(loss_formulas[i].toString()); } } } if (!add_isotopes_) { p.setIntensity(intensity * rel_loss_intensity_); } for (set<String>::const_iterator it = losses.begin(); it != losses.end(); ++it) { EmpiricalFormula loss_ion = ion.getFormula(res_type, charge) - EmpiricalFormula(*it); // thanks to Chris and Sandro // check for negative element frequencies (might happen if losses are not allowed for specific ions) bool negative_elements(false); for (EmpiricalFormula::ConstIterator eit = loss_ion.begin(); eit != loss_ion.end(); ++eit) { if (eit->second < 0) { negative_elements = true; break; } } if (negative_elements) { continue; } double loss_pos = loss_ion.getMonoWeight() / (double)charge; const String& loss_name = *it; if (add_isotopes_) { IsotopeDistribution dist = loss_ion.getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator iso = dist.begin(); iso != dist.end(); ++iso) { p.setMZ((double)(loss_pos + j) / (double)charge); p.setIntensity(intensity * rel_loss_intensity_ * iso->second); if (add_metainfo_ && j == 0) { // note: important to construct a string from char. If omitted it will perform pointer arithmetics on the "-" string literal String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+'); p.setMetaValue("IonName", ion_name); } spectrum.push_back(p); } } else { p.setMZ(loss_pos); if (add_metainfo_) { // note: important to construct a string from char. 
If omitted it will perform pointer arithmetics on the "-" string literal String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+'); p.setMetaValue("IonName", ion_name); } spectrum.push_back(p); } } }
// divide and conquer algorithm of the sequencing void CompNovoIdentificationCID::getDecompositionsDAC_(set<String> & sequences, Size left, Size right, double peptide_weight, const PeakSpectrum & CID_spec, Map<double, CompNovoIonScoringCID::IonScore> & ion_scores) { static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); double offset_suffix(CID_spec[left].getPosition()[0] - oxonium_mass); double offset_prefix(peptide_weight - CID_spec[right].getPosition()[0]); #ifdef DAC_DEBUG static Int depth_(0); ++depth_; String tabs_(depth_, '\t'); cerr << tabs_ << "void getDecompositionsDAC(sequences[" << sequences.size() << "], " << left << ", " << right << ") "; cerr << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " diff="; #endif double diff = CID_spec[right].getPosition()[0] - CID_spec[left].getPosition()[0]; #ifdef DAC_DEBUG cerr << diff << endl; cerr << "offset_prefix=" << offset_prefix << ", offset_suffix=" << offset_suffix << endl; #endif if (subspec_to_sequences_.has(left) && subspec_to_sequences_[left].has(right)) { sequences = subspec_to_sequences_[left][right]; #ifdef DAC_DEBUG depth_--; cerr << tabs_ << "from cache DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << " " << left << " " << right << endl; #endif return; } // no further solutions possible? if (diff < min_aa_weight_) { #ifdef DAC_DEBUG depth_--; #endif return; } // no further division needed? 
if (diff <= max_decomp_weight_) { vector<MassDecomposition> decomps; // if we are at the C-terminus use precursor_mass_tolerance_ if (offset_prefix < precursor_mass_tolerance_) { Param decomp_param(mass_decomp_algorithm_.getParameters()); decomp_param.setValue("tolerance", precursor_mass_tolerance_); mass_decomp_algorithm_.setParameters(decomp_param); getDecompositions_(decomps, diff); decomp_param.setValue("tolerance", fragment_mass_tolerance_); mass_decomp_algorithm_.setParameters(decomp_param); } else { getDecompositions_(decomps, diff); } //filterDecomps_(decomps); #ifdef DAC_DEBUG cerr << tabs_ << "Found " << decomps.size() << " decomps" << endl; cerr << tabs_ << "Permuting..."; #endif //static Map<String, set<String> > permute_cache; for (vector<MassDecomposition>::const_iterator it = decomps.begin(); it != decomps.end(); ++it) { #ifdef DAC_DEBUG cerr << it->toString() << endl; #endif String exp_string = it->toExpandedString(); if (!permute_cache_.has(exp_string)) { permute_("", exp_string, sequences); permute_cache_[exp_string] = sequences; } else { sequences = permute_cache_[exp_string]; } } #ifdef DAC_DEBUG cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl; if (sequences.size() > max_subscore_number_) { cerr << tabs_ << "Reducing #sequences from " << sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix << ", suffix=" << offset_suffix << ")..."; } #endif // C-terminus if (offset_suffix <= precursor_mass_tolerance_) { filterPermuts_(sequences); } // reduce the sequences reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix); #ifdef DAC_DEBUG cerr << "Writing to cache " << left << " " << right << endl; #endif subspec_to_sequences_[left][right] = sequences; #ifdef DAC_DEBUG cerr << "ended" << endl; cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl; depth_--; 
#endif return; } // select suitable pivot peaks vector<Size> pivots; if (offset_suffix < precursor_mass_tolerance_ && offset_prefix < precursor_mass_tolerance_) { selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, true); } else { selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, false); } // run divide step #ifdef DAC_DEBUG cerr << tabs_ << "Selected " << pivots.size() << " pivot ions: "; for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it) { cerr << *it << "(" << CID_spec[*it].getPosition()[0] << ") "; } cerr << endl; #endif for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it) { set<String> seq1, seq2, new_sequences; // the smaller the 'gap' the greater the chance of not finding anything // so we we compute the smaller gap first double diff1(CID_spec[*it].getPosition()[0] - CID_spec[left].getPosition()[0]); double diff2(CID_spec[right].getPosition()[0] - CID_spec[*it].getPosition()[0]); if (diff1 < diff2) { getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores); if (seq1.empty()) { #ifdef DAC_DEBUG cerr << tabs_ << "first call produced 0 candidates (" << diff1 << ")" << endl; #endif continue; } getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores); } else { getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores); if (seq2.empty()) { #ifdef DAC_DEBUG cerr << tabs_ << "second call produced 0 candidates (" << diff2 << ")" << endl; #endif continue; } getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores); } #ifdef DAC_DEBUG cerr << tabs_ << "Found " << seq1.size() << " solutions (1) " << diff1 << endl; cerr << tabs_ << "Found " << seq2.size() << " solutions (2) " << diff2 << endl; cerr << tabs_ << "inserting " << seq1.size() * seq2.size() << " sequences" << endl; #endif // C-terminus if (offset_suffix <= fragment_mass_tolerance_) { filterPermuts_(seq1); } // test if we found 
enough sequence candidates if (seq1.empty() || seq2.empty()) { continue; } for (set<String>::const_iterator it1 = seq1.begin(); it1 != seq1.end(); ++it1) { for (set<String>::const_iterator it2 = seq2.begin(); it2 != seq2.end(); ++it2) { new_sequences.insert(*it2 + *it1); } } if (seq1.size() * seq2.size() > max_subscore_number_ /* && (offset_prefix > fragment_mass_tolerance_ || offset_suffix > fragment_mass_tolerance_)*/) { #ifdef DAC_DEBUG cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl; cerr << tabs_ << "Reducing #sequences from " << new_sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix << ", suffix=" << offset_suffix << ")..."; #endif if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_) { reducePermuts_(new_sequences, CID_spec, offset_prefix, offset_suffix); } #ifdef DAC_DEBUG for (set<String>::const_iterator it1 = new_sequences.begin(); it1 != new_sequences.end(); ++it1) { cerr << tabs_ << *it1 << endl; } cerr << endl; #endif } for (set<String>::const_iterator sit = new_sequences.begin(); sit != new_sequences.end(); ++sit) { sequences.insert(*sit); } } #ifdef DAC_DEBUG cerr << tabs_ << "Found sequences for " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << endl; for (set<String>::const_iterator sit = sequences.begin(); sit != sequences.end(); ++sit) { cerr << tabs_ << *sit << endl; } #endif // reduce the permuts once again to reduce complexity if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_) { reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix); } #ifdef DAC_DEBUG cerr << "Writing to cache " << left << " " << right << endl; #endif subspec_to_sequences_[left][right] = sequences; #ifdef DAC_DEBUG depth_--; cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << 
sequences.size() << endl; #endif return; }
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec) { //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0) //{ //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl; //return; //} PeakSpectrum new_CID_spec(CID_spec); windowMower_(new_CID_spec, 0.3, 1); Param zhang_param; zhang_param = zhang_.getParameters(); zhang_param.setValue("tolerance", fragment_mass_tolerance_); zhang_param.setValue("use_gaussian_factor", "true"); zhang_param.setValue("use_linear_factor", "false"); zhang_.setParameters(zhang_param); Normalizer normalizer; Param n_param(normalizer.getParameters()); n_param.setValue("method", "to_one"); normalizer.setParameters(n_param); normalizer.filterSpectrum(new_CID_spec); Size charge(2); double precursor_weight(0); // [M+H]+ if (!CID_spec.getPrecursors().empty()) { // believe charge of spectrum? if (CID_spec.getPrecursors().begin()->getCharge() != 0) { charge = CID_spec.getPrecursors().begin()->getCharge(); } else { // TODO estimate charge state } precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U); } //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl; // now delete all peaks that are right of the estimated precursor weight Size peak_counter(0); for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter) { if (it->getPosition()[0] > precursor_weight) { break; } } if (peak_counter < new_CID_spec.size()) { new_CID_spec.resize(peak_counter); } static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); Peak1D p; p.setIntensity(1); p.setPosition(oxonium_mass); new_CID_spec.push_back(p); p.setPosition(precursor_weight); new_CID_spec.push_back(p); // add complement to spectrum /* for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1) { // get m/z of complement double mz_comp = 
precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U; // search if peaks are available that have similar m/z values Size count(0); bool found(false); for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count) { if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance) { // add peak intensity to corresponding peak in new_CID_spec new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity()); } } if (!found) { // infer this peak Peak1D p; p.setIntensity(it1->getIntensity()); p.setPosition(mz_comp); new_CID_spec.push_back(p); } }*/ CompNovoIonScoringCID ion_scoring; Param ion_scoring_param(ion_scoring.getParameters()); ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_); ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_); ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_); ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold")); ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score")); ion_scoring_param.setValue("max_isotope", max_isotope_); ion_scoring.setParameters(ion_scoring_param); Map<double, IonScore> ion_scores; ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge); new_CID_spec.sortByPosition(); /* cerr << "Size of ion_scores " << ion_scores.size() << endl; for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { cerr << it->first << " " << it->second.score << endl; }*/ #ifdef WRITE_SCORED_SPEC PeakSpectrum filtered_spec(new_CID_spec); filtered_spec.clear(); for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { Peak1D p; p.setIntensity(it->second.score); p.setPosition(it->first); filtered_spec.push_back(p); } DTAFile().store("spec_scored.dta", filtered_spec); #endif 
set<String> sequences; getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores); #ifdef SPIKE_IN sequences.insert("AFCVDGEGR"); sequences.insert("APEFAAPWPDFVPR"); sequences.insert("AVKQFEESQGR"); sequences.insert("CCTESLVNR"); sequences.insert("DAFLGSFLYEYSR"); sequences.insert("DAIPENLPPLTADFAEDK"); sequences.insert("DDNKVEDIWSFLSK"); sequences.insert("DDPHACYSTVFDK"); sequences.insert("DEYELLCLDGSR"); sequences.insert("DGAESYKELSVLLPNR"); sequences.insert("DGASCWCVDADGR"); sequences.insert("DLFIPTCLETGEFAR"); sequences.insert("DTHKSEIAHR"); sequences.insert("DVCKNYQEAK"); sequences.insert("EACFAVEGPK"); sequences.insert("ECCHGDLLECADDR"); sequences.insert("EFLGDKFYTVISSLK"); sequences.insert("EFTPVLQADFQK"); sequences.insert("ELFLDSGIFQPMLQGR"); sequences.insert("ETYGDMADCCEK"); sequences.insert("EVGCPSSSVQEMVSCLR"); sequences.insert("EYEATLEECCAK"); sequences.insert("FADLIQSGTFQLHLDSK"); sequences.insert("FFSASCVPGATIEQK"); sequences.insert("FLANVSTVLTSK"); sequences.insert("FLSGSDYAIR"); sequences.insert("FTASCPPSIK"); sequences.insert("GAIEWEGIESGSVEQAVAK"); sequences.insert("GDVAFIQHSTVEENTGGK"); sequences.insert("GEPPSCAEDQSCPSER"); sequences.insert("GEYVPTSLTAR"); sequences.insert("GQEFTITGQKR"); sequences.insert("GTFAALSELHCDK"); sequences.insert("HLVDEPQNLIK"); sequences.insert("HQDCLVTTLQTQPGAVR"); sequences.insert("HTTVNENAPDQK"); sequences.insert("ILDCGSPDTEVR"); sequences.insert("KCPSPCQLQAER"); sequences.insert("KGTEFTVNDLQGK"); sequences.insert("KQTALVELLK"); sequences.insert("KVPQVSTPTLVEVSR"); sequences.insert("LALQFTTNAKR"); sequences.insert("LCVLHEKTPVSEK"); sequences.insert("LFTFHADICTLPDTEK"); sequences.insert("LGEYGFQNALIVR"); sequences.insert("LHVDPENFK"); sequences.insert("LKECCDKPLLEK"); sequences.insert("LKHLVDEPQNLIK"); sequences.insert("LKPDPNTLCDEFK"); sequences.insert("LLGNVLVVVLAR"); sequences.insert("LLVVYPWTQR"); sequences.insert("LRVDPVNFK"); sequences.insert("LTDEELAFPPLSPSR"); 
sequences.insert("LVNELTEFAK"); sequences.insert("MFLSFPTTK"); sequences.insert("MPCTEDYLSLILNR"); sequences.insert("NAPYSGYSGAFHCLK"); sequences.insert("NECFLSHKDDSPDLPK"); sequences.insert("NEPNKVPACPGSCEEVK"); sequences.insert("NLQMDDFELLCTDGR"); sequences.insert("QAGVQAEPSPK"); sequences.insert("RAPEFAAPWPDFVPR"); sequences.insert("RHPEYAVSVLLR"); sequences.insert("RPCFSALTPDETYVPK"); sequences.insert("RSLLLAPEEGPVSQR"); sequences.insert("SAFPPEPLLCSVQR"); sequences.insert("SAGWNIPIGTLLHR"); sequences.insert("SCWCVDEAGQK"); sequences.insert("SGNPNYPHEFSR"); sequences.insert("SHCIAEVEK"); sequences.insert("SISSGFFECER"); sequences.insert("SKYLASASTMDHAR"); sequences.insert("SLHTLFGDELCK"); sequences.insert("SLLLAPEEGPVSQR"); sequences.insert("SPPQCSPDGAFRPVQCK"); sequences.insert("SREGDPLAVYLK"); sequences.insert("SRQIPQCPTSCER"); sequences.insert("TAGTPVSIPVCDDSSVK"); sequences.insert("TCVADESHAGCEK"); sequences.insert("TQFGCLEGFGR"); sequences.insert("TVMENFVAFVDK"); sequences.insert("TYFPHFDLSHGSAQVK"); sequences.insert("TYMLAFDVNDEK"); sequences.insert("VDEVGGEALGR"); sequences.insert("VDLLIGSSQDDGLINR"); sequences.insert("VEDIWSFLSK"); sequences.insert("VGGHAAEYGAEALER"); sequences.insert("VGTRCCTKPESER"); sequences.insert("VKVDEVGGEALGR"); sequences.insert("VKVDLLIGSSQDDGLINR"); sequences.insert("VLDSFSNGMK"); sequences.insert("VLSAADKGNVK"); sequences.insert("VPQVSTPTLVEVSR"); sequences.insert("VTKCCTESLVNR"); sequences.insert("VVAASDASQDALGCVK"); sequences.insert("VVAGVANALAHR"); sequences.insert("YICDNQDTISSK"); sequences.insert("YLASASTMDHAR"); sequences.insert("YNGVFQECCQAEDK"); #endif SpectrumAlignmentScore spectra_zhang; spectra_zhang.setParameters(zhang_param); vector<PeptideHit> hits; Size missed_cleavages = param_.getValue("missed_cleavages"); for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it) { Size num_missed = countMissedCleavagesTryptic_(*it); if (missed_cleavages < num_missed) { //cerr << "Two many missed 
cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl; continue; } PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, *it, charge); //normalizer.filterSpectrum(CID_sim_spec); double cid_score = zhang_(CID_sim_spec, CID_spec); PeptideHit hit; hit.setScore(cid_score); hit.setSequence(getModifiedAASequence_(*it)); hit.setCharge((Int)charge); //TODO unify charge interface: int or size? hits.push_back(hit); //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl; } // rescore the top hits id.setHits(hits); id.assignRanks(); hits = id.getHits(); SpectrumAlignmentScore alignment_score; Param align_param(alignment_score.getParameters()); align_param.setValue("tolerance", fragment_mass_tolerance_); align_param.setValue("use_linear_factor", "true"); alignment_score.setParameters(align_param); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits"); if (hits.size() > number_of_prescoring_hits) { hits.resize(number_of_prescoring_hits); } for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge); normalizer.filterSpectrum(CID_sim_spec); //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec); //double cid_score = spectra_zhang(CID_sim_spec, CID_spec); double cid_score = alignment_score(CID_sim_spec, CID_spec); //cerr << "Final: " << it->getSequence() << " " << cid_score << endl; it->setScore(cid_score); } id.setHits(hits); id.assignRanks(); hits = id.getHits(); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " 
<< endl; } Size number_of_hits = param_.getValue("number_of_hits"); if (id.getHits().size() > number_of_hits) { hits.resize(number_of_hits); } id.setHits(hits); id.assignRanks(); return; }
void InspectInfile::handlePTMs(const String& modification_line, const String& modifications_filename, const bool monoisotopic) { PTMname_residues_mass_type_.clear(); // to store the information about modifications from the ptm xml file std::map<String, pair<String, String> > ptm_informations; if (!modification_line.empty()) // if modifications are used look whether whether composition and residues (and type and name) is given, the name (type) is used (then the modifications file is needed) or only the mass and residues (and type and name) is given { vector<String> modifications, mod_parts; modification_line.split(':', modifications); // get the single modifications if (modifications.empty()) modifications.push_back(modification_line); // to get masses from a formula EmpiricalFormula add_formula, substract_formula; String types = "OPT#FIX#"; String name, residues, mass, type; // 0 - mass; 1 - composition; 2 - ptm name Int mass_or_composition_or_name(-1); for (vector<String>::const_iterator mod_i = modifications.begin(); mod_i != modifications.end(); ++mod_i) { if (mod_i->empty()) { continue; } // clear the formulae add_formula = substract_formula = EmpiricalFormula(); name = residues = mass = type = ""; // get the single parts of the modification string mod_i->split(',', mod_parts); if (mod_parts.empty()) mod_parts.push_back(*mod_i); mass_or_composition_or_name = -1; // check whether the first part is a mass, composition or name // check whether it is a mass try { mass = mod_parts.front(); // to check whether the first part is a mass, it is converted into a float and then back into a string and compared to the given string // remove + signs because they don't appear in a float if (mass.hasPrefix("+")) mass.erase(0, 1); if (mass.hasSuffix("+")) mass.erase(mass.length() - 1, 1); if (mass.hasSuffix("-")) // a - sign at the end will not be converted { mass.erase(mass.length() - 1, 1); mass.insert(0, "-"); } // if it is a mass if (String(mass.toFloat()) == mass) 
mass_or_composition_or_name = 0; } catch (Exception::ConversionError& /*c_e*/) { mass_or_composition_or_name = -1; } // check whether it is a name (look it up in the corresponding file) if (mass_or_composition_or_name == -1) { if (ptm_informations.empty()) // if the ptm xml file has not been read yet, read it { if (!File::exists(modifications_filename)) { throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, modifications_filename); } if (!File::readable(modifications_filename)) { throw Exception::FileNotReadable(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, modifications_filename); } // getting all available modifications from a file PTMXMLFile().load(modifications_filename, ptm_informations); } // if the modification cannot be found if (ptm_informations.find(mod_parts.front()) != ptm_informations.end()) { mass = ptm_informations[mod_parts.front()].first; // composition residues = ptm_informations[mod_parts.front()].second; // residues name = mod_parts.front(); // name mass_or_composition_or_name = 2; } } // check whether it's an empirical formula / if a composition was given, get the mass if (mass_or_composition_or_name == -1) mass = mod_parts.front(); if (mass_or_composition_or_name == -1 || mass_or_composition_or_name == 2) { // check whether there is a positive and a negative formula String::size_type pos = mass.find("-"); try { if (pos != String::npos) { add_formula = EmpiricalFormula(mass.substr(0, pos)); substract_formula = EmpiricalFormula(mass.substr(++pos)); } else { add_formula = EmpiricalFormula(mass); } // sum up the masses if (monoisotopic) mass = String(add_formula.getMonoWeight() - substract_formula.getMonoWeight()); else mass = String(add_formula.getAverageWeight() - substract_formula.getAverageWeight()); if (mass_or_composition_or_name == -1) mass_or_composition_or_name = 1; } catch (Exception::ParseError& /*pe*/) { PTMname_residues_mass_type_.clear(); throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, 
*mod_i, "There's something wrong with this modification. Aborting!"); } } // now get the residues mod_parts.erase(mod_parts.begin()); if (mass_or_composition_or_name < 2) { if (mod_parts.empty()) { PTMname_residues_mass_type_.clear(); throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, *mod_i, "No residues for modification given. Aborting!"); } // get the residues residues = mod_parts.front(); residues.substitute('*', 'X'); residues.toUpper(); mod_parts.erase(mod_parts.begin()); } // get the type if (mod_parts.empty()) type = "OPT"; else { type = mod_parts.front(); type.toUpper(); if (types.find(type) != String::npos) mod_parts.erase(mod_parts.begin()); else type = "OPT"; } if (mod_parts.size() > 1) { PTMname_residues_mass_type_.clear(); throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, *mod_i, "There's something wrong with the type of this modification. Aborting!"); } // get the name if (mass_or_composition_or_name < 2) { if (mod_parts.empty()) name = "PTM_" + String(PTMname_residues_mass_type_.size()); else name = mod_parts.front(); } // insert the modification if (PTMname_residues_mass_type_.find(name) == PTMname_residues_mass_type_.end()) { PTMname_residues_mass_type_[name] = vector<String>(3); PTMname_residues_mass_type_[name][0] = residues; // mass must not have more than 5 digits after the . (otherwise the test may fail) PTMname_residues_mass_type_[name][1] = mass.substr(0, mass.find(".") + 6); PTMname_residues_mass_type_[name][2] = type; } else { PTMname_residues_mass_type_.clear(); throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, *mod_i, "There's already a modification with this name. Aborting!"); } } } }
void CompNovoIdentificationBase::getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, DoubleReal prefix, DoubleReal suffix) { static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight(); static DoubleReal nh3_mass = EmpiricalFormula("NH3").getMonoWeight(); static DoubleReal co_mass = EmpiricalFormula("CO").getMonoWeight(); Peak1D p; DoubleReal b_pos(0 + prefix); DoubleReal y_pos(h2o_mass + suffix); bool b_H2O_loss(false), b_NH3_loss(false), y_NH3_loss(false); for (Size i = 0; i != sequence.size() - 1; ++i) { char aa(sequence[i]); b_pos += aa_to_weight_[aa]; char aa2(sequence[sequence.size() - i - 1]); y_pos += aa_to_weight_[aa2]; for (Size z = 1; z <= charge && z < 3; ++z) { // b-ions if (b_pos >= min_mz_ && b_pos <= max_mz_) { for (Size j = 0; j != max_isotope_; ++j) { if (z == 1 /*|| b_pos > MIN_DOUBLE_MZ*/) { p.setPosition((b_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j + Constants::NEUTRON_MASS_U) / (DoubleReal)z); p.setIntensity(isotope_distributions_[(Size)b_pos][j] * 0.8 / (z * z)); spec.push_back(p); } } } // b-ion losses if (b_pos - h2o_mass > min_mz_ && b_pos - h2o_mass < max_mz_) { if (b_H2O_loss || aa == 'S' || aa == 'T' || aa == 'E' || aa == 'D') { b_H2O_loss = true; p.setPosition((b_pos + z * Constants::PROTON_MASS_U - h2o_mass) / z); p.setIntensity(0.02 / (DoubleReal)(z * z)); if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } if (b_NH3_loss || aa == 'Q' || aa == 'N' || aa == 'R' || aa == 'K') { b_NH3_loss = true; p.setPosition((b_pos + z * Constants::PROTON_MASS_U - nh3_mass) / z); p.setIntensity(0.02 / (DoubleReal)(z * z)); if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } } // a-ions only for charge 1 if (z == 1) { if (b_pos - co_mass > min_mz_ && b_pos - co_mass < max_mz_) { // a-ions p.setPosition((b_pos + z * Constants::PROTON_MASS_U - co_mass) / (DoubleReal)z); p.setIntensity(0.1f); spec.push_back(p); } } if (y_pos > min_mz_ && y_pos < max_mz_) { // y-ions for 
(Size j = 0; j != max_isotope_; ++j) { if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/) { p.setPosition((y_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z); p.setIntensity(isotope_distributions_[(Size)y_pos][j] / (DoubleReal) (z * z)); spec.push_back(p); } } // H2O loss p.setPosition((y_pos + z * Constants::PROTON_MASS_U - h2o_mass) / (DoubleReal)z); p.setIntensity(0.1 / (DoubleReal)(z * z)); if (aa2 == 'Q') // pyroglutamic acid formation { p.setIntensity(0.5f); } if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } // NH3 loss if (y_NH3_loss || aa2 == 'Q' || aa2 == 'N' || aa2 == 'R' || aa2 == 'K') { y_NH3_loss = true; p.setPosition((y_pos + z * Constants::PROTON_MASS_U - nh3_mass) / (DoubleReal)z); p.setIntensity(0.1 / (DoubleReal)(z * z)); if (z == 1 /*|| y_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } } } } // if Q1 abundant loss of water -> pyroglutamic acid formation if (sequence[0] == 'Q' && prefix == 0 && suffix == 0) { /* for (PeakSpectrum::Iterator it = spec.begin(); it != spec.end(); ++it) { it->setIntensity(it->getIntensity() * 0.5); }*/ /* for (Size j = 0; j != max_isotope; ++j) { p.setPosition((precursor_weight + charge - 1 + j)/(DoubleReal)charge); p.setIntensity(isotope_distributions_[(Int)p.getPosition()[0]][j] * 0.1); spec.push_back(p); } */ } spec.sortByPosition(); return; }
/**
  @brief Builds an Enzyme from a key/value description.

  A new Enzyme ("unknown_enzyme", empty regex) is created and filled from
  @p values. Keys are recognized by their suffix (":Name", ":RegEx",
  ":RegExDescription", ":NTermGain", ":CTermGain") or by a substring
  ("PSIid", "XTANDEMid", "OMSSAid", "Synonyms"). For the substring keys a key
  ending in ':' means that no value is defined and the entry is ignored.
  Unrecognized keys are reported on cerr and skipped.

  @param values key/value pairs describing one enzyme
  @return pointer to the newly allocated enzyme; the caller receives the
          object created with new in this function

  @note If one of the conversions (EmpiricalFormula construction, toInt) throws,
        the partially filled enzyme is deleted and the exception is passed on.
*/
const Enzyme* EnzymesDB::parseEnzyme_(Map<String, String>& values) const
{
  Enzyme* enzy_ptr = new Enzyme("unknown_enzyme", "");

  try
  {
    for (Map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
    {
      const String& key = it->first;
      const String& value = it->second;

      if (key.hasSuffix(":Name"))
      {
        enzy_ptr->setName(value);
      }
      else if (key.hasSuffix(":RegEx"))
      {
        enzy_ptr->setRegEx(value);
      }
      else if (key.hasSuffix(":RegExDescription"))
      {
        enzy_ptr->setRegExDescription(value);
      }
      else if (key.hasSuffix(":NTermGain"))
      {
        enzy_ptr->setNTermGain(EmpiricalFormula(value));
      }
      else if (key.hasSuffix(":CTermGain"))
      {
        enzy_ptr->setCTermGain(EmpiricalFormula(value));
      }
      else if (key.hasSubstring("PSIid"))
      {
        // no PSIid defined?
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->setPSIid(value);
        }
      }
      else if (key.hasSubstring("XTANDEMid"))
      {
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->setXTANDEMid(value);
        }
      }
      else if (key.hasSubstring("OMSSAid"))
      {
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->setOMSSAid(value.toInt());
        }
      }
      else if (key.hasSubstring("Synonyms"))
      {
        // no synonyms defined?
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->addSynonym(value);
        }
      }
      else
      {
        cerr << "unknown key: " << key << ", with value: " << value << endl;
      }
    }
  }
  catch (...)
  {
    // do not leak the half-built enzyme when a value cannot be converted
    delete enzy_ptr;
    throw;
  }

  return enzy_ptr;
}
void TheoreticalSpectrumGenerator::addPrecursorPeaks(RichPeakSpectrum & spec, const AASequence & peptide, Int charge) { bool add_metainfo(param_.getValue("add_metainfo").toBool()); DoubleReal pre_int((DoubleReal)param_.getValue("precursor_intensity")); DoubleReal pre_int_H2O((DoubleReal)param_.getValue("precursor_H2O_intensity")); DoubleReal pre_int_NH3((DoubleReal)param_.getValue("precursor_NH3_intensity")); bool add_isotopes(param_.getValue("add_isotopes").toBool()); int max_isotope((int)param_.getValue("max_isotope")); // precursor peak DoubleReal mono_pos = peptide.getMonoWeight(Residue::Full, charge) / DoubleReal(charge); if (add_isotopes) { IsotopeDistribution dist = peptide.getFormula(Residue::Full, charge).getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(pre_int * it->second); if (add_metainfo) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } } else { p_.setMZ(mono_pos); p_.setIntensity(pre_int); if (add_metainfo) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } // loss peaks of the precursor //loss of water EmpiricalFormula ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("H2O"); mono_pos = ion.getMonoWeight() / DoubleReal(charge); if (add_isotopes) { IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(pre_int_H2O * it->second); if (add_metainfo) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } } else { 
p_.setMZ(mono_pos); p_.setIntensity(pre_int_H2O); if (add_metainfo) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } //loss of ammonia ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("NH3"); mono_pos = ion.getMonoWeight() / DoubleReal(charge); if (add_isotopes) { IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(pre_int_NH3 * it->second); if (add_metainfo) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } } else { p_.setMZ(mono_pos); p_.setIntensity(pre_int_NH3); if (add_metainfo) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } spec.sortByPosition(); }
void TheoreticalSpectrumGenerator::addPeaks(RichPeakSpectrum & spectrum, const AASequence & peptide, Residue::ResidueType res_type, Int charge) { if (peptide.empty()) { return; } Map<DoubleReal, AASequence> ions; Map<DoubleReal, String> names; AASequence ion; DoubleReal intensity(0); bool add_first_prefix_ion(param_.getValue("add_first_prefix_ion").toBool()); // generate the ion peaks switch (res_type) { case Residue::AIon: { Size i = 1; if (!add_first_prefix_ion) { i = 2; } for (; i < peptide.size(); ++i) { ion = peptide.getPrefix(i); DoubleReal pos = ion.getMonoWeight(Residue::AIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "a" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("a_intensity"); break; } case Residue::BIon: { Size i = 1; if (!add_first_prefix_ion) { i = 2; } for (; i < peptide.size(); ++i) { ion = peptide.getPrefix(i); DoubleReal pos = ion.getMonoWeight(Residue::BIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "b" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("b_intensity"); break; } case Residue::CIon: { Size i = 1; if (!add_first_prefix_ion) { i = 2; } for (; i < peptide.size(); ++i) { ion = peptide.getPrefix(i); DoubleReal pos = ion.getMonoWeight(Residue::CIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "c" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("c_intensity"); break; } case Residue::XIon: { for (Size i = 1; i < peptide.size(); ++i) { ion = peptide.getSuffix(i); DoubleReal pos = ion.getMonoWeight(Residue::XIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "x" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("x_intensity"); break; } case Residue::YIon: { for (Size i = 1; i < peptide.size(); ++i) { ion = peptide.getSuffix(i); DoubleReal pos = ion.getMonoWeight(Residue::YIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "y" + String(i) + 
String(charge, '+'); } intensity = (DoubleReal)param_.getValue("y_intensity"); break; } case Residue::ZIon: { for (Size i = 1; i < peptide.size(); ++i) { ion = peptide.getSuffix(i); DoubleReal pos = ion.getMonoWeight(Residue::ZIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "z" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("z_intensity"); break; } default: cerr << "Cannot create peaks of that ion type" << endl; } // get the params bool add_losses(param_.getValue("add_losses").toBool()); bool add_metainfo(param_.getValue("add_metainfo").toBool()); bool add_isotopes(param_.getValue("add_isotopes").toBool()); Int max_isotope((Int)param_.getValue("max_isotope")); DoubleReal rel_loss_intensity((DoubleReal)param_.getValue("relative_loss_intensity")); for (Map<DoubleReal, AASequence>::ConstIterator cit = ions.begin(); cit != ions.end(); ++cit) { ion = cit->second; DoubleReal pos = cit->first; String ion_name = names[pos]; if (add_isotopes) { IsotopeDistribution dist = ion.getFormula(res_type, charge).getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(pos + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(intensity * it->second); if (add_metainfo && j == 0) { p_.setMetaValue("IonName", ion_name); } spectrum.push_back(p_); } } else { p_.setMZ(pos); p_.setIntensity(intensity); if (add_metainfo) { p_.setMetaValue("IonName", ion_name); } spectrum.push_back(p_); } if (add_losses) { set<String> losses; for (AASequence::ConstIterator it = cit->second.begin(); it != cit->second.end(); ++it) { if (it->hasNeutralLoss()) { vector<EmpiricalFormula> loss_formulas = it->getLossFormulas(); for (Size i = 0; i != loss_formulas.size(); ++i) { losses.insert(loss_formulas[i].toString()); } } } if (!add_isotopes) { p_.setIntensity(intensity * rel_loss_intensity); } for (set<String>::const_iterator it = 
losses.begin(); it != losses.end(); ++it) { EmpiricalFormula loss_ion = ion.getFormula(res_type, charge) - EmpiricalFormula(*it); // thanks to Chris and Sandro // check for negative element frequencies (might happen if losses are not allowed for specific ions) bool negative_elements(false); for (EmpiricalFormula::ConstIterator eit = loss_ion.begin(); eit != loss_ion.end(); ++eit) { if (eit->second < 0) { negative_elements = true; break; } } if (negative_elements) { continue; } DoubleReal loss_pos = loss_ion.getMonoWeight() / (DoubleReal)charge; String loss_name = *it; if (add_isotopes) { IsotopeDistribution dist = loss_ion.getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator iso = dist.begin(); iso != dist.end(); ++iso) { p_.setMZ((DoubleReal)(loss_pos + j) / (DoubleReal)charge); p_.setIntensity(intensity * rel_loss_intensity * iso->second); if (add_metainfo && j == 0) { p_.setMetaValue("IonName", ion_name + "-" + loss_name); } spectrum.push_back(p_); } } else { p_.setMZ(loss_pos); if (add_metainfo) { p_.setMetaValue("IonName", ion_name + "-" + loss_name); } spectrum.push_back(p_); } } } } if (add_metainfo) { p_.setMetaValue("IonName", String("")); } spectrum.sortByPosition(); return; }