// Returns the weight of @p aa according to the configured weight mode:
// the monoisotopic weight when the mode is MONO, the average weight otherwise.
double WeightWrapper::getWeight(const AASequence & aa) const
{
  const bool use_mono = (weight_mode_ == WeightWrapper::MONO);
  return use_mono ? aa.getMonoWeight() : aa.getAverageWeight();
}
// Decides whether the residue referenced by @p iterator is a cleavage site of @p protein.
//
// The decision sums, over a window of nine positions (four before the candidate
// residue up to four after it), the log-model scores stored in model_data_ and
// compares "missed" against "cleave" using log_model_threshold_.
//
// @param protein   sequence that is being digested
// @param iterator  position inside @p protein to test
// @return true if (score_missed - score_cleave) exceeds log_model_threshold_
// @throws Exception::InvalidParameter if the configured enzyme is not "Trypsin"
bool EnzymaticDigestionLogModel::isCleavageSite_(const AASequence& protein, const AASequence::ConstIterator& iterator) const
{
  // the log model is only usable with trypsin
  if (enzyme_.getName() != "Trypsin")
  {
    throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                      String("EnzymaticDigestionLogModel: enzyme '") + enzyme_.getName() + "' does not support logModel!");
  }

  // candidate residues must occur in the enzyme's regular expression
  // (R or K according to the original note) and must not be proline
  if ((!enzyme_.getRegEx().hasSubstring(iterator->getOneLetterCode())) || *iterator == 'P')
  {
    return false;
  }

  // first position of the scoring window (may be negative near the N-terminus)
  const SignedSize pos = distance(AASequence::ConstIterator(protein.begin()), iterator) - 4;
  const SignedSize protein_length = static_cast<SignedSize>(protein.size());

  double score_cleave = 0;
  double score_missed = 0;
  for (SignedSize i = 0; i < 9; ++i)
  {
    const SignedSize idx = pos + i;
    if (idx < 0 || idx >= protein_length)
    {
      continue; // window position lies outside of the protein
    }

    BindingSite_ bs(i, protein[idx].getOneLetterCode());
    Map<BindingSite_, CleavageModel_>::const_iterator pos_it = model_data_.find(bs);
    if (pos_it != model_data_.end()) // no data for non-std. amino acids
    {
      score_cleave += pos_it->second.p_cleave;
      score_missed += pos_it->second.p_miss;
    }
  }

  return score_missed - score_cleave > log_model_threshold_;
}
int main() { // construct a AASequence object, query a residue // and output some of its properties AASequence aas = AASequence::fromString("DECIANGER"); cout << aas[2].getName() << " " << aas[2].getFormula().toString() << " " << aas[2].getModificationName() << " " << aas[2].getMonoWeight() << endl; // find a modification in ModificationsDB // and output some of its properties // getInstance() returns a pointer to a ModsDB instance ResidueModification mod = ModificationsDB::getInstance()->getModification("Carbamidomethyl (C)"); cout << mod.getOrigin() << " " << mod.getFullId() << " " << mod.getDiffMonoMass() << " " << mod.getMonoMass() << endl; // set the modification on a residue of a peptide // and output some of its properties (the formula and mass have changed) // in this case ModificationsDB is used in the background // to relate the name of the mod to its attributes aas.setModification(2, "Carbamidomethyl (C)"); cout << aas[2].getName() << " " << aas[2].getFormula().toString() << " " << aas[2].getModificationName() << " " << aas[2].getMonoWeight() << endl; return 0; } //end of main
// Merges @p feature_to_merge with the feature stored in @p feature_index under the
// string form of @p labeled_feature_sequence, if there is one.
//
// When a partner exists, a copy of it is returned that carries both channel
// intensities as meta values, the summed intensity and the merged protein
// accessions; the partner is removed from @p feature_index.
// Without a partner, @p feature_to_merge is returned unchanged.
Feature ICPLLabeler::mergeFeatures_(Feature& feature_to_merge, const AASequence& labeled_feature_sequence, Map<String, Feature>& feature_index) const
{
  const String key = labeled_feature_sequence.toString();

  // nothing indexed for this sequence: the feature only exists in the second channel
  if (feature_index.count(key) == 0)
  {
    return feature_to_merge;
  }

  // start from the indexed feature and only merge the abundances into it
  Feature merged = feature_index[key];
  merged.setMetaValue(getChannelIntensityName(1), merged.getIntensity());
  merged.setMetaValue(getChannelIntensityName(2), feature_to_merge.getIntensity());
  merged.setIntensity(merged.getIntensity() + feature_to_merge.getIntensity());

  mergeProteinAccessions_(merged, feature_to_merge);

  // the indexed feature has been consumed
  feature_index.erase(key);

  return merged;
}
// Returns the string form of the sequence of the first hit of the first peptide
// identification of @p feature, with the N-terminal modification cleared if it
// equals @p label (i.e. if it is the channel specific one).
// NOTE(review): the first identification and first hit are accessed without a check - callers presumably guarantee they exist.
String ICPLLabeler::getUnmodifiedAASequence_(const Feature& feature, const String& label) const
{
  AASequence sequence = feature.getPeptideIdentifications()[0].getHits()[0].getSequence();

  const bool carries_channel_label = (sequence.getNTerminalModification() == label);
  if (carries_channel_label)
  {
    sequence.setNTerminalModification("");
  }

  return sequence.toString();
}
// Counts the peptides a digestion of @p protein produces: one, plus one for every
// call of nextCleavageSite_() that stops before the end of the protein.
Size EnzymaticDigestionLogModel::peptideCount(const AASequence& protein)
{
  SignedSize peptides = 1;
  AASequence::ConstIterator cursor = protein.begin();

  while (true)
  {
    nextCleavageSite_(protein, cursor);
    if (cursor != protein.end())
    {
      ++peptides;
    }
    else
    {
      break;
    }
  }

  const Size result = peptides;
  return result;
}
// Builds a string for @p sequence residue by residue: residues registered in
// residue_to_name_ contribute their registered name, all others their one letter code.
String CompNovoIdentificationBase::getModifiedStringFromAASequence_(const AASequence & sequence)
{
  String result;

  AASequence::ConstIterator res_it = sequence.begin();
  while (res_it != sequence.end())
  {
    if (!residue_to_name_.has(&*res_it))
    {
      result += res_it->getOneLetterCode();
    }
    else
    {
      result += residue_to_name_[&*res_it];
    }
    ++res_it;
  }

  return result;
}
// Builds a ':'-separated modification string for @p sequence:
// the N-terminal modification, then one entry per residue (its modification,
// possibly empty), then the C-terminal modification if that one is not empty.
String IBSpectraFile::getModifString_(const AASequence& sequence)
{
  String result = sequence.getNTerminalModification();

  AASequence::ConstIterator residue = sequence.begin();
  while (residue != sequence.end())
  {
    result += ":" + residue->getModification();
    ++residue;
  }

  // the C-terminus only gets an entry when it is actually modified
  if (!(sequence.getCTerminalModification() == ""))
  {
    result += ":" + sequence.getCTerminalModification();
  }

  return result;
}
void EnzymaticDigestion::digest(const AASequence& protein, vector<AASequence>& output) const { // initialization output.clear(); // naive cleavage sites Size missed_cleavages = missed_cleavages_; std::vector<Size> pep_positions = tokenize_(protein.toUnmodifiedString()); Size count = pep_positions.size(); Size begin = pep_positions[0]; for (Size i = 1; i < count; ++i) { output.push_back(protein.getSubsequence(begin, pep_positions[i] - begin)); begin = pep_positions[i]; } output.push_back(protein.getSubsequence(begin, protein.size() - begin)); // missed cleavages if (pep_positions.size() > 0 && missed_cleavages_ != 0) // there is at least one cleavage site! { // generate fragments with missed cleavages for (Size i = 1; ((i <= missed_cleavages) && (count > i)); ++i) { begin = pep_positions[0]; for (Size j = 1; j < count - i; ++j) { output.push_back(protein.getSubsequence(begin, pep_positions[j + i] - begin)); begin = pep_positions[j]; } output.push_back(protein.getSubsequence(begin, protein.size() - begin)); } } }
// Sets @p label as N-terminal modification on the sequence of every protein hit of
// the first protein identification of @p features whose N-terminus is still
// unmodified. Does nothing if @p features has no protein identification.
void ICPLLabeler::addLabelToProteinHits_(SimTypes::FeatureMapSim& features, const String& label) const
{
  // nothing to label without a protein identification
  if (features.getProteinIdentifications().empty())
  {
    return;
  }

  std::vector<ProteinHit>& hits = features.getProteinIdentifications()[0].getHits();
  for (std::vector<ProteinHit>::iterator hit = hits.begin(); hit != hits.end(); ++hit)
  {
    AASequence sequence = AASequence::fromString(hit->getSequence());

    // an already modified N-terminus is not accessible for the label
    if (!(sequence.getNTerminalModification() == ""))
    {
      continue;
    }

    sequence.setNTerminalModification(label);
    hit->setSequence(sequence.toString());
  }
}
// Returns the number of peptides a digestion of @p protein yields, taking
// missed_cleavages_ into account.
Size ProteaseDigestion::peptideCount(const AASequence& protein)
{
  // Unspecific cleavage: every cutting position may be skipped,
  // which gives (n + 1) choose 2 products.
  if (enzyme_->getName() == UnspecificCleavage)
  {
    const Size n = protein.size();
    return (n + 1) * n / 2;
  }

  const std::vector<int> pep_positions = tokenize_(protein.toUnmodifiedString());
  const Size fragments = pep_positions.size();

  // fully cleaved peptides, plus (fragments - k) peptides for every allowed
  // number k of missed cleavages
  Size total = fragments;
  for (Size skipped = 1; skipped < fragments && skipped <= missed_cleavages_; ++skipped)
  {
    total += fragments - skipped;
  }
  return total;
}
// Appends the isotope cluster of @p ion to @p spectrum.
//
// One peak is added per entry of the isotope distribution (limited by
// max_isotope_). Peak j is placed j neutron masses above the monoisotopic
// mass, i.e. at m/z = mono_mz + j * NEUTRON_MASS_U / charge, with intensity
// @p intensity scaled by the isotope's probability.
//
// @param spectrum   spectrum the peaks are appended to
// @param ion        fragment ion sequence
// @param res_type   ion type used for weight and formula
// @param charge     charge state of the ion
// @param intensity  intensity of the whole cluster before isotope scaling
void TheoreticalSpectrumGenerator::addIsotopeCluster_(RichPeakSpectrum & spectrum, const AASequence & ion, Residue::ResidueType res_type, Int charge, double intensity) const
{
  const double charge_d = static_cast<double>(charge);

  // m/z of the monoisotopic peak
  const double mono_mz = ion.getMonoWeight(res_type, charge) / charge_d;

  // m/z distance of neighbouring isotope peaks; the mass shift is divided by
  // the charge exactly once (mono_mz already is an m/z value)
  // TODO: this is usually dominated by 13C-12C mass shift which deviates a bit from neutron mass
  const double isotope_spacing = Constants::NEUTRON_MASS_U / charge_d;

  RichPeak1D p;
  IsotopeDistribution dist = ion.getFormula(res_type, charge).getIsotopeDistribution(max_isotope_);

  UInt j(0);
  for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
  {
    p.setMZ(mono_mz + static_cast<double>(j) * isotope_spacing);
    p.setIntensity(intensity * it->second);

    // the ion name is attached when the monoisotopic peak is built
    if (add_metainfo_ && j == 0)
    {
      String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + String(charge, '+');
      p.setMetaValue("IonName", ion_name);
    }
    spectrum.push_back(p);
  }
}
bool ProteaseDigestion::isValidProduct(const AASequence& protein, int pep_pos, int pep_length, bool ignore_missed_cleavages, bool allow_nterm_protein_cleavage, bool allow_random_asp_pro_cleavage) const { String seq = protein.toUnmodifiedString(); return isValidProduct_(seq, pep_pos, pep_length, ignore_missed_cleavages, allow_nterm_protein_cleavage, allow_random_asp_pro_cleavage); }
// Returns the positions (ascending, zero-based) of all S, T and Y residues in the
// unmodified string of @p without_phospho.
vector<Size> AScore::getSites_(const AASequence& without_phospho) const
{
  const String residues = without_phospho.toUnmodifiedString();

  vector<Size> sites;
  for (Size pos = 0; pos != residues.size(); ++pos)
  {
    switch (residues[pos])
    {
      case 'S':
      case 'T':
      case 'Y':
        sites.push_back(pos);
        break;

      default:
        break;
    }
  }
  return sites;
}
// Advances @p iterator to the position directly behind the next cleavage site of
// @p protein, or to protein.end() if no further cleavage site is found.
void EnzymaticDigestionLogModel::nextCleavageSite_(const AASequence& protein, AASequence::ConstIterator& iterator) const
{
  for (; iterator != protein.end(); ++iterator)
  {
    if (isCleavageSite_(protein, iterator))
    {
      // step over the cleavage site itself; 'break' skips the loop increment
      ++iterator;
      break;
    }
  }
}
// Returns the number of peptides a digestion of @p protein yields:
// the fully cleaved peptides plus, for every number k of missed cleavages
// from 1 to missed_cleavages_, the (fragments - k) peptides spanning k sites.
Size EnzymaticDigestion::peptideCount(const AASequence& protein)
{
  const std::vector<Size> pep_positions = tokenize_(protein.toUnmodifiedString());
  const Size fragments = pep_positions.size();

  Size total = fragments;
  for (Size skipped = 1; skipped < fragments && skipped <= missed_cleavages_; ++skipped)
  {
    total += fragments - skipped;
  }
  return total;
}
void EnzymaticDigestionLogModel::digest(const AASequence& protein, vector<AASequence>& output) const { // initialization output.clear(); AASequence::ConstIterator begin = protein.begin(); AASequence::ConstIterator end = protein.begin(); while (nextCleavageSite_(protein, end), end != protein.end()) { output.push_back(protein.getSubsequence(begin - protein.begin(), end - begin)); begin = end; } output.push_back(protein.getSubsequence(begin - protein.begin(), end - begin)); }
// Reconciles a newly observed charge with the charge recorded so far for @p peptide.
//
// A recorded charge of 0 is replaced by @p new_charge. A @p new_charge of 0, or one
// equal to the recorded charge, changes nothing.
//
// @throws Exception::InvalidValue if both charges are non-zero and differ
void ConsensusIDAlgorithm::compareChargeStates_(Int& recorded_charge, Int new_charge, const AASequence& peptide)
{
  // nothing recorded yet: take over the new charge
  if (recorded_charge == 0)
  {
    recorded_charge = new_charge;
    return;
  }

  // no new information, or agreement with the recorded charge
  if ((new_charge == 0) || (recorded_charge == new_charge))
  {
    return;
  }

  // maybe TODO: calculate correct charge from prec. m/z and peptide mass?
  String msg = "Conflicting charge states found for peptide '" + peptide.toString() + "': " + String(recorded_charge) + ", " + String(new_charge);
  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, msg, String(new_charge));
}
// Applies the SILAC labels to the sequences of all protein hits of the first
// protein identification of @p channel: every 'R' residue receives
// @p arginine_label, every 'K' residue @p lysine_label.
//
// Does nothing if @p channel carries no protein identification.
//
// @param channel         feature map whose protein hits are relabeled in place
// @param arginine_label  modification set on arginine residues
// @param lysine_label    modification set on lysine residues
void SILACLabeler::applyLabelToProteinHit_(SimTypes::FeatureMapSim& channel, const String& arginine_label, const String& lysine_label) const
{
  // check that a protein identification exists before accessing element 0
  if (channel.getProteinIdentifications().empty())
  {
    return;
  }

  std::vector<ProteinHit>& hits = channel.getProteinIdentifications()[0].getHits();
  for (std::vector<ProteinHit>::iterator protein_hit = hits.begin(); protein_hit != hits.end(); ++protein_hit)
  {
    AASequence aa = AASequence::fromString(protein_hit->getSequence());

    for (AASequence::Iterator residue = aa.begin(); residue != aa.end(); ++residue)
    {
      if (*residue == 'R')
      {
        aa.setModification(residue - aa.begin(), arginine_label);
      }
      else if (*residue == 'K')
      {
        aa.setModification(residue - aa.begin(), lysine_label);
      }
    }

    protein_hit->setSequence(aa.toString());
  }
}
// Digests @p protein and stores all peptides whose length lies within
// [@p min_length, @p max_length] in @p output (which is cleared first).
//
// A @p max_length of 0, or one exceeding the protein length, is replaced by the
// protein length. For unspecific cleavage the number of missed cleavages is
// unlimited, otherwise missed_cleavages_ applies.
//
// @return number of generated peptides that were discarded because of their length
Size ProteaseDigestion::digest(const AASequence& protein, vector<AASequence>& output, Size min_length, Size max_length) const
{
  output.clear();

  // disable the max length filter by setting it to the protein length
  const Size protein_length = protein.size();
  if (max_length == 0 || max_length > protein_length)
  {
    max_length = protein_length;
  }

  const Size mc = (enzyme_->getName() == UnspecificCleavage) ? std::numeric_limits<Size>::max() : missed_cleavages_;
  Size wrong_size(0);

  // cleavage positions; after appending the protein length the vector
  // holds 0, x1, ... xn, end
  std::vector<int> pep_positions = tokenize_(protein.toUnmodifiedString());
  pep_positions.push_back(protein.size());
  const Size count = pep_positions.size();

  // Pass 0 produces the fully cleaved peptides, pass k (k >= 1) the peptides
  // with k missed cleavages. Passes beyond 0 only run while k stays below the
  // number of real cleavage sites.
  for (Size mcs = 0; mcs <= mc; ++mcs)
  {
    if (mcs > 0 && mcs >= count - 1)
    {
      break;
    }

    Size begin = pep_positions[0];
    for (Size j = 1; j < count - mcs; ++j)
    {
      const Size l = pep_positions[j + mcs] - begin;
      if (l < min_length || l > max_length)
      {
        ++wrong_size;
      }
      else
      {
        output.push_back(protein.getSubsequence(begin, l));
      }
      begin = pep_positions[j];
    }
  }

  return wrong_size;
}
TEST_EQUAL(out[5].toString(),"PLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFLGSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCKDDPHACYSTVFDKLKHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRK") TEST_EQUAL(out[6].toString(),"VPQVSTPTLVEVSRSLGK") TEST_EQUAL(out[7].toString(),"VGTRCCTK") TEST_EQUAL(out[8].toString(),"PESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCCTESLVNRR") TEST_EQUAL(out[9].toString(),"PCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQTALVELLKHK") TEST_EQUAL(out[10].toString(),"PKATEEQLKTVMENFVAFDKCCAADDKEACFAVEGPKLVVSTQTALA") END_SECTION START_SECTION(( bool isValidProduct(const AASequence& protein, Size pep_pos, Size pep_length) )) EnzymaticDigestion ed; ed.setEnzyme(EnzymaticDigestion::ENZYME_TRYPSIN); ed.setSpecificity(EnzymaticDigestion::SPEC_FULL); // require both sides AASequence prot = AASequence("ABCDEFGKABCRAAAKAARPBBBB"); TEST_EQUAL(ed.isValidProduct(prot, 100, 3), false); // invalid position TEST_EQUAL(ed.isValidProduct(prot, 10, 300), false); // invalid length TEST_EQUAL(ed.isValidProduct(prot, 10, 0), false); // invalid size TEST_EQUAL(ed.isValidProduct(AASequence(""), 10, 0), false); // invalid size TEST_EQUAL(ed.isValidProduct(prot, 0, 3), false); // invalid N-term TEST_EQUAL(ed.isValidProduct(prot, 0, 8), true); // valid N-term TEST_EQUAL(ed.isValidProduct(prot, 8, 4), true); // valid fully-tryptic TEST_EQUAL(ed.isValidProduct(prot, 8, 8), true); // valid fully-tryptic TEST_EQUAL(ed.isValidProduct(prot, 0, 19), false); // invalid C-term - followed by proline TEST_EQUAL(ed.isValidProduct(prot, 8, 3), false); // invalid C-term TEST_EQUAL(ed.isValidProduct(prot, 3, 6), false); // invalid C+N-term TEST_EQUAL(ed.isValidProduct(prot, 1, 7), false); // invalid N-term TEST_EQUAL(ed.isValidProduct(prot, 0, prot.size()), true); // the whole thing
bool EnzymaticDigestion::isValidProduct(const AASequence& protein, Size pep_pos, Size pep_length, bool methionine_cleavage, bool ignore_missed_cleavages) const { if (pep_pos >= protein.size()) { LOG_WARN << "Error: start of peptide (" << pep_pos << ") is beyond end of protein '" << protein.toString() << "'!" << endl; return false; } else if (pep_pos + pep_length > protein.size()) { LOG_WARN << "Error: end of peptide (" << (pep_pos + pep_length) << ") is beyond end of protein '" << protein.toString() << "'!" << endl; return false; } else if (pep_length == 0 || protein.size() == 0) { LOG_WARN << "Error: peptide or protein must not be empty!" << endl; return false; } if (specificity_ == SPEC_NONE) { return true; // we don't care about terminal ends } else // either SPEC_SEMI or SPEC_FULL { bool spec_c = false, spec_n = false; std::vector<Size> pep_positions = tokenize_(protein.toUnmodifiedString()); // initialize start and end std::vector<Size>::const_iterator begin_pos, end_pos; begin_pos = end_pos = pep_positions.end(); // test each end if (pep_pos == 0 || (begin_pos = std::find(pep_positions.begin(), pep_positions.end(), pep_pos)) != pep_positions.end()) { spec_n = true; } // if allow methionine cleavage at the protein start position if (pep_pos == 1 && methionine_cleavage && protein.getResidue((Size)0).getOneLetterCode() == "M") { // methionine_cleavage:consider the first product for begin_pos begin_pos = pep_positions.begin(); spec_n = true; } if (pep_pos + pep_length == protein.size() || (end_pos = std::find(pep_positions.begin(), pep_positions.end(), pep_pos + pep_length)) != pep_positions.end()) { spec_c = true; } if (spec_n && spec_c) { if (ignore_missed_cleavages) { return true; } Size offset = std::distance(begin_pos, end_pos); if (pep_pos + pep_length == protein.size()) { return (pep_positions.size() <= getMissedCleavages() + 1); } else if (offset > getMissedCleavages() + 1) { return false; } else if (offset == 0) { // This corner case needs to be checked 
when peptide is at the start and the end of the protein. // We check with the total number of cleavages. return (pep_positions.size() >= getMissedCleavages() + 1); } else { return true; } } else if ((specificity_ == SPEC_SEMI) && (spec_n || spec_c)) { return true; // one only for SEMI } else { return false; } } }
void TheoreticalSpectrumGenerator::addPrecursorPeaks(RichPeakSpectrum & spec, const AASequence & peptide, Int charge) const { RichPeak1D p; // precursor peak double mono_pos = peptide.getMonoWeight(Residue::Full, charge) / double(charge); if (add_isotopes_) { IsotopeDistribution dist = peptide.getFormula(Residue::Full, charge).getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge); p.setIntensity(pre_int_ * it->second); if (add_metainfo_) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } } else { p.setMZ(mono_pos); p.setIntensity(pre_int_); if (add_metainfo_) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } // loss peaks of the precursor //loss of water EmpiricalFormula ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("H2O"); mono_pos = ion.getMonoWeight() / double(charge); if (add_isotopes_) { IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge); p.setIntensity(pre_int_H2O_ * it->second); if (add_metainfo_) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } } else { p.setMZ(mono_pos); p.setIntensity(pre_int_H2O_); if (add_metainfo_) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } //loss of ammonia ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("NH3"); mono_pos = ion.getMonoWeight() / double(charge); if (add_isotopes_) { IsotopeDistribution dist = 
ion.getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge); p.setIntensity(pre_int_NH3_ * it->second); if (add_metainfo_) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } } else { p.setMZ(mono_pos); p.setIntensity(pre_int_NH3_); if (add_metainfo_) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p.setMetaValue("IonName", name); } spec.push_back(p); } spec.sortByPosition(); }
void TheoreticalSpectrumGenerator::addPeaks(RichPeakSpectrum & spectrum, const AASequence & peptide, Residue::ResidueType res_type, Int charge) const { if (peptide.empty()) { return; } spectrum.reserve(peptide.size()); // Generate the ion peaks: // Does not generate peaks of full peptide (therefore "<"). // They are added via precursor mass (and neutral losses). // Could be changed in the future. switch (res_type) { case Residue::AIon: { Size i = 1; if (!add_first_prefix_ion_) { i = 2; } for (; i < peptide.size(); ++i) { AASequence ion = peptide.getPrefix(i); if (add_isotopes_) // add isotope cluster { addIsotopeCluster_(spectrum, ion, res_type, charge, a_intensity_); } else // add single peak { double pos = ion.getMonoWeight(Residue::AIon, charge) / static_cast<double>(charge); addPeak_(spectrum, pos, a_intensity_, res_type, i, charge); } if (add_losses_) { addLosses_(spectrum, ion, a_intensity_, res_type, charge); } } break; } case Residue::BIon: { Size i = 1; if (!add_first_prefix_ion_) { i = 2; } for (; i < peptide.size(); ++i) { AASequence ion = peptide.getPrefix(i); if (add_isotopes_) { addIsotopeCluster_(spectrum, ion, res_type, charge, b_intensity_); } else { double pos = ion.getMonoWeight(Residue::BIon, charge) / static_cast<double>(charge); addPeak_(spectrum, pos, b_intensity_, res_type, i, charge); } if (add_losses_) { addLosses_(spectrum, ion, b_intensity_, res_type, charge); } } break; } case Residue::CIon: { Size i = 1; if (!add_first_prefix_ion_) { i = 2; } if (peptide.size() < 2) { //"Cannot create c ions of a monomer." 
throw Exception::InvalidSize(__FILE__, __LINE__, __PRETTY_FUNCTION__, 1); } for (; i < peptide.size(); ++i) { AASequence ion = peptide.getPrefix(i); if (add_isotopes_) { addIsotopeCluster_(spectrum, ion, res_type, charge, c_intensity_); } else { double pos = ion.getMonoWeight(Residue::CIon, charge) / static_cast<double>(charge); addPeak_(spectrum, pos, c_intensity_, res_type, i, charge); } if (add_losses_) { addLosses_(spectrum, ion, c_intensity_, res_type, charge); } } break; } case Residue::XIon: { Size i = 1; if (!add_first_prefix_ion_) { i = 2; } if (peptide.size() < 2) { // "Cannot create c ions of a monomer." throw Exception::InvalidSize(__FILE__, __LINE__, __PRETTY_FUNCTION__, 1); } for (; i < peptide.size(); ++i) { AASequence ion = peptide.getSuffix(i); if (add_isotopes_) { addIsotopeCluster_(spectrum, ion, res_type, charge, x_intensity_); } else { double pos = ion.getMonoWeight(Residue::XIon, charge) / static_cast<double>(charge); addPeak_(spectrum, pos, x_intensity_, res_type, i, charge); } if (add_losses_) { addLosses_(spectrum, ion, x_intensity_, res_type, charge); } } break; } case Residue::YIon: { for (Size i = 1; i < peptide.size(); ++i) { AASequence ion = peptide.getSuffix(i); if (add_isotopes_) { addIsotopeCluster_(spectrum, ion, res_type, charge, y_intensity_); } else { double pos = ion.getMonoWeight(Residue::YIon, charge) / static_cast<double>(charge); addPeak_(spectrum, pos, y_intensity_, res_type, i, charge); } if (add_losses_) { addLosses_(spectrum, ion, y_intensity_, res_type, charge); } } break; } case Residue::ZIon: { for (Size i = 1; i < peptide.size(); ++i) { AASequence ion = peptide.getSuffix(i); if (add_isotopes_) { addIsotopeCluster_(spectrum, ion, res_type, charge, z_intensity_); } else { double pos = ion.getMonoWeight(Residue::ZIon, charge) / static_cast<double>(charge); addPeak_(spectrum, pos, z_intensity_, res_type, i, charge); } if (add_losses_) { addLosses_(spectrum, ion, z_intensity_, res_type, charge); } } break; } default: 
cerr << "Cannot create peaks of that ion type" << endl; } spectrum.sortByPosition(); return; }
void TheoreticalSpectrumGenerator::addLosses_(RichPeakSpectrum & spectrum, const AASequence & ion, double intensity, Residue::ResidueType res_type, int charge) const { RichPeak1D p; set<String> losses; for (AASequence::ConstIterator it = ion.begin(); it != ion.end(); ++it) { if (it->hasNeutralLoss()) { vector<EmpiricalFormula> loss_formulas = it->getLossFormulas(); for (Size i = 0; i != loss_formulas.size(); ++i) { losses.insert(loss_formulas[i].toString()); } } } if (!add_isotopes_) { p.setIntensity(intensity * rel_loss_intensity_); } for (set<String>::const_iterator it = losses.begin(); it != losses.end(); ++it) { EmpiricalFormula loss_ion = ion.getFormula(res_type, charge) - EmpiricalFormula(*it); // thanks to Chris and Sandro // check for negative element frequencies (might happen if losses are not allowed for specific ions) bool negative_elements(false); for (EmpiricalFormula::ConstIterator eit = loss_ion.begin(); eit != loss_ion.end(); ++eit) { if (eit->second < 0) { negative_elements = true; break; } } if (negative_elements) { continue; } double loss_pos = loss_ion.getMonoWeight() / (double)charge; const String& loss_name = *it; if (add_isotopes_) { IsotopeDistribution dist = loss_ion.getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator iso = dist.begin(); iso != dist.end(); ++iso) { p.setMZ((double)(loss_pos + j) / (double)charge); p.setIntensity(intensity * rel_loss_intensity_ * iso->second); if (add_metainfo_ && j == 0) { // note: important to construct a string from char. If omitted it will perform pointer arithmetics on the "-" string literal String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+'); p.setMetaValue("IonName", ion_name); } spectrum.push_back(p); } } else { p.setMZ(loss_pos); if (add_metainfo_) { // note: important to construct a string from char. 
If omitted it will perform pointer arithmetics on the "-" string literal String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+'); p.setMetaValue("IonName", ion_name); } spectrum.push_back(p); } } }
void TheoreticalSpectrumGenerator::addPrecursorPeaks(RichPeakSpectrum & spec, const AASequence & peptide, Int charge) { bool add_metainfo(param_.getValue("add_metainfo").toBool()); DoubleReal pre_int((DoubleReal)param_.getValue("precursor_intensity")); DoubleReal pre_int_H2O((DoubleReal)param_.getValue("precursor_H2O_intensity")); DoubleReal pre_int_NH3((DoubleReal)param_.getValue("precursor_NH3_intensity")); bool add_isotopes(param_.getValue("add_isotopes").toBool()); int max_isotope((int)param_.getValue("max_isotope")); // precursor peak DoubleReal mono_pos = peptide.getMonoWeight(Residue::Full, charge) / DoubleReal(charge); if (add_isotopes) { IsotopeDistribution dist = peptide.getFormula(Residue::Full, charge).getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(pre_int * it->second); if (add_metainfo) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } } else { p_.setMZ(mono_pos); p_.setIntensity(pre_int); if (add_metainfo) { String name("[M+H]+"); if (charge == 2) { name = "[M+2H]++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } // loss peaks of the precursor //loss of water EmpiricalFormula ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("H2O"); mono_pos = ion.getMonoWeight() / DoubleReal(charge); if (add_isotopes) { IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(pre_int_H2O * it->second); if (add_metainfo) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } } else { 
p_.setMZ(mono_pos); p_.setIntensity(pre_int_H2O); if (add_metainfo) { String name("[M+H]-H2O+"); if (charge == 2) { name = "[M+2H]-H2O++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } //loss of ammonia ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("NH3"); mono_pos = ion.getMonoWeight() / DoubleReal(charge); if (add_isotopes) { IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(pre_int_NH3 * it->second); if (add_metainfo) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } } else { p_.setMZ(mono_pos); p_.setIntensity(pre_int_NH3); if (add_metainfo) { String name("[M+H]-NH3+"); if (charge == 2) { name = "[M+2H]-NH3++"; } p_.setMetaValue("IonName", name); } spec.push_back(p_); } spec.sortByPosition(); }
ww.setWeightMode(WeightWrapper::AVERAGE); TEST_EQUAL(ww.getWeightMode(), WeightWrapper::AVERAGE) } END_SECTION START_SECTION((WEIGHTMODE getWeightMode() const )) { WeightWrapper ww; TEST_EQUAL(ww.getWeightMode(), WeightWrapper::MONO) } END_SECTION START_SECTION((double getWeight(const AASequence &aa) const )) { WeightWrapper ww; AASequence aa= AASequence::fromString("DFINAGER"); TEST_EQUAL(ww.getWeight(aa), aa.getMonoWeight()) WeightWrapper ww2(WeightWrapper::AVERAGE); TEST_EQUAL(ww2.getWeight(aa), aa.getAverageWeight()) } END_SECTION START_SECTION((double getWeight(const EmpiricalFormula &ef) const )) { WeightWrapper ww; EmpiricalFormula aa("C12H544"); TEST_EQUAL(ww.getWeight(aa), aa.getMonoWeight()) WeightWrapper ww2(WeightWrapper::AVERAGE); TEST_EQUAL(ww2.getWeight(aa), aa.getAverageWeight()) } END_SECTION
void TheoreticalSpectrumGenerator::addPeaks(RichPeakSpectrum & spectrum, const AASequence & peptide, Residue::ResidueType res_type, Int charge) { if (peptide.empty()) { return; } Map<DoubleReal, AASequence> ions; Map<DoubleReal, String> names; AASequence ion; DoubleReal intensity(0); bool add_first_prefix_ion(param_.getValue("add_first_prefix_ion").toBool()); // generate the ion peaks switch (res_type) { case Residue::AIon: { Size i = 1; if (!add_first_prefix_ion) { i = 2; } for (; i < peptide.size(); ++i) { ion = peptide.getPrefix(i); DoubleReal pos = ion.getMonoWeight(Residue::AIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "a" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("a_intensity"); break; } case Residue::BIon: { Size i = 1; if (!add_first_prefix_ion) { i = 2; } for (; i < peptide.size(); ++i) { ion = peptide.getPrefix(i); DoubleReal pos = ion.getMonoWeight(Residue::BIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "b" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("b_intensity"); break; } case Residue::CIon: { Size i = 1; if (!add_first_prefix_ion) { i = 2; } for (; i < peptide.size(); ++i) { ion = peptide.getPrefix(i); DoubleReal pos = ion.getMonoWeight(Residue::CIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "c" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("c_intensity"); break; } case Residue::XIon: { for (Size i = 1; i < peptide.size(); ++i) { ion = peptide.getSuffix(i); DoubleReal pos = ion.getMonoWeight(Residue::XIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "x" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("x_intensity"); break; } case Residue::YIon: { for (Size i = 1; i < peptide.size(); ++i) { ion = peptide.getSuffix(i); DoubleReal pos = ion.getMonoWeight(Residue::YIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "y" + String(i) + 
String(charge, '+'); } intensity = (DoubleReal)param_.getValue("y_intensity"); break; } case Residue::ZIon: { for (Size i = 1; i < peptide.size(); ++i) { ion = peptide.getSuffix(i); DoubleReal pos = ion.getMonoWeight(Residue::ZIon, charge) / (DoubleReal)charge; ions[pos] = ion; names[pos] = "z" + String(i) + String(charge, '+'); } intensity = (DoubleReal)param_.getValue("z_intensity"); break; } default: cerr << "Cannot create peaks of that ion type" << endl; } // get the params bool add_losses(param_.getValue("add_losses").toBool()); bool add_metainfo(param_.getValue("add_metainfo").toBool()); bool add_isotopes(param_.getValue("add_isotopes").toBool()); Int max_isotope((Int)param_.getValue("max_isotope")); DoubleReal rel_loss_intensity((DoubleReal)param_.getValue("relative_loss_intensity")); for (Map<DoubleReal, AASequence>::ConstIterator cit = ions.begin(); cit != ions.end(); ++cit) { ion = cit->second; DoubleReal pos = cit->first; String ion_name = names[pos]; if (add_isotopes) { IsotopeDistribution dist = ion.getFormula(res_type, charge).getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j) { p_.setMZ((DoubleReal)(pos + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge); p_.setIntensity(intensity * it->second); if (add_metainfo && j == 0) { p_.setMetaValue("IonName", ion_name); } spectrum.push_back(p_); } } else { p_.setMZ(pos); p_.setIntensity(intensity); if (add_metainfo) { p_.setMetaValue("IonName", ion_name); } spectrum.push_back(p_); } if (add_losses) { set<String> losses; for (AASequence::ConstIterator it = cit->second.begin(); it != cit->second.end(); ++it) { if (it->hasNeutralLoss()) { vector<EmpiricalFormula> loss_formulas = it->getLossFormulas(); for (Size i = 0; i != loss_formulas.size(); ++i) { losses.insert(loss_formulas[i].toString()); } } } if (!add_isotopes) { p_.setIntensity(intensity * rel_loss_intensity); } for (set<String>::const_iterator it = 
losses.begin(); it != losses.end(); ++it) { EmpiricalFormula loss_ion = ion.getFormula(res_type, charge) - EmpiricalFormula(*it); // thanks to Chris and Sandro // check for negative element frequencies (might happen if losses are not allowed for specific ions) bool negative_elements(false); for (EmpiricalFormula::ConstIterator eit = loss_ion.begin(); eit != loss_ion.end(); ++eit) { if (eit->second < 0) { negative_elements = true; break; } } if (negative_elements) { continue; } DoubleReal loss_pos = loss_ion.getMonoWeight() / (DoubleReal)charge; String loss_name = *it; if (add_isotopes) { IsotopeDistribution dist = loss_ion.getIsotopeDistribution(max_isotope); UInt j(0); for (IsotopeDistribution::ConstIterator iso = dist.begin(); iso != dist.end(); ++iso) { p_.setMZ((DoubleReal)(loss_pos + j) / (DoubleReal)charge); p_.setIntensity(intensity * rel_loss_intensity * iso->second); if (add_metainfo && j == 0) { p_.setMetaValue("IonName", ion_name + "-" + loss_name); } spectrum.push_back(p_); } } else { p_.setMZ(loss_pos); if (add_metainfo) { p_.setMetaValue("IonName", ion_name + "-" + loss_name); } spectrum.push_back(p_); } } } } if (add_metainfo) { p_.setMetaValue("IonName", String("")); } spectrum.sortByPosition(); return; }
bool ModificationDefinitionsSet::isCompatible(const AASequence & peptide) const { set<String> var_names(getVariableModificationNames()), fixed_names(getFixedModificationNames()); // no modifications present and needed if (fixed_names.empty() && !peptide.isModified()) { return true; } // check whether the fixed modifications are fulfilled if (!fixed_names.empty()) { for (set<String>::const_iterator it1 = fixed_names.begin(); it1 != fixed_names.end(); ++it1) { String origin = ModificationsDB::getInstance()->getModification(*it1).getOrigin(); // only single 1lc amino acids are allowed if (origin.size() != 1) { continue; } for (AASequence::ConstIterator it2 = peptide.begin(); it2 != peptide.end(); ++it2) { if (origin == it2->getOneLetterCode()) { // check whether the residue is modified (has to be) if (!it2->isModified()) { return false; } // check whether the modification is the same if (ModificationsDB::getInstance()->getModification(*it1).getId() != it2->getModification()) { return false; } } } } } // check wether other modifications than the variable are present for (AASequence::ConstIterator it = peptide.begin(); it != peptide.end(); ++it) { if (it->isModified()) { String mod = ModificationsDB::getInstance()->getModification(it->getOneLetterCode(), it->getModification(), ResidueModification::ANYWHERE).getFullId(); if (var_names.find(mod) == var_names.end() && fixed_names.find(mod) == fixed_names.end()) { return false; } } } if (peptide.hasNTerminalModification()) { String mod = ModificationsDB::getInstance()->getTerminalModification(peptide.getNTerminalModification(), ResidueModification::N_TERM).getFullId(); if (var_names.find(mod) == var_names.end() && fixed_names.find(mod) == fixed_names.end()) { return false; } } if (peptide.hasCTerminalModification()) { String mod = ModificationsDB::getInstance()->getTerminalModification(peptide.getCTerminalModification(), ResidueModification::C_TERM).getFullId(); if (var_names.find(mod) == var_names.end() && 
fixed_names.find(mod) == fixed_names.end()) { return false; } } return true; }