void EnzymaticDigestionLogModel::digest(const AASequence& protein, vector<AASequence>& output) const { // initialization output.clear(); AASequence::ConstIterator begin = protein.begin(); AASequence::ConstIterator end = protein.begin(); while (nextCleavageSite_(protein, end), end != protein.end()) { output.push_back(protein.getSubsequence(begin - protein.begin(), end - begin)); begin = end; } output.push_back(protein.getSubsequence(begin - protein.begin(), end - begin)); }
// Decides whether the residue referenced by @p iterator is scored as a cleavage site.
//
// @param protein   sequence that @p iterator points into
// @param iterator  position to evaluate
// @return true if (summed p_miss - summed p_cleave) over the 9-residue window centred
//         on the position exceeds log_model_threshold_; false otherwise, and false
//         immediately for residues that are not candidates at all
// @throws Exception::InvalidParameter if the configured enzyme is not named "Trypsin"
bool EnzymaticDigestionLogModel::isCleavageSite_(
  const AASequence& protein, const AASequence::ConstIterator& iterator) const
{
  // any enzyme other than Trypsin is rejected
  if (enzyme_.getName() != "Trypsin")
  {
    throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("EnzymaticDigestionLogModel: enzyme '") + enzyme_.getName() + "' does not support logModel!");
  }

  // Only residues whose one-letter code occurs in the enzyme's regular expression are
  // candidates (R or K according to the original note); 'P' is excluded explicitly.
  if ((!enzyme_.getRegEx().hasSubstring(iterator->getOneLetterCode())) || *iterator == 'P')
  {
    return false;
  }

  // The window covers four residues before the candidate, the candidate itself and four after.
  const SignedSize window_start = distance(AASequence::ConstIterator(protein.begin()), iterator) - 4;
  const SignedSize length = static_cast<SignedSize>(protein.size());

  double score_cleave = 0;
  double score_missed = 0;
  for (SignedSize i = 0; i < 9; ++i)
  {
    const SignedSize index = window_start + i;
    if (index < 0 || index >= length)
    {
      continue; // window position lies outside the sequence
    }

    BindingSite_ bs(i, protein[index].getOneLetterCode());
    Map<BindingSite_, CleavageModel_>::const_iterator pos_it = model_data_.find(bs);
    if (pos_it != model_data_.end()) // no data for non-std. amino acids
    {
      score_cleave += pos_it->second.p_cleave;
      score_missed += pos_it->second.p_miss;
    }
  }

  return score_missed - score_cleave > log_model_threshold_;
}
// Returns the number of peptides a digestion of @p protein yields: one plus the
// number of times nextCleavageSite_() stops before protein.end().
//
// @param protein  sequence to inspect
// @return peptide count; at least 1
Size EnzymaticDigestionLogModel::peptideCount(const AASequence& protein)
{
  Size peptides = 1;
  AASequence::ConstIterator site = protein.begin();

  nextCleavageSite_(protein, site);
  while (site != protein.end())
  {
    ++peptides;
    nextCleavageSite_(protein, site);
  }

  return peptides;
}
// Rewrites the sequence of every protein hit of the first protein identification in
// @p channel so that each 'R' residue carries @p arginine_label and each 'K' residue
// carries @p lysine_label.
//
// @param channel         feature map whose first protein identification is modified in place
// @param arginine_label  modification set on every 'R'
// @param lysine_label    modification set on every 'K'
//
// Does nothing if @p channel has no protein identifications.
void SILACLabeler::applyLabelToProteinHit_(SimTypes::FeatureMapSim& channel, const String& arginine_label, const String& lysine_label) const
{
  // indexing [0] on an empty vector would be undefined behaviour
  if (channel.getProteinIdentifications().empty())
  {
    return;
  }

  // resolve the hit list once instead of on every loop test
  std::vector<ProteinHit>& hits = channel.getProteinIdentifications()[0].getHits();

  for (std::vector<ProteinHit>::iterator protein_hit = hits.begin(); protein_hit != hits.end(); ++protein_hit)
  {
    AASequence aa = AASequence::fromString(protein_hit->getSequence());

    for (AASequence::Iterator residue = aa.begin(); residue != aa.end(); ++residue)
    {
      if (*residue == 'R')
      {
        aa.setModification(residue - aa.begin(), arginine_label);
      }
      else if (*residue == 'K')
      {
        aa.setModification(residue - aa.begin(), lysine_label);
      }
    }

    protein_hit->setSequence(aa.toString());
  }
}
// Converts @p sequence into a string, one entry per residue: residues registered in
// residue_to_name_ contribute their mapped name, all others their one-letter code.
//
// @param sequence  sequence to convert
// @return the concatenated per-residue representation
String CompNovoIdentificationBase::getModifiedStringFromAASequence_(const AASequence & sequence)
{
  String result;

  AASequence::ConstIterator residue = sequence.begin();
  while (residue != sequence.end())
  {
    if (!residue_to_name_.has(&*residue))
    {
      // no special name registered for this residue
      result += residue->getOneLetterCode();
    }
    else
    {
      result += residue_to_name_[&*residue];
    }
    ++residue;
  }

  return result;
}
// Builds a ':'-separated modification string for @p sequence: the N-terminal
// modification first, then one entry per residue (each preceded by ':', possibly
// empty), and finally ':' plus the C-terminal modification if that one is non-empty.
//
// @param sequence  sequence whose modifications are listed
// @return the assembled modification string
String IBSpectraFile::getModifString_(const AASequence& sequence)
{
  String result = sequence.getNTerminalModification();

  AASequence::ConstIterator residue = sequence.begin();
  while (residue != sequence.end())
  {
    result += ":";
    result += residue->getModification();
    ++residue;
  }

  const String& c_term = sequence.getCTerminalModification();
  if (!c_term.empty())
  {
    result += ":";
    result += c_term;
  }

  return result;
}
void TheoreticalSpectrumGenerator::addLosses_(RichPeakSpectrum & spectrum, const AASequence & ion, double intensity, Residue::ResidueType res_type, int charge) const { RichPeak1D p; set<String> losses; for (AASequence::ConstIterator it = ion.begin(); it != ion.end(); ++it) { if (it->hasNeutralLoss()) { vector<EmpiricalFormula> loss_formulas = it->getLossFormulas(); for (Size i = 0; i != loss_formulas.size(); ++i) { losses.insert(loss_formulas[i].toString()); } } } if (!add_isotopes_) { p.setIntensity(intensity * rel_loss_intensity_); } for (set<String>::const_iterator it = losses.begin(); it != losses.end(); ++it) { EmpiricalFormula loss_ion = ion.getFormula(res_type, charge) - EmpiricalFormula(*it); // thanks to Chris and Sandro // check for negative element frequencies (might happen if losses are not allowed for specific ions) bool negative_elements(false); for (EmpiricalFormula::ConstIterator eit = loss_ion.begin(); eit != loss_ion.end(); ++eit) { if (eit->second < 0) { negative_elements = true; break; } } if (negative_elements) { continue; } double loss_pos = loss_ion.getMonoWeight() / (double)charge; const String& loss_name = *it; if (add_isotopes_) { IsotopeDistribution dist = loss_ion.getIsotopeDistribution(max_isotope_); UInt j(0); for (IsotopeDistribution::ConstIterator iso = dist.begin(); iso != dist.end(); ++iso) { p.setMZ((double)(loss_pos + j) / (double)charge); p.setIntensity(intensity * rel_loss_intensity_ * iso->second); if (add_metainfo_ && j == 0) { // note: important to construct a string from char. If omitted it will perform pointer arithmetics on the "-" string literal String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+'); p.setMetaValue("IonName", ion_name); } spectrum.push_back(p); } } else { p.setMZ(loss_pos); if (add_metainfo_) { // note: important to construct a string from char. 
If omitted it will perform pointer arithmetics on the "-" string literal String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+'); p.setMetaValue("IonName", ion_name); } spectrum.push_back(p); } } }
bool ModificationDefinitionsSet::isCompatible(const AASequence & peptide) const { set<String> var_names(getVariableModificationNames()), fixed_names(getFixedModificationNames()); // no modifications present and needed if (fixed_names.empty() && !peptide.isModified()) { return true; } // check whether the fixed modifications are fulfilled if (!fixed_names.empty()) { for (set<String>::const_iterator it1 = fixed_names.begin(); it1 != fixed_names.end(); ++it1) { String origin = ModificationsDB::getInstance()->getModification(*it1).getOrigin(); // only single 1lc amino acids are allowed if (origin.size() != 1) { continue; } for (AASequence::ConstIterator it2 = peptide.begin(); it2 != peptide.end(); ++it2) { if (origin == it2->getOneLetterCode()) { // check whether the residue is modified (has to be) if (!it2->isModified()) { return false; } // check whether the modification is the same if (ModificationsDB::getInstance()->getModification(*it1).getId() != it2->getModification()) { return false; } } } } } // check wether other modifications than the variable are present for (AASequence::ConstIterator it = peptide.begin(); it != peptide.end(); ++it) { if (it->isModified()) { String mod = ModificationsDB::getInstance()->getModification(it->getOneLetterCode(), it->getModification(), ResidueModification::ANYWHERE).getFullId(); if (var_names.find(mod) == var_names.end() && fixed_names.find(mod) == fixed_names.end()) { return false; } } } if (peptide.hasNTerminalModification()) { String mod = ModificationsDB::getInstance()->getTerminalModification(peptide.getNTerminalModification(), ResidueModification::N_TERM).getFullId(); if (var_names.find(mod) == var_names.end() && fixed_names.find(mod) == fixed_names.end()) { return false; } } if (peptide.hasCTerminalModification()) { String mod = ModificationsDB::getInstance()->getTerminalModification(peptide.getCTerminalModification(), ResidueModification::C_TERM).getFullId(); if (var_names.find(mod) == var_names.end() && 
fixed_names.find(mod) == fixed_names.end()) { return false; } } return true; }