// Merges the protein accessions of the first peptide hit of 'source' into the
// first peptide hit of 'target'.
//
// Only the first hit of the first peptide identification of each feature is
// looked at. Accessions already present on 'target' keep their order; accessions
// that occur only on 'source' are appended in the order they are encountered.
// Afterwards the first peptide identification of 'target' holds exactly one hit:
// a copy of its former first hit carrying the merged accession list.
//
// Both features must have at least one peptide identification with at least
// one hit; this is not checked here.
void BaseLabeler::mergeProteinAccessions_(Feature & target, const Feature & source) const
{
  std::vector<String> merged(target.getPeptideIdentifications()[0].getHits()[0].getProteinAccessions());
  const std::vector<String> incoming(source.getPeptideIdentifications()[0].getHits()[0].getProteinAccessions());

  // everything 'target' already carries counts as seen
  std::set<String> seen(merged.begin(), merged.end());

  // append those accessions of 'source' that have not been seen so far
  for (std::vector<String>::const_iterator acc = incoming.begin(); acc != incoming.end(); ++acc)
  {
    const bool newly_inserted = seen.insert(*acc).second;
    if (newly_inserted)
    {
      merged.push_back(*acc);
    }
  }

  // replace the hits of the first identification by the single, updated hit
  PeptideHit merged_hit(target.getPeptideIdentifications()[0].getHits()[0]);
  merged_hit.setProteinAccessions(merged);
  target.getPeptideIdentifications()[0].setHits(std::vector<PeptideHit>(1, merged_hit));
}
// Sets 'modification' at residue index 'pos' on the sequence of the first
// peptide hit of the feature's first peptide identification.
//
// The hit list is copied, the sequence of its first entry is replaced by the
// modified one, and the whole list is written back with setHits().
// The feature must carry at least one identification with at least one hit;
// this is not checked here.
void ITRAQLabeler::addModificationToPeptideHit_(Feature& feature, const String& modification, const Size& pos) const
{
  std::vector<PeptideHit> hits = feature.getPeptideIdentifications()[0].getHits();

  AASequence labeled = hits[0].getSequence();
  labeled.setModification(pos, modification);
  hits[0].setSequence(labeled);

  feature.getPeptideIdentifications()[0].setHits(hits);
}
// Attaches 'modification' as N-terminal modification to the sequence of the
// first peptide hit of the feature's first peptide identification.
//
// Nothing is changed when the sequence already reports a non-empty N-terminal
// modification. The feature must carry at least one identification with at
// least one hit; this is not checked here.
void ICPLLabeler::addModificationToPeptideHit_(Feature& feature, const String& modification) const
{
  std::vector<PeptideHit> hits = feature.getPeptideIdentifications()[0].getHits();
  AASequence labeled = hits[0].getSequence();

  // attach the label only if the N-terminus is still accessible
  if (!(labeled.getNTerminalModification() == ""))
  {
    return;
  }

  labeled.setNTerminalModification(modification);
  hits[0].setSequence(labeled);
  feature.getPeptideIdentifications()[0].setHits(hits);
}
// Returns the string form of the sequence of the feature's first peptide hit
// (first peptide identification), with the N-terminal modification cleared if
// it equals 'label'. Any other N-terminal modification is left in place.
// The feature itself is not modified.
String ICPLLabeler::getUnmodifiedAASequence_(const Feature& feature, const String& label) const
{
  AASequence stripped(feature.getPeptideIdentifications()[0].getHits()[0].getSequence());

  // remove the terminal modification only if it is the channel specific one
  const bool carries_channel_label = (stripped.getNTerminalModification() == label);
  if (carries_channel_label)
  {
    stripped.setNTerminalModification("");
  }

  return stripped.toString();
}
// Adds a feature to the browser: first every peptide identification attached
// to the feature (via the matching add() overload), then the feature's meta
// information (via the MetaInfoInterface overload). Finally the first tree item
// whose column 1 exactly matches "0" is expanded.
void MetaDataBrowser::add(Feature & feature)
{
  // peptide ids
  std::vector<PeptideIdentification> & peptide_ids = feature.getPeptideIdentifications();
  for (std::vector<PeptideIdentification>::size_type idx = 0; idx < peptide_ids.size(); ++idx)
  {
    add(peptide_ids[idx]);
  }

  // meta information of the feature itself
  add(static_cast<MetaInfoInterface &>(feature));

  treeview_->expandItem(
    treeview_->findItems(QString::number(0), Qt::MatchExactly, 1).first());
}
void ITRAQLabeler::labelPeptide_(const Feature& feature, SimTypes::FeatureMapSim& result) const { // modify with iTRAQ modification (needed for mass calculation and MS/MS signal) //site="Y" - low abundance //site="N-term" //site="K" - lysine String modification = (itraq_type_ == ItraqConstants::FOURPLEX ? "iTRAQ4plex" : "iTRAQ8plex"); vector<PeptideHit> pep_hits(feature.getPeptideIdentifications()[0].getHits()); AASequence seq(pep_hits[0].getSequence()); // N-term seq.setNTerminalModification(modification); // all "K": for (Size i = 0; i < seq.size(); ++i) { if (seq[i] == 'K' && !seq[i].isModified()) seq.setModification(i, modification); } result.resize(1); result[0] = feature; pep_hits[0].setSequence(seq); result[0].getPeptideIdentifications()[0].setHits(pep_hits); // some "Y": // for each "Y" create two new features, depending on labeling efficiency on "Y": if (y_labeling_efficiency_ == 0) return; for (Size i = 0; i < seq.size(); ++i) { if (seq[i] == 'Y' && !seq[i].isModified()) { if (y_labeling_efficiency_ == 1) { addModificationToPeptideHit_(result.back(), modification, i); } else // double number of features: { Size f_count = result.size(); for (Size f = 0; f < f_count; ++f) { // copy feature result.push_back(result[f]); // modify the copy addModificationToPeptideHit_(result.back(), modification, i); // adjust intensities: result.back().setIntensity(result.back().getIntensity() * y_labeling_efficiency_); result[f].setIntensity(result[f].getIntensity() * (1 - y_labeling_efficiency_)); } } } } }
// Builds a plain string from the sequence of the feature's first peptide hit
// (first peptide identification), residue by residue.
//
// An 'R' whose modification equals 'arginine_label' contributes "R", a 'K'
// whose modification equals 'lysine_label' contributes "K"; every other
// residue contributes its one letter code.
String SILACLabeler::getUnmodifiedSequence_(const Feature& feature, const String& arginine_label, const String& lysine_label) const
{
  const AASequence& sequence = feature.getPeptideIdentifications()[0].getHits()[0].getSequence();

  String plain;
  for (AASequence::ConstIterator residue = sequence.begin(); residue != sequence.end(); ++residue)
  {
    const bool labeled_arginine = (*residue == 'R') && (residue->getModification() == arginine_label);
    if (labeled_arginine)
    {
      plain.append("R");
      continue;
    }

    const bool labeled_lysine = (*residue == 'K') && (residue->getModification() == lysine_label);
    if (labeled_lysine)
    {
      plain.append("K");
      continue;
    }

    plain.append(residue->getOneLetterCode());
  }

  return plain;
}
// Turns the protein hits of the first protein identification of 'feature_map'
// into peptide features, which are appended to 'feature_map'.
//
// Parameter "enzyme" == "none":
//   every protein hit becomes one feature whose peptide hit carries the whole
//   protein sequence, a peptide evidence pointing to the protein accession, the
//   protein's "intensity" meta value as intensity and copies of all of the
//   protein's meta values.
//
// Otherwise:
//   every protein is digested with the configured enzyme. Digestion products
//   shorter than "min_peptide_length" are skipped. Identical peptides (from
//   the same or from different proteins) are merged into one feature:
//   intensities and all meta values whose key starts with "intensity" are
//   summed, and the peptide hit receives one peptide evidence per distinct
//   protein accession. The final feature intensity is rounded up.
//
// 'feature_map' must contain at least one protein identification; this is not
// checked here.
void DigestSimulation::digest(SimTypes::FeatureMapSim& feature_map)
{
  LOG_INFO << "Digest Simulation ... started" << std::endl;

  if ((String)param_.getValue("enzyme") == String("none"))
  {
    // peptides = proteins: convert all proteins into peptides
    for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
         protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
         ++protein_hit)
    {
      // generate a PeptideHit with the correct link to the protein
      PeptideHit pep_hit(1.0, 1, 0, AASequence::fromString(protein_hit->getSequence()));
      PeptideEvidence pe;
      pe.setProteinAccession(protein_hit->getAccession());
      pep_hit.addPeptideEvidence(pe);

      // add the PeptideHit to the PeptideIdentification
      PeptideIdentification pep_id;
      pep_id.insertHit(pep_hit);

      // generate Feature with correct intensity and corresponding PeptideIdentification
      Feature f;
      f.getPeptideIdentifications().push_back(pep_id);
      f.setIntensity(protein_hit->getMetaValue("intensity"));

      // copy intensity meta-values and additional annotations from protein to feature
      StringList keys;
      protein_hit->getKeys(keys);
      for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
      {
        f.setMetaValue(*it_key, protein_hit->getMetaValue(*it_key));
      }

      // add Feature to SimTypes::FeatureMapSim
      feature_map.push_back(f);
    }

    return;
  }

  UInt min_peptide_length = param_.getValue("min_peptide_length");
  const bool use_log_model = (param_.getValue("model") == "trained");
  UInt missed_cleavages = param_.getValue("model_naive:missed_cleavages");
  double cleave_threshold = param_.getValue("model_trained:threshold");

  EnzymaticDigestion digestion;
  digestion.setEnzyme(digestion.getEnzymeByName((String)param_.getValue("enzyme")));
  digestion.setLogModelEnabled(use_log_model);
  digestion.setLogThreshold(cleave_threshold);

  std::vector<AASequence> digestion_products;

  // keep track of generated features (one per distinct peptide sequence)
  std::map<AASequence, Feature> generated_features;

  // iterate through ProteinHits in the FeatureMap and digest them
  for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
       protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
       ++protein_hit)
  {
    // parsed once, used for both the counting digest and the real digest
    const AASequence protein_sequence = AASequence::fromString(protein_hit->getSequence());

    // determine abundance of each digestion product (this is quite long now...)
    // we assume that each digestion product will have the same abundance
    // note: missed cleavages reduce overall abundance as they combine two (or more) single peptides

    // how many "atomic" (i.e. non-cleavable) peptides are created?
    digestion.setMissedCleavages(0);
    Size complete_digest_count = digestion.peptideCount(protein_sequence);

    // compute average number of "atomic" peptides summed from all digestion products
    Size number_atomic_whole = 0;
    Size number_of_digestion_products = 0;
    for (Size i = 0; (i <= missed_cleavages) && (i < complete_digest_count); ++i)
    {
      number_atomic_whole += (complete_digest_count - i) * (i + 1);
      number_of_digestion_products += (complete_digest_count - i);
    }

    // mean number of "atomic" peptides per digestion product is now: number_atomic_whole / number_of_digestion_products
    // -> thus abundance of a digestion product is: #proteins / avg#of"atomic"peptides
    // i.e.: protein->second / (number_atomic_whole / number_of_digestion_products)
    Map<String, SimTypes::SimIntensityType> intensities;
    StringList keys;
    protein_hit->getKeys(keys);
    for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
    {
      if (!it_key->hasPrefix("intensity"))
      {
        continue;
      }
      // order changed for numeric stability; the result is clamped to at least 1
      intensities[*it_key] = std::max(SimTypes::SimIntensityType(1),
                                      SimTypes::SimIntensityType(protein_hit->getMetaValue(*it_key))
                                      * SimTypes::SimIntensityType(number_of_digestion_products)
                                      / SimTypes::SimIntensityType(number_atomic_whole));
    }

    // do real digest
    digestion.setMissedCleavages(missed_cleavages);
    digestion.digest(protein_sequence, digestion_products);

    for (std::vector<AASequence>::const_iterator dp_it = digestion_products.begin();
         dp_it != digestion_products.end();
         ++dp_it)
    {
      if (dp_it->size() < min_peptide_length)
      {
        continue;
      }

      // sum equal peptide's intensities
      // *dp_it -> peptide
      // if we see this peptide for the first time -> generate the corresponding feature
      std::map<AASequence, Feature>::iterator gf_it = generated_features.find(*dp_it);
      if (gf_it == generated_features.end())
      {
        PeptideHit pep_hit(1.0, 1, 0, *dp_it);

        PeptideIdentification pep_id;
        pep_id.insertHit(pep_hit);

        // create feature
        Feature f;
        f.getPeptideIdentifications().push_back(pep_id);
        // set intensity to 0 to avoid problems when summing up
        f.setIntensity(0.0);

        // copy all non-intensity meta values
        for (StringList::const_iterator key = keys.begin(); key != keys.end(); ++key)
        {
          if (!key->hasPrefix("intensity"))
          {
            f.setMetaValue(*key, protein_hit->getMetaValue(*key));
          }
        }

        // insert into map
        gf_it = generated_features.insert(std::make_pair(*dp_it, f)).first;
      }

      // references into a std::map stay valid while other elements are inserted
      Feature& peptide_feature = gf_it->second;

      // sum up intensity values
      peptide_feature.setIntensity(peptide_feature.getIntensity() + intensities["intensity"]);

      // ... same for other intensities (iTRAQ...)
      for (Map<String, SimTypes::SimIntensityType>::const_iterator it_other = intensities.begin();
           it_other != intensities.end();
           ++it_other)
      {
        if (!peptide_feature.metaValueExists(it_other->first))
        {
          peptide_feature.setMetaValue(it_other->first, it_other->second);
        }
        else
        {
          peptide_feature.setMetaValue(it_other->first,
                                       SimTypes::SimIntensityType(peptide_feature.getMetaValue(it_other->first)) + it_other->second);
        }
      }

      // add the current protein accession, but only if the hit does not reference it yet.
      // Re-adding an evidence for every known accession on each occurrence (as was done
      // before) duplicated the evidences of peptides that occur more than once.
      // NOTE(review): assumes addPeptideEvidence() appends without de-duplicating - confirm.
      std::set<String> protein_accessions = peptide_feature.getPeptideIdentifications()[0].getHits()[0].extractProteinAccessions();
      const String& current_accession = protein_hit->getAccession();
      if (protein_accessions.insert(current_accession).second)
      {
        std::vector<PeptideHit> pep_hits = peptide_feature.getPeptideIdentifications()[0].getHits();

        PeptideEvidence pe;
        pe.setProteinAccession(current_accession);
        pep_hits[0].addPeptideEvidence(pe);

        peptide_feature.getPeptideIdentifications()[0].setHits(pep_hits);
      }
    }
  }

  // add generated_features to FeatureMap
  for (std::map<AASequence, Feature>::iterator it_gf = generated_features.begin();
       it_gf != generated_features.end();
       ++it_gf)
  {
    // round up intensity
    (it_gf->second).setIntensity(ceil((it_gf->second).getIntensity()));
    feature_map.push_back(it_gf->second);
  }
}