void IDFilter::filterIdentificationsByRTPValues(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, DoubleReal p_value) { DoubleReal border = 1 - p_value; vector<PeptideHit> filtered_peptide_hits; PeptideHit temp_peptide_hit; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); Size missing_meta_value = 0; for (Size i = 0; i < identification.getHits().size(); i++) { if (identification.getHits()[i].metaValueExists("predicted_RT_p_value")) { if ((DoubleReal)(identification.getHits()[i].getMetaValue("predicted_RT_p_value")) <= border) { filtered_peptide_hits.push_back(identification.getHits()[i]); } } else ++missing_meta_value; } if (missing_meta_value > 0) LOG_WARN << "Filtering identifications by p-value did not work on " << missing_meta_value << " of " << identification.getHits().size() << " hits. Your data is missing a meta-value ('predicted_RT_p_value') from RTPredict!\n"; if (!filtered_peptide_hits.empty()) { filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); } }
void IDFilter::filterIdentificationsByCharge(const PeptideIdentification& identification, Int min_charge, PeptideIdentification& filtered_identification) { vector<Size> new_peptide_indices; vector<PeptideHit> filtered_peptide_hits; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); const vector<PeptideHit>& temp_peptide_hits = identification.getHits(); for (Size i = 0; i < temp_peptide_hits.size(); i++) { if (temp_peptide_hits[i].getCharge() >= min_charge) { new_peptide_indices.push_back(i); } } for (Size i = 0; i < new_peptide_indices.size(); i++) { filtered_peptide_hits.push_back(identification.getHits()[new_peptide_indices[i]]); } if (!filtered_peptide_hits.empty()) { filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); } }
void IDFilter::filterIdentificationsByExclusionPeptides(const PeptideIdentification& identification, const set<String>& peptides, PeptideIdentification& filtered_identification) { String protein_sequences; String accession_sequences; vector<PeptideHit> filtered_peptide_hits; PeptideHit temp_peptide_hit; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); for (Size i = 0; i < identification.getHits().size(); i++) { if (find(peptides.begin(), peptides.end(), identification.getHits()[i].getSequence().toString()) == peptides.end()) { filtered_peptide_hits.push_back(identification.getHits()[i]); } } if (!filtered_peptide_hits.empty()) { filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); } }
//Visualizing PeptideIdentification object void MetaDataBrowser::visualize_(PeptideIdentification & meta, QTreeWidgetItem * parent) { PeptideIdentificationVisualizer * visualizer = new PeptideIdentificationVisualizer(isEditable(), this, this); QStringList labels; int id = ws_->addWidget(visualizer); labels << QString("PeptideIdentification %1").arg(meta.getScoreType().c_str()) << QString::number(id); visualizer->load(meta, id); QTreeWidgetItem * item; if (parent == nullptr) { item = new QTreeWidgetItem(treeview_, labels); } else { item = new QTreeWidgetItem(parent, labels); } //check for proteins and peptides hits meta.assignRanks(); //list all peptides hits in the tree for (Size i = 0; i < meta.getHits().size(); ++i) { visualize_(const_cast<PeptideHit &>(meta.getHits()[i]), item); } visualize_(dynamic_cast<MetaInfoInterface &>(meta), item); connectVisualizer_(visualizer); }
void IDFilter::filterIdentificationsByProteins(const PeptideIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, PeptideIdentification& filtered_identification, bool no_protein_identifiers) { // TODO: this is highly inefficient! the Protein-Index should be build once for all peptide-identifications instead of // doing this once for every ID. Furthermore the index itself is inefficient (use seqan instead) String protein_sequences; String accession_sequences; vector<PeptideHit> filtered_peptide_hits; PeptideHit temp_peptide_hit; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); for (Size i = 0; i < proteins.size(); i++) { if (proteins[i].identifier != "") { accession_sequences.append("*" + proteins[i].identifier); } if (proteins[i].sequence != "") { protein_sequences.append("*" + proteins[i].sequence); } } accession_sequences.append("*"); protein_sequences.append("*"); for (Size i = 0; i < identification.getHits().size(); i++) { if (no_protein_identifiers || accession_sequences == "*") // filter by sequence alone if no protein accesssions are available { if (protein_sequences.find(identification.getHits()[i].getSequence().toUnmodifiedString()) != String::npos) { filtered_peptide_hits.push_back(identification.getHits()[i]); } } else // filter by protein accessions { for (vector<String>::const_iterator ac_it = identification.getHits()[i].getProteinAccessions().begin(); ac_it != identification.getHits()[i].getProteinAccessions().end(); ++ac_it) { if (accession_sequences.find("*" + *ac_it) != String::npos) { filtered_peptide_hits.push_back(identification.getHits()[i]); break; // we found a matching protein, the peptide is valid -> exit } } } } filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); }
void IDFilter::filterIdentificationsByBestHits(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, bool strict) { vector<PeptideHit> filtered_peptide_hits; PeptideHit temp_peptide_hit; vector<Size> new_peptide_indices; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); if (!identification.getHits().empty()) { Real optimal_value = identification.getHits()[0].getScore(); new_peptide_indices.push_back(0); // searching for peptide(s) with maximal score for (Size i = 1; i < identification.getHits().size(); i++) { Real temp_score = identification.getHits()[i].getScore(); bool new_leader = false; if ((identification.isHigherScoreBetter() && (temp_score > optimal_value)) || (!identification.isHigherScoreBetter() && (temp_score < optimal_value))) new_leader = true; if (new_leader) { optimal_value = temp_score; new_peptide_indices.clear(); new_peptide_indices.push_back(i); } else if (temp_score == optimal_value) { new_peptide_indices.push_back(i); } } if (!strict || new_peptide_indices.size() == 1) { for (Size i = 0; i < new_peptide_indices.size(); i++) { filtered_peptide_hits.push_back(identification.getHits()[new_peptide_indices[i]]); } } } if (!filtered_peptide_hits.empty()) { filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); } }
void IDFilter::filterIdentificationsByCharge(const PeptideIdentification& identification, Int min_charge, PeptideIdentification& filtered_identification) { filtered_identification = identification; const vector<PeptideHit>& temp_peptide_hits = identification.getHits(); vector<PeptideHit> filtered_peptide_hits; for (Size i = 0; i < temp_peptide_hits.size(); ++i) { if (temp_peptide_hits[i].getCharge() >= min_charge) { filtered_peptide_hits.push_back(temp_peptide_hits[i]); } } filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); }
void IDFilter::filterIdentificationsByLength(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, Size min_length, Size max_length) { vector<Size> new_peptide_indices; vector<PeptideHit> filtered_peptide_hits; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); Size ml = max_length; if (max_length < min_length) { ml = UINT_MAX; } const vector<PeptideHit>& temp_peptide_hits = identification.getHits(); for (Size i = 0; i < temp_peptide_hits.size(); i++) { if (temp_peptide_hits[i].getSequence().size() >= min_length && temp_peptide_hits[i].getSequence().size() <= ml) { new_peptide_indices.push_back(i); } } for (Size i = 0; i < new_peptide_indices.size(); i++) { filtered_peptide_hits.push_back(identification.getHits()[new_peptide_indices[i]]); } if (!filtered_peptide_hits.empty()) { filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); } }
void IDFilter::filterIdentificationsByExclusionPeptides(const PeptideIdentification& identification, const set<String>& peptides, bool ignore_modifications, PeptideIdentification& filtered_identification) { vector<PeptideHit> filtered_peptide_hits; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); for (Size i = 0; i < identification.getHits().size(); i++) { String query = ignore_modifications ? identification.getHits()[i].getSequence().toUnmodifiedString() : identification.getHits()[i].getSequence().toString(); if (find(peptides.begin(), peptides.end(), query) == peptides.end()) { filtered_peptide_hits.push_back(identification.getHits()[i]); } } if (!filtered_peptide_hits.empty()) { filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); } }
void IDFilter::filterIdentificationsByLength(const PeptideIdentification& identification, PeptideIdentification& filtered_identification, Size min_length, Size max_length) { filtered_identification = identification; if (max_length < min_length) { max_length = UINT_MAX; } const vector<PeptideHit>& temp_peptide_hits = identification.getHits(); vector<PeptideHit> filtered_peptide_hits; for (Size i = 0; i < temp_peptide_hits.size(); ++i) { if (min_length <= temp_peptide_hits[i].getSequence().size() && temp_peptide_hits[i].getSequence().size() <= max_length) { filtered_peptide_hits.push_back(temp_peptide_hits[i]); } } filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); }
void IDFilter::filterIdentificationsByVariableModifications(const PeptideIdentification& identification, const vector<String>& fixed_modifications, PeptideIdentification& filtered_identification) { vector<Size> new_peptide_indices; vector<PeptideHit> filtered_peptide_hits; filtered_identification = identification; filtered_identification.setHits(vector<PeptideHit>()); const vector<PeptideHit>& temp_peptide_hits = identification.getHits(); for (Size i = 0; i < temp_peptide_hits.size(); i++) { const AASequence& aa_seq = temp_peptide_hits[i].getSequence(); /* TODO: check these cases // check terminal modifications if (aa_seq.hasNTerminalModification()) { String unimod_name = aa_seq.getNTerminalModification(); if (find(fixed_modifications.begin(), fixed_modifications.end(), unimod_name) == fixed_modifications.end()) { new_peptide_indices.push_back(i); continue; } } if (aa_seq.hasCTerminalModification()) { String unimod_name = aa_seq.getCTerminalModification(); if (find(fixed_modifications.begin(), fixed_modifications.end(), unimod_name) == fixed_modifications.end()) { new_peptide_indices.push_back(i); continue; } } */ // check internal modifications for (Size j = 0; j != aa_seq.size(); ++j) { if (aa_seq[j].isModified()) { String unimod_name = aa_seq[j].getModification() + " (" + aa_seq[j].getOneLetterCode() + ")"; if (find(fixed_modifications.begin(), fixed_modifications.end(), unimod_name) == fixed_modifications.end()) { new_peptide_indices.push_back(i); continue; } } } } for (Size i = 0; i < new_peptide_indices.size(); i++) { const PeptideHit& ph = temp_peptide_hits[new_peptide_indices[i]]; filtered_peptide_hits.push_back(ph); } if (!filtered_peptide_hits.empty()) { filtered_identification.setHits(filtered_peptide_hits); filtered_identification.assignRanks(); } }
// Builds the list of scored peptide hits for one CID spectrum and stores it in @p id.
//
// @param id        receives the hits (set and ranked three times in the course of the function; the last call wins)
// @param CID_spec  input spectrum; it is copied before preprocessing, and the unmodified original is what
//                  the simulated spectra are scored against
//
// Steps, in the order they appear in the body:
//  1. copy the spectrum, apply windowMower_ and a Normalizer with method "to_one"
//  2. take charge and m/z from the first precursor to compute the [M+H]+ weight
//     (charge stays 2 if the precursor charge is 0; the weight stays 0 if there is no precursor)
//  3. truncate the copy at the first peak above that weight and append two peaks of intensity 1
//  4. score the ions (CompNovoIonScoringCID) and collect candidate sequences (getDecompositionsDAC_)
//  5. pre-score every candidate with zhang_ and keep at most "number_of_prescoring_hits" hits
//  6. re-score the remaining hits with a SpectrumAlignmentScore and keep at most "number_of_hits" hits
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec)
{
  //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0)
  //{
  //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl;
  //return;
  //}

  // work on a copy; CID_spec itself is still needed unmodified for scoring below
  PeakSpectrum new_CID_spec(CID_spec);
  windowMower_(new_CID_spec, 0.3, 1);

  // configure the member scorer zhang_; the same parameter set is reused for spectra_zhang further down
  Param zhang_param;
  zhang_param = zhang_.getParameters();
  zhang_param.setValue("tolerance", fragment_mass_tolerance_);
  zhang_param.setValue("use_gaussian_factor", "true");
  zhang_param.setValue("use_linear_factor", "false");
  zhang_.setParameters(zhang_param);

  // this normalizer instance is also applied to the simulated spectra in the rescoring loop
  Normalizer normalizer;
  Param n_param(normalizer.getParameters());
  n_param.setValue("method", "to_one");
  normalizer.setParameters(n_param);
  normalizer.filterSpectrum(new_CID_spec);

  Size charge(2); // default used when the precursor reports charge 0 or there is no precursor
  double precursor_weight(0); // [M+H]+
  if (!CID_spec.getPrecursors().empty())
  {
    // believe charge of spectrum?
    if (CID_spec.getPrecursors().begin()->getCharge() != 0)
    {
      charge = CID_spec.getPrecursors().begin()->getCharge();
    }
    else
    {
      // TODO estimate charge state
    }
    // m/z * z gives the mass including z protons; removing (z - 1) of them leaves the singly protonated weight
    precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U);
  }

  //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl;

  // now delete all peaks that are right of the estimated precursor weight
  // (counts the peaks up to the first one above the weight and cuts the spectrum there;
  //  NOTE(review): this presumably relies on the peaks being sorted by position - confirm)
  Size peak_counter(0);
  for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter)
  {
    if (it->getPosition()[0] > precursor_weight)
    {
      break;
    }
  }
  if (peak_counter < new_CID_spec.size())
  {
    new_CID_spec.resize(peak_counter);
  }

  // computed once on the first call and reused afterwards
  static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight();

  // append two peaks of intensity 1: one at oxonium_mass, one at the precursor weight
  Peak1D p;
  p.setIntensity(1);
  p.setPosition(oxonium_mass);
  new_CID_spec.push_back(p);
  p.setPosition(precursor_weight);
  new_CID_spec.push_back(p);

  // add complement to spectrum
  /*
  for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
  {
    // get m/z of complement
    double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U;

    // search if peaks are available that have similar m/z values
    Size count(0);
    bool found(false);
    for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count)
    {
      if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance)
      {
        // add peak intensity to corresponding peak in new_CID_spec
        new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity());
      }
    }
    if (!found)
    {
      // infer this peak
      Peak1D p;
      p.setIntensity(it1->getIntensity());
      p.setPosition(mz_comp);
      new_CID_spec.push_back(p);
    }
  }*/

  // forward this object's settings to the ion scorer
  CompNovoIonScoringCID ion_scoring;
  Param ion_scoring_param(ion_scoring.getParameters());
  ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_);
  ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_);
  ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_);
  ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold"));
  ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score"));
  ion_scoring_param.setValue("max_isotope", max_isotope_);
  ion_scoring.setParameters(ion_scoring_param);

  Map<double, IonScore> ion_scores;
  ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge);

  // the two peaks appended above were added at the end, so the spectrum is re-sorted
  // before it is handed to getDecompositionsDAC_
  new_CID_spec.sortByPosition();

  /*
  cerr << "Size of ion_scores " << ion_scores.size() << endl;
  for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
  {
    cerr << it->first << " " << it->second.score << endl;
  }*/

  // optional debug output: writes the ion scores as a spectrum to "spec_scored.dta"
#ifdef WRITE_SCORED_SPEC
  PeakSpectrum filtered_spec(new_CID_spec);
  filtered_spec.clear();
  for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
  {
    Peak1D p;
    p.setIntensity(it->second.score);
    p.setPosition(it->first);
    filtered_spec.push_back(p);
  }
  DTAFile().store("spec_scored.dta", filtered_spec);
#endif

  // candidate sequences over the whole peak index range [0, size - 1]
  set<String> sequences;
  getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores);

  // optional: additionally inject a fixed list of sequences into the candidate set
#ifdef SPIKE_IN
  sequences.insert("AFCVDGEGR");
  sequences.insert("APEFAAPWPDFVPR");
  sequences.insert("AVKQFEESQGR");
  sequences.insert("CCTESLVNR");
  sequences.insert("DAFLGSFLYEYSR");
  sequences.insert("DAIPENLPPLTADFAEDK");
  sequences.insert("DDNKVEDIWSFLSK");
  sequences.insert("DDPHACYSTVFDK");
  sequences.insert("DEYELLCLDGSR");
  sequences.insert("DGAESYKELSVLLPNR");
  sequences.insert("DGASCWCVDADGR");
  sequences.insert("DLFIPTCLETGEFAR");
  sequences.insert("DTHKSEIAHR");
  sequences.insert("DVCKNYQEAK");
  sequences.insert("EACFAVEGPK");
  sequences.insert("ECCHGDLLECADDR");
  sequences.insert("EFLGDKFYTVISSLK");
  sequences.insert("EFTPVLQADFQK");
  sequences.insert("ELFLDSGIFQPMLQGR");
  sequences.insert("ETYGDMADCCEK");
  sequences.insert("EVGCPSSSVQEMVSCLR");
  sequences.insert("EYEATLEECCAK");
  sequences.insert("FADLIQSGTFQLHLDSK");
  sequences.insert("FFSASCVPGATIEQK");
  sequences.insert("FLANVSTVLTSK");
  sequences.insert("FLSGSDYAIR");
  sequences.insert("FTASCPPSIK");
  sequences.insert("GAIEWEGIESGSVEQAVAK");
  sequences.insert("GDVAFIQHSTVEENTGGK");
  sequences.insert("GEPPSCAEDQSCPSER");
  sequences.insert("GEYVPTSLTAR");
  sequences.insert("GQEFTITGQKR");
  sequences.insert("GTFAALSELHCDK");
  sequences.insert("HLVDEPQNLIK");
  sequences.insert("HQDCLVTTLQTQPGAVR");
  sequences.insert("HTTVNENAPDQK");
  sequences.insert("ILDCGSPDTEVR");
  sequences.insert("KCPSPCQLQAER");
  sequences.insert("KGTEFTVNDLQGK");
  sequences.insert("KQTALVELLK");
  sequences.insert("KVPQVSTPTLVEVSR");
  sequences.insert("LALQFTTNAKR");
  sequences.insert("LCVLHEKTPVSEK");
  sequences.insert("LFTFHADICTLPDTEK");
  sequences.insert("LGEYGFQNALIVR");
  sequences.insert("LHVDPENFK");
  sequences.insert("LKECCDKPLLEK");
  sequences.insert("LKHLVDEPQNLIK");
  sequences.insert("LKPDPNTLCDEFK");
  sequences.insert("LLGNVLVVVLAR");
  sequences.insert("LLVVYPWTQR");
  sequences.insert("LRVDPVNFK");
  sequences.insert("LTDEELAFPPLSPSR");
  sequences.insert("LVNELTEFAK");
  sequences.insert("MFLSFPTTK");
  sequences.insert("MPCTEDYLSLILNR");
  sequences.insert("NAPYSGYSGAFHCLK");
  sequences.insert("NECFLSHKDDSPDLPK");
  sequences.insert("NEPNKVPACPGSCEEVK");
  sequences.insert("NLQMDDFELLCTDGR");
  sequences.insert("QAGVQAEPSPK");
  sequences.insert("RAPEFAAPWPDFVPR");
  sequences.insert("RHPEYAVSVLLR");
  sequences.insert("RPCFSALTPDETYVPK");
  sequences.insert("RSLLLAPEEGPVSQR");
  sequences.insert("SAFPPEPLLCSVQR");
  sequences.insert("SAGWNIPIGTLLHR");
  sequences.insert("SCWCVDEAGQK");
  sequences.insert("SGNPNYPHEFSR");
  sequences.insert("SHCIAEVEK");
  sequences.insert("SISSGFFECER");
  sequences.insert("SKYLASASTMDHAR");
  sequences.insert("SLHTLFGDELCK");
  sequences.insert("SLLLAPEEGPVSQR");
  sequences.insert("SPPQCSPDGAFRPVQCK");
  sequences.insert("SREGDPLAVYLK");
  sequences.insert("SRQIPQCPTSCER");
  sequences.insert("TAGTPVSIPVCDDSSVK");
  sequences.insert("TCVADESHAGCEK");
  sequences.insert("TQFGCLEGFGR");
  sequences.insert("TVMENFVAFVDK");
  sequences.insert("TYFPHFDLSHGSAQVK");
  sequences.insert("TYMLAFDVNDEK");
  sequences.insert("VDEVGGEALGR");
  sequences.insert("VDLLIGSSQDDGLINR");
  sequences.insert("VEDIWSFLSK");
  sequences.insert("VGGHAAEYGAEALER");
  sequences.insert("VGTRCCTKPESER");
  sequences.insert("VKVDEVGGEALGR");
  sequences.insert("VKVDLLIGSSQDDGLINR");
  sequences.insert("VLDSFSNGMK");
  sequences.insert("VLSAADKGNVK");
  sequences.insert("VPQVSTPTLVEVSR");
  sequences.insert("VTKCCTESLVNR");
  sequences.insert("VVAASDASQDALGCVK");
  sequences.insert("VVAGVANALAHR");
  sequences.insert("YICDNQDTISSK");
  sequences.insert("YLASASTMDHAR");
  sequences.insert("YNGVFQECCQAEDK");
#endif

  // NOTE(review): spectra_zhang is configured here, but its only use below is commented out
  SpectrumAlignmentScore spectra_zhang;
  spectra_zhang.setParameters(zhang_param);

  // pre-scoring: one hit per candidate that does not exceed the allowed number of missed cleavages
  vector<PeptideHit> hits;
  Size missed_cleavages = param_.getValue("missed_cleavages");
  for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it)
  {
    Size num_missed = countMissedCleavagesTryptic_(*it);
    if (missed_cleavages < num_missed)
    {
      //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl;
      continue;
    }
    PeakSpectrum CID_sim_spec;
    getCIDSpectrum_(CID_sim_spec, *it, charge);

    //normalizer.filterSpectrum(CID_sim_spec);

    // the simulated spectrum is compared with the original input spectrum, not with the preprocessed copy
    double cid_score = zhang_(CID_sim_spec, CID_spec);

    PeptideHit hit;
    hit.setScore(cid_score);
    hit.setSequence(getModifiedAASequence_(*it));
    hit.setCharge((Int)charge); //TODO unify charge interface: int or size?
    hits.push_back(hit);
    //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl;
  }

  // rescore the top hits
  // (the hits are passed through id so that assignRanks() is applied to them before they are truncated;
  //  NOTE(review): truncating to the "top" hits presumably relies on assignRanks() ordering them - confirm)
  id.setHits(hits);
  id.assignRanks();
  hits = id.getHits();

  SpectrumAlignmentScore alignment_score;
  Param align_param(alignment_score.getParameters());
  align_param.setValue("tolerance", fragment_mass_tolerance_);
  align_param.setValue("use_linear_factor", "true");
  alignment_score.setParameters(align_param);

  // debug-only loop; its body is commented out
  for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
  {
    //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
  }

  Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits");
  if (hits.size() > number_of_prescoring_hits)
  {
    hits.resize(number_of_prescoring_hits);
  }

  // second scoring pass: normalized simulated spectrum, scored with alignment_score, overwrites the pre-score
  for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
  {
    PeakSpectrum CID_sim_spec;
    getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge);
    normalizer.filterSpectrum(CID_sim_spec);

    //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec);

    //double cid_score = spectra_zhang(CID_sim_spec, CID_spec);
    double cid_score = alignment_score(CID_sim_spec, CID_spec);

    //cerr << "Final: " << it->getSequence() << " " << cid_score << endl;

    it->setScore(cid_score);
  }

  // rank again with the new scores
  id.setHits(hits);
  id.assignRanks();
  hits = id.getHits();

  // debug-only loop; its body is commented out
  for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
  {
    //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
  }

  // final truncation; hits was just copied from id, so both have the same size at this point
  Size number_of_hits = param_.getValue("number_of_hits");
  if (id.getHits().size() > number_of_hits)
  {
    hits.resize(number_of_hits);
  }

  id.setHits(hits);
  id.assignRanks();

  return;
}