//Visualizing ProteinIdentification object void MetaDataBrowser::visualize_(ProteinIdentification & meta, QTreeWidgetItem * parent) { ProteinIdentificationVisualizer * visualizer = new ProteinIdentificationVisualizer(isEditable(), this, this); QStringList labels; int id = ws_->addWidget(visualizer); labels << QString("ProteinIdentification %1").arg(meta.getSearchEngine().c_str()) << QString::number(id); visualizer->load(meta, id); QTreeWidgetItem * item; if (parent == nullptr) { item = new QTreeWidgetItem(treeview_, labels); } else { item = new QTreeWidgetItem(parent, labels); } //check for proteinhits objects meta.assignRanks(); for (Size i = 0; i < meta.getHits().size(); ++i) { visualize_(const_cast<ProteinHit &>(meta.getHits()[i]), item); } visualize_(dynamic_cast<MetaInfoInterface &>(meta), item); connectVisualizer_(visualizer); }
void IDFilter::filterIdentificationsByProteins(const ProteinIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, ProteinIdentification& filtered_identification) { String protein_sequences; String accession_sequences; vector<ProteinHit> filtered_protein_hits; ProteinHit temp_protein_hit; filtered_identification = identification; filtered_identification.setHits(vector<ProteinHit>()); for (Size i = 0; i < proteins.size(); i++) { accession_sequences.append("*" + proteins[i].identifier); } accession_sequences.append("*"); for (Size i = 0; i < identification.getHits().size(); i++) { if (accession_sequences.find("*" + identification.getHits()[i].getAccession()) != String::npos) { filtered_protein_hits.push_back(identification.getHits()[i]); } } filtered_identification.setHits(filtered_protein_hits); filtered_identification.assignRanks(); }
void IDFilter::removeUnreferencedProteinHits(const ProteinIdentification& identification, const vector<PeptideIdentification> peptide_identifications, ProteinIdentification& filtered_identification) { const String& run_identifier = identification.getIdentifier(); // build set of protein accessions that are referenced by peptides set<String> proteinaccessions_with_peptides; for (Size i = 0; i != peptide_identifications.size(); ++i) { // run id of protein and peptide identification must match if (run_identifier == peptide_identifications[i].getIdentifier()) { const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits(); // extract protein accessions of each peptide hit for (Size j = 0; j != tmp_pep_hits.size(); ++j) { const std::vector<String>& protein_accessions = tmp_pep_hits[j].getProteinAccessions(); for (Size k = 0; k != protein_accessions.size(); ++k) { String key = protein_accessions[k]; proteinaccessions_with_peptides.insert(key); } } } } // add all protein hits referenced by a peptide const vector<ProteinHit>& temp_protein_hits = identification.getHits(); vector<ProteinHit> filtered_protein_hits; for (Size j = 0; j != temp_protein_hits.size(); ++j) { const String& protein_accession = temp_protein_hits[j].getAccession(); if (proteinaccessions_with_peptides.find(protein_accession) != proteinaccessions_with_peptides.end()) { filtered_protein_hits.push_back(temp_protein_hits[j]); } } // copy identification filtered_identification = identification; // assign filtered hits to protein identification filtered_identification.setHits(filtered_protein_hits); }
PeptideHit peptide_hit; START_SECTION((XTandemXMLFile())) ptr = new XTandemXMLFile(); TEST_NOT_EQUAL(ptr, nullPointer) END_SECTION START_SECTION(~XTandemXMLFile()) delete ptr; END_SECTION ptr = new XTandemXMLFile(); START_SECTION(void setModificationDefinitionsSet(const ModificationDefinitionsSet &rhs)) ModificationDefinitionsSet mod_set(ListUtils::create<String>(""), ListUtils::create<String>("Carbamidomethyl (C),Oxidation (M),Carboxymethyl (C)")); ptr->setModificationDefinitionsSet(mod_set); NOT_TESTABLE END_SECTION START_SECTION(void load(const String& filename, ProteinIdentification& protein_identification, std::vector<PeptideIdentification>& id_data)) ptr->load(OPENMS_GET_TEST_DATA_PATH("XTandemXMLFile_test.xml"), protein_identification, peptide_identifications); TEST_EQUAL(peptide_identifications.size(), 303) TEST_EQUAL(protein_identification.getHits().size(), 497) ptr->load(OPENMS_GET_TEST_DATA_PATH("XTandemXMLFile_test_2.xml"), protein_identification, peptide_identifications); END_SECTION ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// END_TEST
END_SECTION

// Loads a protXML file and checks the protein groups, indistinguishable
// proteins, protein hits and peptide hits that were read from it.
START_SECTION(void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids))
{
  ProtXMLFile f;
  ProteinIdentification proteins;
  PeptideIdentification peptides;
  String prot_file;

  // expected identifier of the loaded data, one per loop iteration
  StringList ids = ListUtils::create<String>("16627578304933075941,13229490167902618598");
  // we do this twice, just to check that members are correctly reset etc..
  for (Int i=0;i<2;++i)
  {
    prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_1.protXML");
    f.load(prot_file, proteins, peptides);
    // protein and peptide identification must share the same identifier
    TEST_EQUAL(proteins.getIdentifier(), ids[i]);
    TEST_EQUAL(peptides.getIdentifier(), ids[i]);

    // groups
    TEST_EQUAL(proteins.getProteinGroups().size(), 7);
    TEST_EQUAL(proteins.getProteinGroups()[0].probability, 0.9990);
    TEST_EQUAL(proteins.getProteinGroups()[0].accessions.size(), 1);
    TEST_EQUAL(proteins.getProteinGroups()[3].accessions.size(), 2);
    TEST_EQUAL(proteins.getProteinGroups()[3].accessions[0], "P01876|IGHA1_HUMAN");
    TEST_EQUAL(proteins.getProteinGroups()[3].accessions[1], "P01877|IGHA2_HUMAN");
    TEST_EQUAL(proteins.getProteinGroups()[6].probability, 0.2026);
    TEST_EQUAL(proteins.getProteinGroups()[6].accessions.size(), 1);

    // indistinguishable proteins (same sizes and accessions as the groups above)
    TEST_EQUAL(proteins.getIndistinguishableProteins().size(), 7);
    TEST_EQUAL(proteins.getIndistinguishableProteins()[0].accessions.size(), 1);
    TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions.size(), 2);
    TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[0], "P01876|IGHA1_HUMAN");
    TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[1], "P01877|IGHA2_HUMAN");
    TEST_EQUAL(proteins.getIndistinguishableProteins()[6].accessions.size(), 1);

    // proteins
    TEST_EQUAL(proteins.getHits().size(), 9);
    TEST_EQUAL(proteins.getHits()[0].getAccession(), "P02787|TRFE_HUMAN");
    TEST_EQUAL(proteins.getHits()[0].getCoverage(), 8.6);
    TEST_EQUAL(proteins.getHits()[0].getScore(), 0.9990);
    // this one is indistinguishable... therefore it should have minimal infos
    // (coverage and score are both expected to be -1)
    TEST_EQUAL(proteins.getHits()[6].getAccession(), "P00739|HPTR_HUMAN");
    TEST_EQUAL(proteins.getHits()[6].getCoverage(), -1);
    TEST_EQUAL(proteins.getHits()[6].getScore(), -1);
    TEST_EQUAL(proteins.getHits()[8].getAccession(), "P04217|A1BG_HUMAN");
    TEST_EQUAL(proteins.getHits()[8].getCoverage(), 2.0);
    TEST_EQUAL(proteins.getHits()[8].getScore(), 0.2026);

    // peptides
    TEST_EQUAL(peptides.getHits().size(), 16);
    AASequence aa_seq("MYLGYEYVTAIR");
    TEST_EQUAL(peptides.getHits()[0].getSequence(), aa_seq);
    TEST_EQUAL(peptides.getHits()[0].getCharge(), 2);
    TEST_EQUAL(peptides.getHits()[0].getScore(), 0.8633);
    TEST_EQUAL(peptides.getHits()[0].getProteinAccessions().size(), 1);
    TEST_EQUAL(peptides.getHits()[0].getProteinAccessions()[0], "P02787|TRFE_HUMAN");
    TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_unique"), true);
    TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_contributing"), true);

    // select the 2nd input file for the next pass
    // NOTE(review): prot_file is reset to ProtXMLFile_input_1.protXML at the top of
    // every iteration, so this assignment currently has no effect and the second
    // file is never loaded - confirm whether that is intended.
    prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_2.protXML");
  }
}
// Construction: the default constructor must yield a non-null object.
START_SECTION((IDFilter()))
  ptr = new IDFilter();
  TEST_NOT_EQUAL(ptr, nullPointer);
END_SECTION

// Destruction of the object created in the constructor section.
START_SECTION((~IDFilter()))
  delete ptr;
END_SECTION

// Protein-level filter: protein_identification and proteins are fixtures set up
// outside this excerpt. The score type must be carried over to the result and
// exactly two hits (Q824A5, Q872T5 - in this order) are expected to remain.
START_SECTION((void filterIdentificationsByProteins(const ProteinIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, ProteinIdentification& filtered_identification)))
  ProteinIdentification protein_identification2;

  IDFilter::filterIdentificationsByProteins(protein_identification, proteins, protein_identification2);

  TEST_EQUAL(protein_identification2.getScoreType(), "Mascot")
  TEST_EQUAL(protein_identification2.getHits().size(), 2)
  TEST_EQUAL(protein_identification2.getHits()[0].getAccession(), "Q824A5")
  TEST_EQUAL(protein_identification2.getHits()[1].getAccession(), "Q872T5")
END_SECTION

// Peptide-level filter, called without the optional no_protein_identifiers
// argument (so its default applies). identification and proteins are fixtures
// set up outside this excerpt. Two peptide hits with the sequences below are
// expected to remain, in this order.
START_SECTION((void filterIdentificationsByProteins(const PeptideIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, PeptideIdentification& filtered_identification, bool no_protein_identifiers = false)))
  PeptideIdentification identification2;

  IDFilter::filterIdentificationsByProteins(identification, proteins, identification2);

  TEST_EQUAL(identification2.getScoreType(), "Mascot")
  TEST_EQUAL(identification2.getHits().size(), 2)
  TEST_EQUAL(identification2.getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK"))
  TEST_EQUAL(identification2.getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK"))
END_SECTION
void IDFilter::removeUnreferencedPeptideHits(const ProteinIdentification& identification, vector<PeptideIdentification>& peptide_identifications, bool delete_unreferenced_peptide_hits /* = false */) { const String& run_identifier = identification.getIdentifier(); // build set of protein accessions set<String> all_prots; const vector<ProteinHit>& temp_protein_hits = identification.getHits(); for (Size j = 0; j != temp_protein_hits.size(); ++j) { all_prots.insert(temp_protein_hits[j].getAccession()); } vector<PeptideIdentification> filtered_peptide_identifications; // remove peptides which are not referenced for (Size i = 0; i != peptide_identifications.size(); ++i) { // run id of protein and peptide identification must match if (run_identifier == peptide_identifications[i].getIdentifier()) { const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits(); vector<PeptideHit> filtered_pep_hits; // check protein accessions of each peptide hit for (Size j = 0; j != tmp_pep_hits.size(); ++j) { vector<PeptideEvidence> hit_peptide_evidences = tmp_pep_hits[j].getPeptideEvidences(); vector<PeptideEvidence> valid_peptide_evidence; for (vector<PeptideEvidence>::const_iterator pe_it = hit_peptide_evidences.begin(); pe_it != hit_peptide_evidences.end(); ++pe_it) { // find valid proteins if (all_prots.find(pe_it->getProteinAccession()) != all_prots.end()) { valid_peptide_evidence.push_back(*pe_it); } } if (!valid_peptide_evidence.empty() || !delete_unreferenced_peptide_hits) { // if present, copy the hit filtered_pep_hits.push_back(tmp_pep_hits[j]); filtered_pep_hits.back().setPeptideEvidences(valid_peptide_evidence); } } // if the peptide has hits, we use it if (!filtered_pep_hits.empty()) { filtered_peptide_identifications.push_back(peptide_identifications[i]); filtered_peptide_identifications.back().setHits(filtered_pep_hits); } } else // peptide is from another run, let it pass the filter { filtered_peptide_identifications.push_back(peptide_identifications[i]); 
} } // exchange with new hits filtered_peptide_identifications.swap(peptide_identifications); }
void IBSpectraFile::store(const String& filename, const ConsensusMap& cm) { // typdefs for shorter code typedef std::vector<ProteinHit>::iterator ProtHitIt; // general settings .. do we need to expose these? // ---------------------------------------------------------------------- /// Allow also non-unique peptides to be exported bool allow_non_unique = true; /// Intensities below this value will be set to 0.0 to avoid numerical problems when quantifying double intensity_threshold = 0.00001; // ---------------------------------------------------------------------- // guess experiment type boost::shared_ptr<IsobaricQuantitationMethod> quantMethod = guessExperimentType_(cm); // we need the protein identifications to reference the protein names ProteinIdentification protIdent; bool has_proteinIdentifications = false; if (cm.getProteinIdentifications().size() > 0) { protIdent = cm.getProteinIdentifications()[0]; has_proteinIdentifications = true; } // start the file by adding the tsv header TextFile textFile; textFile.addLine(ListUtils::concatenate(constructHeader_(*quantMethod), "\t")); for (ConsensusMap::ConstIterator cm_iter = cm.begin(); cm_iter != cm.end(); ++cm_iter) { const ConsensusFeature& cFeature = *cm_iter; std::vector<IdCSV> entries; /// 1st we extract the identification information from the consensus feature if (cFeature.getPeptideIdentifications().size() == 0 || !has_proteinIdentifications) { // we store unidentified hits anyway, because the iTRAQ quant is still helpful for normalization entries.push_back(IdCSV()); } else { // protein name: const PeptideHit& peptide_hit = cFeature.getPeptideIdentifications()[0].getHits()[0]; std::set<String> protein_accessions = peptide_hit.extractProteinAccessions(); if (protein_accessions.size() != 1) { if (!allow_non_unique) continue; // we only want unique peptides } for (std::set<String>::const_iterator prot_ac = protein_accessions.begin(); prot_ac != protein_accessions.end(); ++prot_ac) { IdCSV entry; entry.charge 
= cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge(); entry.peptide = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString(); entry.theo_mass = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge()); // write modif entry.modif = getModifString_(cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence()); ProtHitIt proteinHit = protIdent.findHit(*prot_ac); if (proteinHit == protIdent.getHits().end()) { std::cerr << "Protein referenced in peptide not found...\n"; continue; // protein not found } entry.accession = proteinHit->getAccession(); entries.push_back(entry); } } // 2nd we add the quantitative information of the channels // .. skip features with 0 intensity if (cFeature.getIntensity() == 0) { continue; } for (std::vector<IdCSV>::iterator entry = entries.begin(); entry != entries.end(); ++entry) { // set parent intensity entry->parent_intens = cFeature.getIntensity(); entry->retention_time = cFeature.getRT(); entry->spectrum = cFeature.getUniqueId(); entry->exp_mass = cFeature.getMZ(); // create output line StringList currentLine; // add entry to currentLine entry->toStringList(currentLine); // extract channel intensities and positions std::map<Int, double> intensityMap; ConsensusFeature::HandleSetType features = cFeature.getFeatures(); for (ConsensusFeature::HandleSetType::const_iterator fIt = features.begin(); fIt != features.end(); ++fIt) { intensityMap[Int(fIt->getMZ())] = (fIt->getIntensity() > intensity_threshold ? 
fIt->getIntensity() : 0.0); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(it->center)); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(intensityMap[int(it->center)])); } textFile.addLine(ListUtils::concatenate(currentLine, "\t")); } } // write to file textFile.store(filename); }