//Visualizing ProteinIdentification object void MetaDataBrowser::visualize_(ProteinIdentification & meta, QTreeWidgetItem * parent) { ProteinIdentificationVisualizer * visualizer = new ProteinIdentificationVisualizer(isEditable(), this, this); QStringList labels; int id = ws_->addWidget(visualizer); labels << QString("ProteinIdentification %1").arg(meta.getSearchEngine().c_str()) << QString::number(id); visualizer->load(meta, id); QTreeWidgetItem * item; if (parent == nullptr) { item = new QTreeWidgetItem(treeview_, labels); } else { item = new QTreeWidgetItem(parent, labels); } //check for proteinhits objects meta.assignRanks(); for (Size i = 0; i < meta.getHits().size(); ++i) { visualize_(const_cast<ProteinHit &>(meta.getHits()[i]), item); } visualize_(dynamic_cast<MetaInfoInterface &>(meta), item); connectVisualizer_(visualizer); }
// Merges the protein hits of several channel maps into a single feature map.
//
// Only the first ProteinIdentification of every map is considered; features
// are ignored. Hits are keyed by their protein sequence. For every sequence
// the merged hit carries
//   - "intensity":      the sum of the "intensity" meta values of all hits
//                       with that sequence, and
//   - "intensity_<c>":  the "intensity" meta value contributed by channel c.
//
// @param maps  the per-channel maps to merge
// @return a feature map that holds exactly one ProteinIdentification with the
//         merged hits (ordered by sequence, as they come out of the std::map)
FeatureMapSim BaseLabeler::mergeProteinIdentificationsMaps_(const FeatureMapSimVector & maps)
{
  typedef std::map<String, ProteinHit> HitsBySequence;

  // we do not have any features yet (or at least we ignore them), so simply
  // iterate over the protein identifications
  HitsBySequence prot_hits;
  Size channel_index = 1;

  for (FeatureMapSimVector::const_iterator maps_iterator = maps.begin(); maps_iterator != maps.end(); ++maps_iterator)
  {
    // NOTE(review): a map without protein identifications is skipped WITHOUT
    // advancing channel_index (unchanged from the previous implementation),
    // i.e. channel numbers count only maps that carry identifications.
    if (maps_iterator->getProteinIdentifications().empty()) continue;

    const std::vector<ProteinHit> & channel_hits = maps_iterator->getProteinIdentifications()[0].getHits();
    const String channel_key("intensity_" + String(channel_index));

    for (std::vector<ProteinHit>::const_iterator protein_hit = channel_hits.begin(); protein_hit != channel_hits.end(); ++protein_hit)
    {
      HitsBySequence::iterator known = prot_hits.find(protein_hit->getSequence());
      if (known != prot_hits.end()) // we already know this protein -- sum up abundances
      {
        const SimIntensityType channel_intensity = static_cast<SimIntensityType>(protein_hit->getMetaValue("intensity"));

        // remember channel intensity: this has to be the intensity of the
        // incoming hit, not the sum accumulated over the earlier channels
        known->second.setMetaValue(channel_key, channel_intensity);

        SimIntensityType summed_intensity = known->second.getMetaValue("intensity");
        summed_intensity += channel_intensity;
        known->second.setMetaValue("intensity", summed_intensity);
      }
      else // new protein hit .. remember
      {
        ProteinHit protHit(*protein_hit);
        protHit.setMetaValue(channel_key, protHit.getMetaValue("intensity"));
        prot_hits.insert(std::pair<String, ProteinHit>(protein_hit->getSequence(), protHit));
      }
    }
    ++channel_index;
  }

  // wrap the merged hits into a single identification run
  ProteinIdentification protIdent;
  for (HitsBySequence::iterator prot_hit_iter = prot_hits.begin(); prot_hit_iter != prot_hits.end(); ++prot_hit_iter)
  {
    protIdent.insertHit(prot_hit_iter->second);
  }

  std::vector<ProteinIdentification> protIdents;
  protIdents.push_back(protIdent);

  FeatureMapSim final_map;
  final_map.setProteinIdentifications(protIdents);
  return final_map;
}
void IDFilter::removeUnreferencedProteinHits(const ProteinIdentification& identification, const vector<PeptideIdentification> peptide_identifications, ProteinIdentification& filtered_identification) { const String& run_identifier = identification.getIdentifier(); // build set of protein accessions that are referenced by peptides set<String> proteinaccessions_with_peptides; for (Size i = 0; i != peptide_identifications.size(); ++i) { // run id of protein and peptide identification must match if (run_identifier == peptide_identifications[i].getIdentifier()) { const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits(); // extract protein accessions of each peptide hit for (Size j = 0; j != tmp_pep_hits.size(); ++j) { const std::vector<String>& protein_accessions = tmp_pep_hits[j].getProteinAccessions(); for (Size k = 0; k != protein_accessions.size(); ++k) { String key = protein_accessions[k]; proteinaccessions_with_peptides.insert(key); } } } } // add all protein hits referenced by a peptide const vector<ProteinHit>& temp_protein_hits = identification.getHits(); vector<ProteinHit> filtered_protein_hits; for (Size j = 0; j != temp_protein_hits.size(); ++j) { const String& protein_accession = temp_protein_hits[j].getAccession(); if (proteinaccessions_with_peptides.find(protein_accession) != proteinaccessions_with_peptides.end()) { filtered_protein_hits.push_back(temp_protein_hits[j]); } } // copy identification filtered_identification = identification; // assign filtered hits to protein identification filtered_identification.setHits(filtered_protein_hits); }
void IDFilter::filterIdentificationsByProteins(const ProteinIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, ProteinIdentification& filtered_identification) { String protein_sequences; String accession_sequences; vector<ProteinHit> filtered_protein_hits; ProteinHit temp_protein_hit; filtered_identification = identification; filtered_identification.setHits(vector<ProteinHit>()); for (Size i = 0; i < proteins.size(); i++) { accession_sequences.append("*" + proteins[i].identifier); } accession_sequences.append("*"); for (Size i = 0; i < identification.getHits().size(); i++) { if (accession_sequences.find("*" + identification.getHits()[i].getAccession()) != String::npos) { filtered_protein_hits.push_back(identification.getHits()[i]); } } filtered_identification.setHits(filtered_protein_hits); filtered_identification.assignRanks(); }
// Resets @p feature_map and fills it with a single ProteinIdentification
// that contains one ProteinHit per sample protein.
//
// @param proteins     sample proteins (FASTA entry plus meta values)
// @param feature_map  map that is cleared (clear(true)) and then receives the
//                     protein identification
// @param map_index    stored as meta value "map_index" on every hit
void MSSim::createFeatureMap_(const SimTypes::SampleProteins& proteins, SimTypes::FeatureMapSim& feature_map, Size map_index)
{
  // clear feature map
  feature_map.clear(true);

  ProteinIdentification protIdent;
  SimTypes::SampleProteins::const_iterator protein = proteins.begin();
  while (protein != proteins.end())
  {
    // new hit built from accession and sequence of the FASTA entry
    ProteinHit protHit(0.0, 1, protein->entry.identifier, protein->entry.sequence);

    // copy all meta values from FASTA file parsing
    protHit = protein->meta;

    // additional meta values:
    protHit.setMetaValue("description", protein->entry.description);
    protHit.setMetaValue("map_index", map_index);

    protIdent.insertHit(protHit);
    ++protein;
  }

  // the map stores a list of identification runs; ours has exactly one entry
  vector<ProteinIdentification> vec_protIdent(1, protIdent);
  feature_map.setProteinIdentifications(vec_protIdent);
}
#include <vector> /////////////////////////// START_TEST(XTandemXMLFile, "$Id$") ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// using namespace OpenMS; using namespace std; XTandemXMLFile xml_file; XTandemXMLFile* ptr; XTandemXMLFile* nullPointer = 0; ProteinIdentification protein_identification; vector<PeptideIdentification> peptide_identifications; vector<PeptideIdentification> peptide_identifications2; String date_string_1; String date_string_2; PeptideHit peptide_hit; START_SECTION((XTandemXMLFile())) ptr = new XTandemXMLFile(); TEST_NOT_EQUAL(ptr, nullPointer) END_SECTION START_SECTION(~XTandemXMLFile()) delete ptr; END_SECTION
END_SECTION

// Loads ProtXMLFile_input_1.protXML twice with the same ProtXMLFile object and
// checks identifiers, protein groups, indistinguishable proteins, protein hits
// and peptide hits after each load.
START_SECTION(void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids))
{
  ProtXMLFile f;
  ProteinIdentification proteins;
  PeptideIdentification peptides;
  String prot_file;
  // expected identifier of the first and of the second load
  StringList ids = ListUtils::create<String>("16627578304933075941,13229490167902618598");
  // we do this twice, just to check that members are correctly reset etc..
  for (Int i=0;i<2;++i)
  {
    prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_1.protXML");
    f.load(prot_file, proteins, peptides);
    // protein and peptide identification have to carry the same identifier
    TEST_EQUAL(proteins.getIdentifier(), ids[i]);
    TEST_EQUAL(peptides.getIdentifier(), ids[i]);
    // groups
    TEST_EQUAL(proteins.getProteinGroups().size(), 7);
    TEST_EQUAL(proteins.getProteinGroups()[0].probability, 0.9990);
    TEST_EQUAL(proteins.getProteinGroups()[0].accessions.size(), 1);
    TEST_EQUAL(proteins.getProteinGroups()[3].accessions.size(), 2);
    TEST_EQUAL(proteins.getProteinGroups()[3].accessions[0], "P01876|IGHA1_HUMAN");
    TEST_EQUAL(proteins.getProteinGroups()[3].accessions[1], "P01877|IGHA2_HUMAN");
    TEST_EQUAL(proteins.getProteinGroups()[6].probability, 0.2026);
    TEST_EQUAL(proteins.getProteinGroups()[6].accessions.size(), 1);
    // indistinguishable proteins (same number of entries and same accessions
    // as the groups checked above)
    TEST_EQUAL(proteins.getIndistinguishableProteins().size(), 7);
    TEST_EQUAL(proteins.getIndistinguishableProteins()[0].accessions.size(), 1);
    TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions.size(), 2);
    TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[0], "P01876|IGHA1_HUMAN");
    TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[1], "P01877|IGHA2_HUMAN");
    TEST_EQUAL(proteins.getIndistinguishableProteins()[6].accessions.size(), 1);
    // proteins
    TEST_EQUAL(proteins.getHits().size(), 9);
    TEST_EQUAL(proteins.getHits()[0].getAccession(), "P02787|TRFE_HUMAN");
    TEST_EQUAL(proteins.getHits()[0].getCoverage(), 8.6);
    TEST_EQUAL(proteins.getHits()[0].getScore(), 0.9990);
    // this one is indistinguishable... therefore it should have minimal infos
    // (coverage and score are both expected to be -1)
    TEST_EQUAL(proteins.getHits()[6].getAccession(), "P00739|HPTR_HUMAN");
    TEST_EQUAL(proteins.getHits()[6].getCoverage(), -1);
    TEST_EQUAL(proteins.getHits()[6].getScore(), -1);
    TEST_EQUAL(proteins.getHits()[8].getAccession(), "P04217|A1BG_HUMAN");
    TEST_EQUAL(proteins.getHits()[8].getCoverage(), 2.0);
    TEST_EQUAL(proteins.getHits()[8].getScore(), 0.2026);
    // peptides
    TEST_EQUAL(peptides.getHits().size(), 16);
    AASequence aa_seq("MYLGYEYVTAIR");
    TEST_EQUAL(peptides.getHits()[0].getSequence(), aa_seq);
    TEST_EQUAL(peptides.getHits()[0].getCharge(), 2);
    TEST_EQUAL(peptides.getHits()[0].getScore(), 0.8633);
    TEST_EQUAL(peptides.getHits()[0].getProteinAccessions().size(), 1);
    TEST_EQUAL(peptides.getHits()[0].getProteinAccessions()[0], "P02787|TRFE_HUMAN");
    TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_unique"), true);
    TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_contributing"), true);
    // load 2 nd file and
    // NOTE(review): prot_file is overwritten with the first input file at the
    // top of the loop body before the next load(), so within the code visible
    // here the second file is never passed to load() -- confirm the intent.
    prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_2.protXML");
  }
}
// Last FASTA entry of the protein list handed to the filters below; the
// declaration of "proteins" and earlier entries lie outside this excerpt.
proteins.push_back(FASTAFile::FASTAEntry("Q872T5", "test description 2", "THPYGHAIVAGIERYPSK"));

IDFilter* ptr = 0;
IDFilter* nullPointer = 0; // reference value for the "not null" check

// Default constructor: heap construction has to yield a non-null pointer.
START_SECTION((IDFilter()))
  ptr = new IDFilter();
  TEST_NOT_EQUAL(ptr, nullPointer);
END_SECTION

// Destructor: releases the object created in the constructor section.
START_SECTION((~IDFilter()))
  delete ptr;
END_SECTION

// Protein-level filter: the score type has to be retained and exactly the
// two hits with accessions Q824A5 and Q872T5 are expected, in this order.
START_SECTION((void filterIdentificationsByProteins(const ProteinIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, ProteinIdentification& filtered_identification)))
  ProteinIdentification protein_identification2;
  IDFilter::filterIdentificationsByProteins(protein_identification, proteins, protein_identification2);
  TEST_EQUAL(protein_identification2.getScoreType(), "Mascot")
  TEST_EQUAL(protein_identification2.getHits().size(), 2)
  TEST_EQUAL(protein_identification2.getHits()[0].getAccession(), "Q824A5")
  TEST_EQUAL(protein_identification2.getHits()[1].getAccession(), "Q872T5")
END_SECTION

// Peptide-level filter; the remainder of this section (including its
// END_SECTION) is outside this excerpt.
START_SECTION((void filterIdentificationsByProteins(const PeptideIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, PeptideIdentification& filtered_identification, bool no_protein_identifiers = false)))
  PeptideIdentification identification2;
  IDFilter::filterIdentificationsByProteins(identification, proteins, identification2);
  TEST_EQUAL(identification2.getScoreType(), "Mascot")
void IDFilter::removeUnreferencedPeptideHits(const ProteinIdentification& identification, vector<PeptideIdentification>& peptide_identifications, bool delete_unreferenced_peptide_hits /* = false */) { const String& run_identifier = identification.getIdentifier(); // build set of protein accessions set<String> all_prots; const vector<ProteinHit>& temp_protein_hits = identification.getHits(); for (Size j = 0; j != temp_protein_hits.size(); ++j) { all_prots.insert(temp_protein_hits[j].getAccession()); } vector<PeptideIdentification> filtered_peptide_identifications; // remove peptides which are not referenced for (Size i = 0; i != peptide_identifications.size(); ++i) { // run id of protein and peptide identification must match if (run_identifier == peptide_identifications[i].getIdentifier()) { const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits(); vector<PeptideHit> filtered_pep_hits; // check protein accessions of each peptide hit for (Size j = 0; j != tmp_pep_hits.size(); ++j) { vector<PeptideEvidence> hit_peptide_evidences = tmp_pep_hits[j].getPeptideEvidences(); vector<PeptideEvidence> valid_peptide_evidence; for (vector<PeptideEvidence>::const_iterator pe_it = hit_peptide_evidences.begin(); pe_it != hit_peptide_evidences.end(); ++pe_it) { // find valid proteins if (all_prots.find(pe_it->getProteinAccession()) != all_prots.end()) { valid_peptide_evidence.push_back(*pe_it); } } if (!valid_peptide_evidence.empty() || !delete_unreferenced_peptide_hits) { // if present, copy the hit filtered_pep_hits.push_back(tmp_pep_hits[j]); filtered_pep_hits.back().setPeptideEvidences(valid_peptide_evidence); } } // if the peptide has hits, we use it if (!filtered_pep_hits.empty()) { filtered_peptide_identifications.push_back(peptide_identifications[i]); filtered_peptide_identifications.back().setHits(filtered_pep_hits); } } else // peptide is from another run, let it pass the filter { filtered_peptide_identifications.push_back(peptide_identifications[i]); 
} } // exchange with new hits filtered_peptide_identifications.swap(peptide_identifications); }
void IBSpectraFile::store(const String& filename, const ConsensusMap& cm) { // typdefs for shorter code typedef std::vector<ProteinHit>::iterator ProtHitIt; // general settings .. do we need to expose these? // ---------------------------------------------------------------------- /// Allow also non-unique peptides to be exported bool allow_non_unique = true; /// Intensities below this value will be set to 0.0 to avoid numerical problems when quantifying double intensity_threshold = 0.00001; // ---------------------------------------------------------------------- // guess experiment type boost::shared_ptr<IsobaricQuantitationMethod> quantMethod = guessExperimentType_(cm); // we need the protein identifications to reference the protein names ProteinIdentification protIdent; bool has_proteinIdentifications = false; if (cm.getProteinIdentifications().size() > 0) { protIdent = cm.getProteinIdentifications()[0]; has_proteinIdentifications = true; } // start the file by adding the tsv header TextFile textFile; textFile.addLine(ListUtils::concatenate(constructHeader_(*quantMethod), "\t")); for (ConsensusMap::ConstIterator cm_iter = cm.begin(); cm_iter != cm.end(); ++cm_iter) { const ConsensusFeature& cFeature = *cm_iter; std::vector<IdCSV> entries; /// 1st we extract the identification information from the consensus feature if (cFeature.getPeptideIdentifications().size() == 0 || !has_proteinIdentifications) { // we store unidentified hits anyway, because the iTRAQ quant is still helpful for normalization entries.push_back(IdCSV()); } else { // protein name: const PeptideHit& peptide_hit = cFeature.getPeptideIdentifications()[0].getHits()[0]; std::set<String> protein_accessions = peptide_hit.extractProteinAccessions(); if (protein_accessions.size() != 1) { if (!allow_non_unique) continue; // we only want unique peptides } for (std::set<String>::const_iterator prot_ac = protein_accessions.begin(); prot_ac != protein_accessions.end(); ++prot_ac) { IdCSV entry; entry.charge 
= cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge(); entry.peptide = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString(); entry.theo_mass = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge()); // write modif entry.modif = getModifString_(cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence()); ProtHitIt proteinHit = protIdent.findHit(*prot_ac); if (proteinHit == protIdent.getHits().end()) { std::cerr << "Protein referenced in peptide not found...\n"; continue; // protein not found } entry.accession = proteinHit->getAccession(); entries.push_back(entry); } } // 2nd we add the quantitative information of the channels // .. skip features with 0 intensity if (cFeature.getIntensity() == 0) { continue; } for (std::vector<IdCSV>::iterator entry = entries.begin(); entry != entries.end(); ++entry) { // set parent intensity entry->parent_intens = cFeature.getIntensity(); entry->retention_time = cFeature.getRT(); entry->spectrum = cFeature.getUniqueId(); entry->exp_mass = cFeature.getMZ(); // create output line StringList currentLine; // add entry to currentLine entry->toStringList(currentLine); // extract channel intensities and positions std::map<Int, double> intensityMap; ConsensusFeature::HandleSetType features = cFeature.getFeatures(); for (ConsensusFeature::HandleSetType::const_iterator fIt = features.begin(); fIt != features.end(); ++fIt) { intensityMap[Int(fIt->getMZ())] = (fIt->getIntensity() > intensity_threshold ? 
fIt->getIntensity() : 0.0); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(it->center)); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(intensityMap[int(it->center)])); } textFile.addLine(ListUtils::concatenate(currentLine, "\t")); } } // write to file textFile.store(filename); }
void PepNovoOutfile::load( const std::string & result_filename, vector<PeptideIdentification> & peptide_identifications, ProteinIdentification & protein_identification, const double & score_threshold, const IndexPosMappingType & index_to_precursor, const map<String, String> & pnovo_modkey_to_mod_id ) { // generally used variables StringList substrings; map<String, Int> columns; PeptideHit peptide_hit; String line, score_type = "PepNovo", version = "unknown", identifier, filename, sequence, sequence_with_mods; DateTime datetime = DateTime::now(); // there's no date given from PepNovo protein_identification.setDateTime(datetime); peptide_identifications.clear(); PeptideIdentification peptide_identification; protein_identification = ProteinIdentification(); // open the result ifstream result_file(result_filename.c_str()); if (!result_file) { throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, result_filename); } Size line_number(0); // used to report in which line an error occurred Size id_count(0); // number of IDs seen (not necessarily the ones finally returned) getSearchEngineAndVersion(result_filename, protein_identification); //if information could not be retrieved from the outfile use defaults if (protein_identification.getSearchEngineVersion().empty()) { protein_identification.setSearchEngine("PepNovo"); protein_identification.setSearchEngineVersion(version); } identifier = protein_identification.getSearchEngine() + "_" + datetime.getDate(); protein_identification.setIdentifier(identifier); map<String, String> mod_mask_map; const vector<String> & mods = protein_identification.getSearchParameters().variable_modifications; for (vector<String>::const_iterator mod_it = mods.begin(); mod_it != mods.end(); ++mod_it) { if (mod_it->empty()) continue; //cout<<*mod_it<<endl; if (pnovo_modkey_to_mod_id.find(*mod_it) != pnovo_modkey_to_mod_id.end()) { //cout<<keys_to_id.find(*mod_it)->second<<endl; ResidueModification tmp_mod = 
ModificationsDB::getInstance()->getModification(pnovo_modkey_to_mod_id.find(*mod_it)->second); if (mod_it->prefix(1) == "^" || mod_it->prefix(1) == "$") { mod_mask_map[*mod_it] = "(" + tmp_mod.getId() + ")"; } else { mod_mask_map[*mod_it] = String(tmp_mod.getOrigin()) + "(" + tmp_mod.getId() + ")"; } } else { if (mod_it->prefix(1) != "^" && mod_it->prefix(1) != "$") { mod_mask_map[*mod_it] = mod_it->prefix(1) + "[" + mod_it->substr(1) + "]"; //cout<<mod_mask_map[*mod_it]<<endl; } else { mod_mask_map[*mod_it] = "[" + *mod_it + "]"; //cout<<mod_mask_map[*mod_it]<<endl; } } } Size index; while (getline(result_file, line)) { if (!line.empty() && (line[line.length() - 1] < 33)) line.resize(line.length() - 1); // remove weird EOL character line.trim(); ++line_number; if (line.hasPrefix(">> ")) // >> 1 /home/shared/pepnovo/4611_raw_ms2_picked.mzXML.1001.2.dta { ++id_count; if (!peptide_identification.empty() && !peptide_identification.getHits().empty()) { peptide_identifications.push_back(peptide_identification); } line.split(' ', substrings); //String index = File::basename(line.substr(line.find(' ', strlen(">> ")) + 1)); if (substrings.size() < 3) { throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns (spectrum Id) in file in line " + String(line_number) + String(" (should be 2 or more)!"), result_filename); } try { index = substrings[2].trim().toInt(); } catch (...) 
{ throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Expected an index number in line " + String(line_number) + String(" at position 2 (line was: '" + line + "')!"), result_filename); } //cout<<"INDEX: "<<index<<endl; peptide_identification = PeptideIdentification(); bool success = false; if (index_to_precursor.size()>0) { if (index_to_precursor.find(index) != index_to_precursor.end()) { peptide_identification.setRT(index_to_precursor.find(index)->second.first); peptide_identification.setMZ(index_to_precursor.find(index)->second.second); success = true; } else throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Index '" + String(index) + String("' in line '" + line + "' not found in index table (line was: '" + line + "')!"), result_filename); } if (!success) { // try to reconstruct from title entry (usually sensible when MGF is supplied to PepNovo) try { if (substrings.size() >= 4) { StringList parts = ListUtils::create<String>(substrings[3], '_'); if (parts.size() >= 2) { peptide_identification.setRT(parts[1].toDouble()); peptide_identification.setMZ(parts[0].toDouble()); success = true; } } } catch (...) 
{ } if (!success) throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Precursor could not be reconstructed from title '" + substrings[3] + String("' in line '" + line + "' (line was: '" + line + "')!"), result_filename); } peptide_identification.setSignificanceThreshold(score_threshold); peptide_identification.setScoreType(score_type); peptide_identification.setIdentifier(identifier); } else if (line.hasPrefix("#Index")) // #Index Prob Score N-mass C-Mass [M+H] Charge Sequence { if (columns.empty()) // map the column names to their column number { line.split('\t', substrings); for (vector<String>::const_iterator s_i = substrings.begin(); s_i != substrings.end(); ++s_i) { if ((*s_i) == "#Index") columns["Index"] = s_i - substrings.begin(); else if ((*s_i) == "RnkScr") columns["RnkScr"] = s_i - substrings.begin(); else if ((*s_i) == "PnvScr") columns["PnvScr"] = s_i - substrings.begin(); else if ((*s_i) == "N-Gap") columns["N-Gap"] = s_i - substrings.begin(); else if ((*s_i) == "C-Gap") columns["C-Gap"] = s_i - substrings.begin(); else if ((*s_i) == "[M+H]") columns["[M+H]"] = s_i - substrings.begin(); else if ((*s_i) == "Charge") columns["Charge"] = s_i - substrings.begin(); else if ((*s_i) == "Sequence") columns["Sequence"] = s_i - substrings.begin(); } if (columns.size() != 8) { result_file.close(); result_file.clear(); throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns in file in line " + String(line_number) + String(" (should be 8)!"), result_filename); } } while (getline(result_file, line)) { ++line_number; if (!line.empty() && (line[line.length() - 1] < 33)) line.resize(line.length() - 1); line.trim(); if (line.empty()) break; line.split('\t', substrings); if (!substrings.empty()) { if (substrings.size() != 8) { result_file.close(); result_file.clear(); throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns in file in line " + String(line_number) + String(" 
(should be 8)!"), result_filename); } if (substrings[columns["RnkScr"]].toFloat() >= score_threshold) { peptide_hit = PeptideHit(); peptide_hit.setCharge(substrings[columns["Charge"]].toInt()); peptide_hit.setRank(substrings[columns["Index"]].toInt() + 1); peptide_hit.setScore(substrings[columns["RnkScr"]].toFloat()); peptide_hit.setMetaValue("PnvScr", substrings[columns["PnvScr"]].toFloat()); peptide_hit.setMetaValue("N-Gap", substrings[columns["N-Gap"]].toFloat()); peptide_hit.setMetaValue("C-Gap", substrings[columns["C-Gap"]].toFloat()); peptide_hit.setMetaValue("MZ", substrings[columns["[M+H]"]].toFloat()); sequence = substrings[columns["Sequence"]]; for (map<String, String>::iterator mask_it = mod_mask_map.begin(); mask_it != mod_mask_map.end(); ++mask_it) { if (mask_it->first.hasPrefix("^") && sequence.hasSubstring(mask_it->first)) { sequence.substitute(mask_it->first, ""); sequence = mask_it->second + sequence; } //cout<<mask_it->first<<" "<<mask_it->second<<endl; sequence.substitute(mask_it->first, mask_it->second); } peptide_hit.setSequence(AASequence::fromString(sequence)); peptide_identification.insertHit(peptide_hit); } } } } } if (!peptide_identifications.empty() || !peptide_identification.getHits().empty()) { peptide_identifications.push_back(peptide_identification); } result_file.close(); result_file.clear(); LOG_INFO << "Parsed " << id_count << " ids, retained " << peptide_identifications.size() << "." << std::endl; }
void PepNovoOutfile::getSearchEngineAndVersion( const String & pepnovo_output_without_parameters_filename, ProteinIdentification & protein_identification) { ifstream pepnovo_output_without_parameters(pepnovo_output_without_parameters_filename.c_str()); if (!pepnovo_output_without_parameters) { throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, pepnovo_output_without_parameters_filename); } ProteinIdentification::SearchParameters search_param; // searching for something like this: PepNovo v1.03 String line; vector<String> substrings; while (getline(pepnovo_output_without_parameters, line)) { if (!line.empty() && (line[line.length() - 1] < 33)) line.resize(line.length() - 1); line.trim(); if (line.empty()) continue; if (line.hasPrefix("PepNovo")) { line.split(',', substrings); if (substrings.size() == 2) //previous version of PepNovo { protein_identification.setSearchEngine(substrings[0].trim()); protein_identification.setSearchEngineVersion(substrings[1].trim()); //else something is strange and we use defaults later } else { line.split(' ', substrings); if (substrings.size() == 3) { protein_identification.setSearchEngine(substrings[0].trim()); protein_identification.setSearchEngineVersion(substrings[2].trim()); //else something is strange and we use defaults later } } } if (line.hasPrefix("PM")) { line.split(' ', substrings); search_param.precursor_mass_tolerance = substrings.back().toFloat(); } if (line.hasPrefix("Fragment")) { line.split(' ', substrings); search_param.fragment_mass_tolerance = substrings.back().toFloat(); } if (line.hasPrefix("PTM")) { line.split(':', substrings); substrings.erase(substrings.begin()); for (vector<String>::iterator ptm_it = substrings.begin(); ptm_it != substrings.end(); ++ptm_it) { ptm_it->trim(); } if (!substrings.empty() && substrings[0] != "None") { search_param.variable_modifications = substrings; } } if (line.hasPrefix(">>")) { break; } } protein_identification.setSearchParameters(search_param); }