示例#1
0
  //Visualizing ProteinIdentification object
  void MetaDataBrowser::visualize_(ProteinIdentification & meta, QTreeWidgetItem * parent)
  {
    ProteinIdentificationVisualizer * visualizer = new ProteinIdentificationVisualizer(isEditable(), this, this);

    QStringList labels;
    int id = ws_->addWidget(visualizer);
    labels << QString("ProteinIdentification %1").arg(meta.getSearchEngine().c_str()) << QString::number(id);

    visualizer->load(meta, id);

    QTreeWidgetItem * item;
    if (parent == nullptr)
    {
      item = new QTreeWidgetItem(treeview_, labels);
    }
    else
    {
      item = new QTreeWidgetItem(parent, labels);
    }

    //check for proteinhits objects
    meta.assignRanks();

    for (Size i = 0; i < meta.getHits().size(); ++i)
    {
      visualize_(const_cast<ProteinHit &>(meta.getHits()[i]), item);
    }

    visualize_(dynamic_cast<MetaInfoInterface &>(meta), item);

    connectVisualizer_(visualizer);
  }
示例#2
0
FeatureMapSim BaseLabeler::mergeProteinIdentificationsMaps_(const FeatureMapSimVector & maps)
{
    // we do not have any features yet (or at least we ignore them), so simply iterate over the protein
    // identifications
    std::map<String, ProteinHit> prot_hits;
    Size channel_index = 1;
    for (FeatureMapSimVector::const_iterator maps_iterator = maps.begin(); maps_iterator != maps.end(); ++maps_iterator)
    {
        if (maps_iterator->getProteinIdentifications().size() == 0)
            continue;

        for (std::vector<ProteinHit>::const_iterator protein_hit = (*maps_iterator).getProteinIdentifications()[0].getHits().begin();
                protein_hit != (*maps_iterator).getProteinIdentifications()[0].getHits().end();
                ++protein_hit)
        {
            if (prot_hits.count((*protein_hit).getSequence())) // we already know this protein -- sum up abundances
            {
                SimIntensityType new_intensity = prot_hits[(*protein_hit).getSequence()].getMetaValue("intensity");

                // remember channel intensity
                prot_hits[(*protein_hit).getSequence()].setMetaValue("intensity_" + String(channel_index), new_intensity);

                new_intensity += static_cast<SimIntensityType>((*protein_hit).getMetaValue("intensity"));
                prot_hits[(*protein_hit).getSequence()].setMetaValue("intensity", new_intensity);
            }
            else // new protein hit .. remember
            {
                ProteinHit protHit(*protein_hit);
                protHit.setMetaValue("intensity_" + String(channel_index), protHit.getMetaValue("intensity"));
                prot_hits.insert(std::pair<String, ProteinHit>((*protein_hit).getSequence(), protHit));
            }
        }
        ++channel_index;
    }

    FeatureMapSim final_map;
    ProteinIdentification protIdent;

    for (std::map<String, ProteinHit>::iterator prot_hit_iter = prot_hits.begin(); prot_hit_iter != prot_hits.end(); ++prot_hit_iter)
    {
        protIdent.insertHit(prot_hit_iter->second);
    }
    std::vector<ProteinIdentification> protIdents;
    protIdents.push_back(protIdent);
    final_map.setProteinIdentifications(protIdents);

    return final_map;
}
示例#3
0
  void IDFilter::removeUnreferencedProteinHits(const ProteinIdentification& identification, const vector<PeptideIdentification> peptide_identifications, ProteinIdentification& filtered_identification)
  {
    const String& run_identifier = identification.getIdentifier();

    // build set of protein accessions that are referenced by peptides
    set<String> proteinaccessions_with_peptides;
    for (Size i = 0; i != peptide_identifications.size(); ++i)
    {
      // run id of protein and peptide identification must match
      if (run_identifier == peptide_identifications[i].getIdentifier())
      {
        const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits();
        // extract protein accessions of each peptide hit
        for (Size j = 0; j != tmp_pep_hits.size(); ++j)
        {
          const std::vector<String>& protein_accessions = tmp_pep_hits[j].getProteinAccessions();
          for (Size k = 0; k != protein_accessions.size(); ++k)
          {
            String key = protein_accessions[k];
            proteinaccessions_with_peptides.insert(key);
          }
        }
      }
    }

    // add all protein hits referenced by a peptide
    const vector<ProteinHit>& temp_protein_hits = identification.getHits();
    vector<ProteinHit> filtered_protein_hits;
    for (Size j = 0; j != temp_protein_hits.size(); ++j)
    {
      const String& protein_accession = temp_protein_hits[j].getAccession();
      if (proteinaccessions_with_peptides.find(protein_accession) != proteinaccessions_with_peptides.end())
      {
        filtered_protein_hits.push_back(temp_protein_hits[j]);
      }
    }

    // copy identification
    filtered_identification = identification;

    // assign filtered hits to protein identification
    filtered_identification.setHits(filtered_protein_hits);
  }
示例#4
0
  void IDFilter::filterIdentificationsByProteins(const ProteinIdentification& identification,
                                                 const vector<FASTAFile::FASTAEntry>& proteins,
                                                 ProteinIdentification& filtered_identification)
  {
    String protein_sequences;
    String accession_sequences;
    vector<ProteinHit> filtered_protein_hits;
    ProteinHit temp_protein_hit;

    filtered_identification = identification;
    filtered_identification.setHits(vector<ProteinHit>());

    for (Size i = 0; i < proteins.size(); i++)
    {
      accession_sequences.append("*" + proteins[i].identifier);
    }
    accession_sequences.append("*");

    for (Size i = 0; i < identification.getHits().size(); i++)
    {
      if (accession_sequences.find("*" + identification.getHits()[i].getAccession()) != String::npos)
      {
        filtered_protein_hits.push_back(identification.getHits()[i]);
      }
    }

    filtered_identification.setHits(filtered_protein_hits);
    filtered_identification.assignRanks();
  }
示例#5
0
  void MSSim::createFeatureMap_(const SimTypes::SampleProteins& proteins, SimTypes::FeatureMapSim& feature_map, Size map_index)
  {
    // clear feature map
    feature_map.clear(true);
    ProteinIdentification protIdent;

    for (SimTypes::SampleProteins::const_iterator it = proteins.begin(); it != proteins.end(); ++it)
    {
      // add new ProteinHit to ProteinIdentification
      ProteinHit protHit(0.0, 1, (it->entry).identifier, (it->entry).sequence);
      // copy all meta values from FASTA file parsing
      protHit = (it->meta);
      // additional meta values:
      protHit.setMetaValue("description", it->entry.description);
      protHit.setMetaValue("map_index", map_index);
      protIdent.insertHit(protHit);
    }

    vector<ProteinIdentification> vec_protIdent;
    vec_protIdent.push_back(protIdent);
    feature_map.setProteinIdentifications(vec_protIdent);
  }
示例#6
0
#include <vector>

///////////////////////////

START_TEST(XTandemXMLFile, "$Id$")

/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////

using namespace OpenMS;
using namespace std;

XTandemXMLFile xml_file;
XTandemXMLFile* ptr;
XTandemXMLFile* nullPointer = 0;
ProteinIdentification protein_identification;
vector<PeptideIdentification> peptide_identifications;
vector<PeptideIdentification> peptide_identifications2;
String date_string_1;
String date_string_2;
PeptideHit peptide_hit;

START_SECTION((XTandemXMLFile()))
	ptr = new XTandemXMLFile();
	TEST_NOT_EQUAL(ptr, nullPointer)
END_SECTION

START_SECTION(~XTandemXMLFile())
	delete ptr;
END_SECTION
示例#7
0
END_SECTION

START_SECTION(void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids))
{
	ProtXMLFile f;
	ProteinIdentification proteins;
	PeptideIdentification peptides;
	String prot_file;

  StringList ids = ListUtils::create<String>("16627578304933075941,13229490167902618598");

	// we do this twice, just to check that members are correctly reset etc..
	for (Int i=0;i<2;++i)
	{
		prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_1.protXML");
		f.load(prot_file, proteins, peptides);
		TEST_EQUAL(proteins.getIdentifier(), ids[i]);
		TEST_EQUAL(peptides.getIdentifier(), ids[i]);
	
		// groups	
		TEST_EQUAL(proteins.getProteinGroups().size(), 7);
		TEST_EQUAL(proteins.getProteinGroups()[0].probability, 0.9990);
		TEST_EQUAL(proteins.getProteinGroups()[0].accessions.size(), 1);
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions.size(), 2);
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions[0], 
							 "P01876|IGHA1_HUMAN");
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions[1], 
							 "P01877|IGHA2_HUMAN");
		TEST_EQUAL(proteins.getProteinGroups()[6].probability, 0.2026);
		TEST_EQUAL(proteins.getProteinGroups()[6].accessions.size(), 1);

		TEST_EQUAL(proteins.getIndistinguishableProteins().size(), 7);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[0].accessions.size(), 1);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions.size(), 2);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[0], 
							 "P01876|IGHA1_HUMAN");
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[1], 
							 "P01877|IGHA2_HUMAN");
		TEST_EQUAL(proteins.getIndistinguishableProteins()[6].accessions.size(), 1);

		// proteins
		TEST_EQUAL(proteins.getHits().size(), 9);
		TEST_EQUAL(proteins.getHits()[0].getAccession(), "P02787|TRFE_HUMAN");
		TEST_EQUAL(proteins.getHits()[0].getCoverage(), 8.6);
		TEST_EQUAL(proteins.getHits()[0].getScore(), 0.9990);
    // this one is indistinguishable... therefore it should have minimal infos
		TEST_EQUAL(proteins.getHits()[6].getAccession(), "P00739|HPTR_HUMAN");
		TEST_EQUAL(proteins.getHits()[6].getCoverage(), -1);
		TEST_EQUAL(proteins.getHits()[6].getScore(), -1);

		TEST_EQUAL(proteins.getHits()[8].getAccession(), "P04217|A1BG_HUMAN");
		TEST_EQUAL(proteins.getHits()[8].getCoverage(), 2.0);
		TEST_EQUAL(proteins.getHits()[8].getScore(), 0.2026);
	
		// peptides
		TEST_EQUAL(peptides.getHits().size(), 16);
		AASequence aa_seq("MYLGYEYVTAIR");
		TEST_EQUAL(peptides.getHits()[0].getSequence(), aa_seq);
		TEST_EQUAL(peptides.getHits()[0].getCharge(), 2);
		TEST_EQUAL(peptides.getHits()[0].getScore(), 0.8633);
		TEST_EQUAL(peptides.getHits()[0].getProteinAccessions().size(), 1);
		TEST_EQUAL(peptides.getHits()[0].getProteinAccessions()[0], "P02787|TRFE_HUMAN");
		TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_unique"), true);
		TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_contributing"), true);
			
		// load 2 nd file and
		prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_2.protXML");
		
	}
}
proteins.push_back(FASTAFile::FASTAEntry("Q872T5", "test description 2", "THPYGHAIVAGIERYPSK"));

IDFilter* ptr = 0;
IDFilter* nullPointer = 0;

START_SECTION((IDFilter()))
  ptr = new IDFilter();
  TEST_NOT_EQUAL(ptr, nullPointer);
END_SECTION

START_SECTION((~IDFilter()))
  delete ptr;
END_SECTION

START_SECTION((void filterIdentificationsByProteins(const ProteinIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, ProteinIdentification& filtered_identification)))
  ProteinIdentification protein_identification2;

  IDFilter::filterIdentificationsByProteins(protein_identification, proteins, protein_identification2);

  TEST_EQUAL(protein_identification2.getScoreType(), "Mascot")
  TEST_EQUAL(protein_identification2.getHits().size(), 2)
  TEST_EQUAL(protein_identification2.getHits()[0].getAccession(), "Q824A5")
  TEST_EQUAL(protein_identification2.getHits()[1].getAccession(), "Q872T5")
END_SECTION

START_SECTION((void filterIdentificationsByProteins(const PeptideIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, PeptideIdentification& filtered_identification, bool no_protein_identifiers = false)))
  PeptideIdentification identification2;

  IDFilter::filterIdentificationsByProteins(identification, proteins, identification2);

  TEST_EQUAL(identification2.getScoreType(), "Mascot")
示例#9
0
  void IDFilter::removeUnreferencedPeptideHits(const ProteinIdentification& identification,
                                               vector<PeptideIdentification>& peptide_identifications,
                                               bool delete_unreferenced_peptide_hits /* = false */)
  {
    const String& run_identifier = identification.getIdentifier();

    // build set of protein accessions
    set<String> all_prots;
    const vector<ProteinHit>& temp_protein_hits = identification.getHits();
    for (Size j = 0; j != temp_protein_hits.size(); ++j)
    {
      all_prots.insert(temp_protein_hits[j].getAccession());
    }

    vector<PeptideIdentification> filtered_peptide_identifications;
    // remove peptides which are not referenced
    for (Size i = 0; i != peptide_identifications.size(); ++i)
    {
      // run id of protein and peptide identification must match
      if (run_identifier == peptide_identifications[i].getIdentifier())
      {
        const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits();
        vector<PeptideHit> filtered_pep_hits;
        // check protein accessions of each peptide hit
        for (Size j = 0; j != tmp_pep_hits.size(); ++j)
        {
          vector<PeptideEvidence> hit_peptide_evidences = tmp_pep_hits[j].getPeptideEvidences();
          vector<PeptideEvidence> valid_peptide_evidence;

          for (vector<PeptideEvidence>::const_iterator pe_it = hit_peptide_evidences.begin(); pe_it != hit_peptide_evidences.end(); ++pe_it)
          {
            // find valid proteins
            if (all_prots.find(pe_it->getProteinAccession()) != all_prots.end())
            {
              valid_peptide_evidence.push_back(*pe_it);
            }
          }

          if (!valid_peptide_evidence.empty() || !delete_unreferenced_peptide_hits)
          {
            // if present, copy the hit
            filtered_pep_hits.push_back(tmp_pep_hits[j]);
            filtered_pep_hits.back().setPeptideEvidences(valid_peptide_evidence);
          }
        }
        // if the peptide has hits, we use it
        if (!filtered_pep_hits.empty())
        {
          filtered_peptide_identifications.push_back(peptide_identifications[i]);
          filtered_peptide_identifications.back().setHits(filtered_pep_hits);
        }
      }
      else    // peptide is from another run, let it pass the filter‏
      {
        filtered_peptide_identifications.push_back(peptide_identifications[i]);
      }
    }

    // exchange with new hits
    filtered_peptide_identifications.swap(peptide_identifications);
  }
示例#10
0
  void IBSpectraFile::store(const String& filename, const ConsensusMap& cm)
  {
    // typdefs for shorter code
    typedef std::vector<ProteinHit>::iterator ProtHitIt;

    // general settings .. do we need to expose these?
    // ----------------------------------------------------------------------
    /// Allow also non-unique peptides to be exported
    bool allow_non_unique = true;
    /// Intensities below this value will be set to 0.0 to avoid numerical problems when quantifying
    double intensity_threshold = 0.00001;
    // ----------------------------------------------------------------------


    // guess experiment type
    boost::shared_ptr<IsobaricQuantitationMethod> quantMethod = guessExperimentType_(cm);

    // we need the protein identifications to reference the protein names
    ProteinIdentification protIdent;
    bool has_proteinIdentifications = false;
    if (cm.getProteinIdentifications().size() > 0)
    {
      protIdent = cm.getProteinIdentifications()[0];
      has_proteinIdentifications = true;
    }

    // start the file by adding the tsv header
    TextFile textFile;
    textFile.addLine(ListUtils::concatenate(constructHeader_(*quantMethod), "\t"));

    for (ConsensusMap::ConstIterator cm_iter = cm.begin();
         cm_iter != cm.end();
         ++cm_iter)
    {
      const ConsensusFeature& cFeature = *cm_iter;
      std::vector<IdCSV> entries;

      /// 1st we extract the identification information from the consensus feature
      if (cFeature.getPeptideIdentifications().size() == 0 || !has_proteinIdentifications)
      {
        // we store unidentified hits anyway, because the iTRAQ quant is still helpful for normalization
        entries.push_back(IdCSV());
      }
      else
      {
        // protein name:
        const PeptideHit& peptide_hit = cFeature.getPeptideIdentifications()[0].getHits()[0];
        std::set<String> protein_accessions = peptide_hit.extractProteinAccessions();
        if (protein_accessions.size() != 1)
        {
          if (!allow_non_unique) continue; // we only want unique peptides
        }

        for (std::set<String>::const_iterator prot_ac = protein_accessions.begin(); prot_ac != protein_accessions.end(); ++prot_ac)
        {
          IdCSV entry;
          entry.charge = cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge();
          entry.peptide = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString();
          entry.theo_mass = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge());

          // write modif
          entry.modif = getModifString_(cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence());

          ProtHitIt proteinHit = protIdent.findHit(*prot_ac);
          if (proteinHit == protIdent.getHits().end())
          {
            std::cerr << "Protein referenced in peptide not found...\n";
            continue; // protein not found
          }

          entry.accession = proteinHit->getAccession();
          entries.push_back(entry);
        }
      }

      // 2nd we add the quantitative information of the channels

      // .. skip features with 0 intensity
      if (cFeature.getIntensity() == 0)
      {
        continue;
      }

      for (std::vector<IdCSV>::iterator entry = entries.begin();
           entry != entries.end();
           ++entry)
      {
        // set parent intensity
        entry->parent_intens = cFeature.getIntensity();
        entry->retention_time = cFeature.getRT();
        entry->spectrum = cFeature.getUniqueId();
        entry->exp_mass = cFeature.getMZ();

        // create output line
        StringList currentLine;

        // add entry to currentLine
        entry->toStringList(currentLine);

        // extract channel intensities and positions
        std::map<Int, double> intensityMap;
        ConsensusFeature::HandleSetType features = cFeature.getFeatures();

        for (ConsensusFeature::HandleSetType::const_iterator fIt = features.begin();
             fIt != features.end();
             ++fIt)
        {
          intensityMap[Int(fIt->getMZ())] = (fIt->getIntensity() > intensity_threshold ? fIt->getIntensity() : 0.0);
        }
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin();
             it != quantMethod->getChannelInformation().end();
             ++it)
        {
          currentLine.push_back(String(it->center));
        }
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin();
             it != quantMethod->getChannelInformation().end();
             ++it)
        {
          currentLine.push_back(String(intensityMap[int(it->center)]));
        }

        textFile.addLine(ListUtils::concatenate(currentLine, "\t"));
      }
    }

    // write to file
    textFile.store(filename);
  }
示例#11
0
  void
  PepNovoOutfile::load(
    const std::string & result_filename,
    vector<PeptideIdentification> & peptide_identifications,
    ProteinIdentification & protein_identification,
    const double & score_threshold,
    const IndexPosMappingType & index_to_precursor,
    const map<String, String> & pnovo_modkey_to_mod_id
    )
  {
    // generally used variables
    StringList substrings;
    map<String, Int> columns;
    PeptideHit peptide_hit;

    String
      line,
      score_type = "PepNovo",
      version = "unknown",
      identifier,
      filename,
      sequence,
      sequence_with_mods;

    DateTime datetime = DateTime::now();     // there's no date given from PepNovo
    protein_identification.setDateTime(datetime);

    peptide_identifications.clear();
    PeptideIdentification peptide_identification;
    protein_identification = ProteinIdentification();

    // open the result
    ifstream result_file(result_filename.c_str());
    if (!result_file)
    {
      throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, result_filename);
    }

    Size line_number(0);     // used to report in which line an error occurred
    Size id_count(0);        // number of IDs seen (not necessarily the ones finally returned)

    getSearchEngineAndVersion(result_filename, protein_identification);
    //if information could not be retrieved from the outfile use defaults
    if (protein_identification.getSearchEngineVersion().empty())
    {
      protein_identification.setSearchEngine("PepNovo");
      protein_identification.setSearchEngineVersion(version);
    }
    identifier = protein_identification.getSearchEngine() + "_" + datetime.getDate();
    protein_identification.setIdentifier(identifier);

    map<String, String> mod_mask_map;
    const vector<String> & mods = protein_identification.getSearchParameters().variable_modifications;
    for (vector<String>::const_iterator mod_it = mods.begin(); mod_it != mods.end(); ++mod_it)
    {
      if (mod_it->empty())
        continue;
      //cout<<*mod_it<<endl;
      if (pnovo_modkey_to_mod_id.find(*mod_it) != pnovo_modkey_to_mod_id.end())
      {
        //cout<<keys_to_id.find(*mod_it)->second<<endl;
        ResidueModification tmp_mod = ModificationsDB::getInstance()->getModification(pnovo_modkey_to_mod_id.find(*mod_it)->second);
        if (mod_it->prefix(1) == "^" || mod_it->prefix(1) == "$")
        {
          mod_mask_map[*mod_it] = "(" + tmp_mod.getId() + ")";
        }
        else
        {
          mod_mask_map[*mod_it] = String(tmp_mod.getOrigin()) + "(" + tmp_mod.getId() + ")";
        }
      }
      else
      {
        if (mod_it->prefix(1) != "^" && mod_it->prefix(1) != "$")
        {
          mod_mask_map[*mod_it] = mod_it->prefix(1) + "[" + mod_it->substr(1) + "]";
          //cout<<mod_mask_map[*mod_it]<<endl;
        }
        else
        {
          mod_mask_map[*mod_it] = "[" + *mod_it + "]";
          //cout<<mod_mask_map[*mod_it]<<endl;
        }
      }
    }


    Size index;
    while (getline(result_file, line))
    {
      if (!line.empty() && (line[line.length() - 1] < 33)) line.resize(line.length() - 1); // remove weird EOL character
      line.trim();
      ++line_number;
      if (line.hasPrefix(">> "))         // >> 1 /home/shared/pepnovo/4611_raw_ms2_picked.mzXML.1001.2.dta
      {
        ++id_count;
        if (!peptide_identification.empty() && !peptide_identification.getHits().empty())
        {
          peptide_identifications.push_back(peptide_identification);
        }

        line.split(' ', substrings);
        //String index = File::basename(line.substr(line.find(' ', strlen(">> ")) + 1));
        if (substrings.size() < 3)
        {
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns (spectrum Id) in file in line " + String(line_number) + String(" (should be 2 or more)!"), result_filename);
        }

        try
        {
          index = substrings[2].trim().toInt();
        }
        catch (...)
        {
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Expected an index number in line " + String(line_number) + String(" at position 2 (line was: '" + line + "')!"), result_filename);
        }

        //cout<<"INDEX: "<<index<<endl;
        peptide_identification = PeptideIdentification();
        bool success = false;
        if (index_to_precursor.size()>0)
        {
          if (index_to_precursor.find(index) != index_to_precursor.end())
          {
            peptide_identification.setRT(index_to_precursor.find(index)->second.first);
            peptide_identification.setMZ(index_to_precursor.find(index)->second.second);
            success = true;
          }
          else throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Index '" + String(index) + String("' in line '" + line + "' not found in index table (line was: '" + line + "')!"), result_filename);
        }

        if (!success)
        { // try to reconstruct from title entry (usually sensible when MGF is supplied to PepNovo)
          try
          {
            if (substrings.size() >= 4)
            {
              StringList parts = ListUtils::create<String>(substrings[3], '_');
              if (parts.size() >= 2)
              {
                peptide_identification.setRT(parts[1].toDouble());
                peptide_identification.setMZ(parts[0].toDouble());
                success = true;
              }
            }
          }
          catch (...)
          {

          }
          if (!success) throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Precursor could not be reconstructed from title '" + substrings[3] + String("' in line '" + line + "' (line was: '" + line + "')!"), result_filename);
        }
        peptide_identification.setSignificanceThreshold(score_threshold);
        peptide_identification.setScoreType(score_type);
        peptide_identification.setIdentifier(identifier);
      }
      else if (line.hasPrefix("#Index"))         // #Index  Prob    Score   N-mass  C-Mass  [M+H]   Charge  Sequence
      {
        if (columns.empty())           // map the column names to their column number
        {
          line.split('\t', substrings);
          for (vector<String>::const_iterator s_i = substrings.begin(); s_i != substrings.end(); ++s_i)
          {
            if ((*s_i) == "#Index")
              columns["Index"] = s_i - substrings.begin();
            else if ((*s_i) == "RnkScr")
              columns["RnkScr"] = s_i - substrings.begin();
            else if ((*s_i) == "PnvScr")
              columns["PnvScr"] = s_i - substrings.begin();
            else if ((*s_i) == "N-Gap")
              columns["N-Gap"] = s_i - substrings.begin();
            else if ((*s_i) == "C-Gap")
              columns["C-Gap"] = s_i - substrings.begin();
            else if ((*s_i) == "[M+H]")
              columns["[M+H]"] = s_i - substrings.begin();
            else if ((*s_i) == "Charge")
              columns["Charge"] = s_i - substrings.begin();
            else if ((*s_i) == "Sequence")
              columns["Sequence"] = s_i - substrings.begin();
          }

          if (columns.size() != 8)
          {
            result_file.close();
            result_file.clear();
            throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns in file in line " + String(line_number) + String(" (should be 8)!"), result_filename);
          }
        }
        while (getline(result_file, line))
        {
          ++line_number;
          if (!line.empty() && (line[line.length() - 1] < 33))
            line.resize(line.length() - 1);
          line.trim();

          if (line.empty())
            break;

          line.split('\t', substrings);
          if (!substrings.empty())
          {
            if (substrings.size() != 8)
            {
              result_file.close();
              result_file.clear();
              throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Not enough columns in file in line " + String(line_number) + String(" (should be 8)!"), result_filename);
            }
            if (substrings[columns["RnkScr"]].toFloat() >= score_threshold)
            {
              peptide_hit = PeptideHit();
              peptide_hit.setCharge(substrings[columns["Charge"]].toInt());
              peptide_hit.setRank(substrings[columns["Index"]].toInt() + 1);
              peptide_hit.setScore(substrings[columns["RnkScr"]].toFloat());
              peptide_hit.setMetaValue("PnvScr", substrings[columns["PnvScr"]].toFloat());
              peptide_hit.setMetaValue("N-Gap", substrings[columns["N-Gap"]].toFloat());
              peptide_hit.setMetaValue("C-Gap", substrings[columns["C-Gap"]].toFloat());
              peptide_hit.setMetaValue("MZ", substrings[columns["[M+H]"]].toFloat());
              sequence = substrings[columns["Sequence"]];


              for (map<String, String>::iterator mask_it = mod_mask_map.begin(); mask_it != mod_mask_map.end(); ++mask_it)
              {
                if (mask_it->first.hasPrefix("^") && sequence.hasSubstring(mask_it->first))
                {
                  sequence.substitute(mask_it->first, "");
                  sequence = mask_it->second + sequence;
                }
                //cout<<mask_it->first<<" "<<mask_it->second<<endl;
                sequence.substitute(mask_it->first, mask_it->second);
              }
              peptide_hit.setSequence(AASequence::fromString(sequence));
              peptide_identification.insertHit(peptide_hit);
            }
          }
        }
      }
    }
    if (!peptide_identifications.empty() || !peptide_identification.getHits().empty())
    {
      peptide_identifications.push_back(peptide_identification);
    }

    result_file.close();
    result_file.clear();

    LOG_INFO << "Parsed " << id_count << " ids, retained " << peptide_identifications.size() << "." << std::endl;

  }
示例#12
0
  void
  PepNovoOutfile::getSearchEngineAndVersion(
    const String & pepnovo_output_without_parameters_filename,
    ProteinIdentification & protein_identification)
  {
    ifstream pepnovo_output_without_parameters(pepnovo_output_without_parameters_filename.c_str());
    if (!pepnovo_output_without_parameters)
    {
      throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, pepnovo_output_without_parameters_filename);
    }

    ProteinIdentification::SearchParameters search_param;
    // searching for something like this: PepNovo v1.03
    String line;
    vector<String> substrings;
    while (getline(pepnovo_output_without_parameters, line))
    {
      if (!line.empty() && (line[line.length() - 1] < 33))
        line.resize(line.length() - 1);
      line.trim();
      if (line.empty())
        continue;
      if (line.hasPrefix("PepNovo"))
      {
        line.split(',', substrings);
        if (substrings.size() == 2) //previous version of PepNovo
        {
          protein_identification.setSearchEngine(substrings[0].trim());
          protein_identification.setSearchEngineVersion(substrings[1].trim()); //else something is strange and we use defaults later
        }
        else
        {
          line.split(' ', substrings);
          if (substrings.size() == 3)
          {
            protein_identification.setSearchEngine(substrings[0].trim());
            protein_identification.setSearchEngineVersion(substrings[2].trim()); //else something is strange and we use defaults later
          }
        }
      }
      if (line.hasPrefix("PM"))
      {
        line.split(' ', substrings);
        search_param.precursor_mass_tolerance = substrings.back().toFloat();
      }
      if (line.hasPrefix("Fragment"))
      {
        line.split(' ', substrings);
        search_param.fragment_mass_tolerance = substrings.back().toFloat();
      }
      if (line.hasPrefix("PTM"))
      {
        line.split(':', substrings);
        substrings.erase(substrings.begin());
        for (vector<String>::iterator ptm_it = substrings.begin(); ptm_it != substrings.end(); ++ptm_it)
        {
          ptm_it->trim();
        }
        if (!substrings.empty() && substrings[0] != "None")
        {
          search_param.variable_modifications = substrings;
        }
      }
      if (line.hasPrefix(">>"))
      {
        break;
      }
    }
    protein_identification.setSearchParameters(search_param);
  }