Esempio n. 1
0
  /**
    Visualizes a ProteinIdentification object.

    Creates a ProteinIdentificationVisualizer for @p meta, adds it to ws_ and
    inserts a tree item labelled with the search engine name and the id returned
    by ws_. The protein hits and the meta info of @p meta are then visualized
    as children of that tree item.

    @param meta   object to show; assignRanks() is called on it
    @param parent tree item to attach to, or nullptr to attach to treeview_
  */
  void MetaDataBrowser::visualize_(ProteinIdentification & meta, QTreeWidgetItem * parent)
  {
    ProteinIdentificationVisualizer * visualizer = new ProteinIdentificationVisualizer(isEditable(), this, this);
    const int id = ws_->addWidget(visualizer);

    // first column: caption including the search engine, second column: id handed out by ws_
    QStringList labels;
    labels << QString("ProteinIdentification %1").arg(meta.getSearchEngine().c_str());
    labels << QString::number(id);

    visualizer->load(meta, id);

    // without a parent the item is attached directly to the tree view
    QTreeWidgetItem * item = (parent != nullptr)
                             ? new QTreeWidgetItem(parent, labels)
                             : new QTreeWidgetItem(treeview_, labels);

    // ranks are assigned before the protein hits are added as child items
    meta.assignRanks();

    for (Size hit_index = 0; hit_index != meta.getHits().size(); ++hit_index)
    {
      // getHits() yields const access, the hit visualizer takes a mutable reference
      ProteinHit & hit = const_cast<ProteinHit &>(meta.getHits()[hit_index]);
      visualize_(hit, item);
    }

    // the meta info part of the identification gets its own child item
    visualize_(dynamic_cast<MetaInfoInterface &>(meta), item);

    connectVisualizer_(visualizer);
  }
Esempio n. 2
0
  void IDFilter::filterIdentificationsByProteins(const ProteinIdentification& identification,
                                                 const vector<FASTAFile::FASTAEntry>& proteins,
                                                 ProteinIdentification& filtered_identification)
  {
    String protein_sequences;
    String accession_sequences;
    vector<ProteinHit> filtered_protein_hits;
    ProteinHit temp_protein_hit;

    filtered_identification = identification;
    filtered_identification.setHits(vector<ProteinHit>());

    for (Size i = 0; i < proteins.size(); i++)
    {
      accession_sequences.append("*" + proteins[i].identifier);
    }
    accession_sequences.append("*");

    for (Size i = 0; i < identification.getHits().size(); i++)
    {
      if (accession_sequences.find("*" + identification.getHits()[i].getAccession()) != String::npos)
      {
        filtered_protein_hits.push_back(identification.getHits()[i]);
      }
    }

    filtered_identification.setHits(filtered_protein_hits);
    filtered_identification.assignRanks();
  }
Esempio n. 3
0
  void IDFilter::removeUnreferencedProteinHits(const ProteinIdentification& identification, const vector<PeptideIdentification> peptide_identifications, ProteinIdentification& filtered_identification)
  {
    const String& run_identifier = identification.getIdentifier();

    // build set of protein accessions that are referenced by peptides
    set<String> proteinaccessions_with_peptides;
    for (Size i = 0; i != peptide_identifications.size(); ++i)
    {
      // run id of protein and peptide identification must match
      if (run_identifier == peptide_identifications[i].getIdentifier())
      {
        const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits();
        // extract protein accessions of each peptide hit
        for (Size j = 0; j != tmp_pep_hits.size(); ++j)
        {
          const std::vector<String>& protein_accessions = tmp_pep_hits[j].getProteinAccessions();
          for (Size k = 0; k != protein_accessions.size(); ++k)
          {
            String key = protein_accessions[k];
            proteinaccessions_with_peptides.insert(key);
          }
        }
      }
    }

    // add all protein hits referenced by a peptide
    const vector<ProteinHit>& temp_protein_hits = identification.getHits();
    vector<ProteinHit> filtered_protein_hits;
    for (Size j = 0; j != temp_protein_hits.size(); ++j)
    {
      const String& protein_accession = temp_protein_hits[j].getAccession();
      if (proteinaccessions_with_peptides.find(protein_accession) != proteinaccessions_with_peptides.end())
      {
        filtered_protein_hits.push_back(temp_protein_hits[j]);
      }
    }

    // copy identification
    filtered_identification = identification;

    // assign filtered hits to protein identification
    filtered_identification.setHits(filtered_protein_hits);
  }
Esempio n. 4
0
// Fragment of the XTandemXMLFile class test. `ptr`, `nullPointer`,
// `protein_identification` and `peptide_identifications` are declared outside
// this excerpt.
// NOTE(review): peptide_hit is not used by any section visible here.
PeptideHit peptide_hit;

// Constructor: the object can be allocated.
START_SECTION((XTandemXMLFile()))
	ptr = new XTandemXMLFile();
	TEST_NOT_EQUAL(ptr, nullPointer)
END_SECTION

// Destructor: releases the instance created in the constructor section.
START_SECTION(~XTandemXMLFile())
	delete ptr;
END_SECTION

// Fresh instance shared by the remaining sections.
// NOTE(review): no matching delete is visible in this excerpt.
ptr = new XTandemXMLFile();

// The setter has no observable result here, so the section only calls it and
// is marked NOT_TESTABLE.
START_SECTION(void setModificationDefinitionsSet(const ModificationDefinitionsSet &rhs))
	// first list is empty, second list names three modifications
	ModificationDefinitionsSet mod_set(ListUtils::create<String>(""), ListUtils::create<String>("Carbamidomethyl (C),Oxidation (M),Carboxymethyl (C)"));

	ptr->setModificationDefinitionsSet(mod_set);
	NOT_TESTABLE
END_SECTION

// Loads the first test file and checks the number of peptide identifications
// and protein hits.
START_SECTION(void load(const String& filename, ProteinIdentification& protein_identification, std::vector<PeptideIdentification>& id_data))
	ptr->load(OPENMS_GET_TEST_DATA_PATH("XTandemXMLFile_test.xml"), protein_identification, peptide_identifications);
	TEST_EQUAL(peptide_identifications.size(), 303)
	TEST_EQUAL(protein_identification.getHits().size(), 497)
	// the second file is loaded without any checks on the result
	// NOTE(review): presumably a smoke test that loading succeeds - confirm
	ptr->load(OPENMS_GET_TEST_DATA_PATH("XTandemXMLFile_test_2.xml"), protein_identification, peptide_identifications);
END_SECTION

/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
END_TEST
Esempio n. 5
0
END_SECTION

// Loads ProtXMLFile_input_1.protXML twice with the same ProtXMLFile instance and
// checks identifiers, protein groups, indistinguishable proteins, protein hits
// and peptide hits after each load.
// NOTE(review): no END_SECTION for this section is visible in this excerpt.
// NOTE(review): several TEST_EQUAL calls below compare floating-point values
// (e.g. 0.9990, 8.6) for exact equality - confirm this is intended.
START_SECTION(void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids))
{
	ProtXMLFile f;
	ProteinIdentification proteins;
	PeptideIdentification peptides;
	String prot_file;

  // expected identifier of the loaded data: ids[0] after the first load, ids[1] after the second
  StringList ids = ListUtils::create<String>("16627578304933075941,13229490167902618598");

	// we do this twice, just to check that members are correctly reset etc..
	for (Int i=0;i<2;++i)
	{
		prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_1.protXML");
		f.load(prot_file, proteins, peptides);
		// protein and peptide identification must carry the same identifier
		TEST_EQUAL(proteins.getIdentifier(), ids[i]);
		TEST_EQUAL(peptides.getIdentifier(), ids[i]);
	
		// protein groups: count, probabilities and member accessions
		TEST_EQUAL(proteins.getProteinGroups().size(), 7);
		TEST_EQUAL(proteins.getProteinGroups()[0].probability, 0.9990);
		TEST_EQUAL(proteins.getProteinGroups()[0].accessions.size(), 1);
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions.size(), 2);
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions[0], 
							 "P01876|IGHA1_HUMAN");
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions[1], 
							 "P01877|IGHA2_HUMAN");
		TEST_EQUAL(proteins.getProteinGroups()[6].probability, 0.2026);
		TEST_EQUAL(proteins.getProteinGroups()[6].accessions.size(), 1);

		// indistinguishable proteins: same counts and accessions as the groups above
		TEST_EQUAL(proteins.getIndistinguishableProteins().size(), 7);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[0].accessions.size(), 1);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions.size(), 2);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[0], 
							 "P01876|IGHA1_HUMAN");
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[1], 
							 "P01877|IGHA2_HUMAN");
		TEST_EQUAL(proteins.getIndistinguishableProteins()[6].accessions.size(), 1);

		// protein hits: accession, coverage and score of selected hits
		TEST_EQUAL(proteins.getHits().size(), 9);
		TEST_EQUAL(proteins.getHits()[0].getAccession(), "P02787|TRFE_HUMAN");
		TEST_EQUAL(proteins.getHits()[0].getCoverage(), 8.6);
		TEST_EQUAL(proteins.getHits()[0].getScore(), 0.9990);
    // this hit is indistinguishable, so it carries only minimal information:
    // coverage and score are expected to be -1
		TEST_EQUAL(proteins.getHits()[6].getAccession(), "P00739|HPTR_HUMAN");
		TEST_EQUAL(proteins.getHits()[6].getCoverage(), -1);
		TEST_EQUAL(proteins.getHits()[6].getScore(), -1);

		TEST_EQUAL(proteins.getHits()[8].getAccession(), "P04217|A1BG_HUMAN");
		TEST_EQUAL(proteins.getHits()[8].getCoverage(), 2.0);
		TEST_EQUAL(proteins.getHits()[8].getScore(), 0.2026);
	
		// peptide hits: first hit's sequence, charge, score, accession and meta values
		TEST_EQUAL(peptides.getHits().size(), 16);
		AASequence aa_seq("MYLGYEYVTAIR");
		TEST_EQUAL(peptides.getHits()[0].getSequence(), aa_seq);
		TEST_EQUAL(peptides.getHits()[0].getCharge(), 2);
		TEST_EQUAL(peptides.getHits()[0].getScore(), 0.8633);
		TEST_EQUAL(peptides.getHits()[0].getProteinAccessions().size(), 1);
		TEST_EQUAL(peptides.getHits()[0].getProteinAccessions()[0], "P02787|TRFE_HUMAN");
		TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_unique"), true);
		TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_contributing"), true);
			
		// NOTE(review): the second input file is only assigned here and never loaded;
		// prot_file is overwritten at the top of the next iteration - confirm whether
		// a load of ProtXMLFile_input_2.protXML is missing.
		prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_2.protXML");
		
	}
}
// Fragment of the IDFilter class test. `ptr`, `nullPointer`,
// `protein_identification`, `identification` and `proteins` are declared
// outside this excerpt.

// Constructor: the object can be allocated.
START_SECTION((IDFilter()))
  ptr = new IDFilter();
  TEST_NOT_EQUAL(ptr, nullPointer);
END_SECTION

// Destructor: releases the instance created in the constructor section.
START_SECTION((~IDFilter()))
  delete ptr;
END_SECTION

// Protein-level filter: two protein hits are expected to remain, and the score
// type of the input is expected to be carried over.
START_SECTION((void filterIdentificationsByProteins(const ProteinIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, ProteinIdentification& filtered_identification)))
  ProteinIdentification protein_identification2;

  IDFilter::filterIdentificationsByProteins(protein_identification, proteins, protein_identification2);

  TEST_EQUAL(protein_identification2.getScoreType(), "Mascot")
  TEST_EQUAL(protein_identification2.getHits().size(), 2)
  TEST_EQUAL(protein_identification2.getHits()[0].getAccession(), "Q824A5")
  TEST_EQUAL(protein_identification2.getHits()[1].getAccession(), "Q872T5")
END_SECTION

// Peptide-level filter (called without the optional no_protein_identifiers
// argument): two peptide hits are expected to remain, identified by sequence.
START_SECTION((void filterIdentificationsByProteins(const PeptideIdentification& identification, const vector<FASTAFile::FASTAEntry>& proteins, PeptideIdentification& filtered_identification, bool no_protein_identifiers = false)))
  PeptideIdentification identification2;

  IDFilter::filterIdentificationsByProteins(identification, proteins, identification2);

  TEST_EQUAL(identification2.getScoreType(), "Mascot")
  TEST_EQUAL(identification2.getHits().size(), 2)
  TEST_EQUAL(identification2.getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK"))
  TEST_EQUAL(identification2.getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK"))
END_SECTION
Esempio n. 7
0
  void IDFilter::removeUnreferencedPeptideHits(const ProteinIdentification& identification,
                                               vector<PeptideIdentification>& peptide_identifications,
                                               bool delete_unreferenced_peptide_hits /* = false */)
  {
    const String& run_identifier = identification.getIdentifier();

    // build set of protein accessions
    set<String> all_prots;
    const vector<ProteinHit>& temp_protein_hits = identification.getHits();
    for (Size j = 0; j != temp_protein_hits.size(); ++j)
    {
      all_prots.insert(temp_protein_hits[j].getAccession());
    }

    vector<PeptideIdentification> filtered_peptide_identifications;
    // remove peptides which are not referenced
    for (Size i = 0; i != peptide_identifications.size(); ++i)
    {
      // run id of protein and peptide identification must match
      if (run_identifier == peptide_identifications[i].getIdentifier())
      {
        const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits();
        vector<PeptideHit> filtered_pep_hits;
        // check protein accessions of each peptide hit
        for (Size j = 0; j != tmp_pep_hits.size(); ++j)
        {
          vector<PeptideEvidence> hit_peptide_evidences = tmp_pep_hits[j].getPeptideEvidences();
          vector<PeptideEvidence> valid_peptide_evidence;

          for (vector<PeptideEvidence>::const_iterator pe_it = hit_peptide_evidences.begin(); pe_it != hit_peptide_evidences.end(); ++pe_it)
          {
            // find valid proteins
            if (all_prots.find(pe_it->getProteinAccession()) != all_prots.end())
            {
              valid_peptide_evidence.push_back(*pe_it);
            }
          }

          if (!valid_peptide_evidence.empty() || !delete_unreferenced_peptide_hits)
          {
            // if present, copy the hit
            filtered_pep_hits.push_back(tmp_pep_hits[j]);
            filtered_pep_hits.back().setPeptideEvidences(valid_peptide_evidence);
          }
        }
        // if the peptide has hits, we use it
        if (!filtered_pep_hits.empty())
        {
          filtered_peptide_identifications.push_back(peptide_identifications[i]);
          filtered_peptide_identifications.back().setHits(filtered_pep_hits);
        }
      }
      else    // peptide is from another run, let it pass the filter‏
      {
        filtered_peptide_identifications.push_back(peptide_identifications[i]);
      }
    }

    // exchange with new hits
    filtered_peptide_identifications.swap(peptide_identifications);
  }
Esempio n. 8
0
  void IBSpectraFile::store(const String& filename, const ConsensusMap& cm)
  {
    // typdefs for shorter code
    typedef std::vector<ProteinHit>::iterator ProtHitIt;

    // general settings .. do we need to expose these?
    // ----------------------------------------------------------------------
    /// Allow also non-unique peptides to be exported
    bool allow_non_unique = true;
    /// Intensities below this value will be set to 0.0 to avoid numerical problems when quantifying
    double intensity_threshold = 0.00001;
    // ----------------------------------------------------------------------


    // guess experiment type
    boost::shared_ptr<IsobaricQuantitationMethod> quantMethod = guessExperimentType_(cm);

    // we need the protein identifications to reference the protein names
    ProteinIdentification protIdent;
    bool has_proteinIdentifications = false;
    if (cm.getProteinIdentifications().size() > 0)
    {
      protIdent = cm.getProteinIdentifications()[0];
      has_proteinIdentifications = true;
    }

    // start the file by adding the tsv header
    TextFile textFile;
    textFile.addLine(ListUtils::concatenate(constructHeader_(*quantMethod), "\t"));

    for (ConsensusMap::ConstIterator cm_iter = cm.begin();
         cm_iter != cm.end();
         ++cm_iter)
    {
      const ConsensusFeature& cFeature = *cm_iter;
      std::vector<IdCSV> entries;

      /// 1st we extract the identification information from the consensus feature
      if (cFeature.getPeptideIdentifications().size() == 0 || !has_proteinIdentifications)
      {
        // we store unidentified hits anyway, because the iTRAQ quant is still helpful for normalization
        entries.push_back(IdCSV());
      }
      else
      {
        // protein name:
        const PeptideHit& peptide_hit = cFeature.getPeptideIdentifications()[0].getHits()[0];
        std::set<String> protein_accessions = peptide_hit.extractProteinAccessions();
        if (protein_accessions.size() != 1)
        {
          if (!allow_non_unique) continue; // we only want unique peptides
        }

        for (std::set<String>::const_iterator prot_ac = protein_accessions.begin(); prot_ac != protein_accessions.end(); ++prot_ac)
        {
          IdCSV entry;
          entry.charge = cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge();
          entry.peptide = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString();
          entry.theo_mass = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge());

          // write modif
          entry.modif = getModifString_(cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence());

          ProtHitIt proteinHit = protIdent.findHit(*prot_ac);
          if (proteinHit == protIdent.getHits().end())
          {
            std::cerr << "Protein referenced in peptide not found...\n";
            continue; // protein not found
          }

          entry.accession = proteinHit->getAccession();
          entries.push_back(entry);
        }
      }

      // 2nd we add the quantitative information of the channels

      // .. skip features with 0 intensity
      if (cFeature.getIntensity() == 0)
      {
        continue;
      }

      for (std::vector<IdCSV>::iterator entry = entries.begin();
           entry != entries.end();
           ++entry)
      {
        // set parent intensity
        entry->parent_intens = cFeature.getIntensity();
        entry->retention_time = cFeature.getRT();
        entry->spectrum = cFeature.getUniqueId();
        entry->exp_mass = cFeature.getMZ();

        // create output line
        StringList currentLine;

        // add entry to currentLine
        entry->toStringList(currentLine);

        // extract channel intensities and positions
        std::map<Int, double> intensityMap;
        ConsensusFeature::HandleSetType features = cFeature.getFeatures();

        for (ConsensusFeature::HandleSetType::const_iterator fIt = features.begin();
             fIt != features.end();
             ++fIt)
        {
          intensityMap[Int(fIt->getMZ())] = (fIt->getIntensity() > intensity_threshold ? fIt->getIntensity() : 0.0);
        }
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin();
             it != quantMethod->getChannelInformation().end();
             ++it)
        {
          currentLine.push_back(String(it->center));
        }
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin();
             it != quantMethod->getChannelInformation().end();
             ++it)
        {
          currentLine.push_back(String(intensityMap[int(it->center)]));
        }

        textFile.addLine(ListUtils::concatenate(currentLine, "\t"));
      }
    }

    // write to file
    textFile.store(filename);
  }