Esempio n. 1
0
  void IDFilter::removeUnreferencedProteinHits(const ProteinIdentification& identification, const vector<PeptideIdentification> peptide_identifications, ProteinIdentification& filtered_identification)
  {
    const String& run_identifier = identification.getIdentifier();

    // build set of protein accessions that are referenced by peptides
    set<String> proteinaccessions_with_peptides;
    for (Size i = 0; i != peptide_identifications.size(); ++i)
    {
      // run id of protein and peptide identification must match
      if (run_identifier == peptide_identifications[i].getIdentifier())
      {
        const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits();
        // extract protein accessions of each peptide hit
        for (Size j = 0; j != tmp_pep_hits.size(); ++j)
        {
          const std::vector<String>& protein_accessions = tmp_pep_hits[j].getProteinAccessions();
          for (Size k = 0; k != protein_accessions.size(); ++k)
          {
            String key = protein_accessions[k];
            proteinaccessions_with_peptides.insert(key);
          }
        }
      }
    }

    // add all protein hits referenced by a peptide
    const vector<ProteinHit>& temp_protein_hits = identification.getHits();
    vector<ProteinHit> filtered_protein_hits;
    for (Size j = 0; j != temp_protein_hits.size(); ++j)
    {
      const String& protein_accession = temp_protein_hits[j].getAccession();
      if (proteinaccessions_with_peptides.find(protein_accession) != proteinaccessions_with_peptides.end())
      {
        filtered_protein_hits.push_back(temp_protein_hits[j]);
      }
    }

    // copy identification
    filtered_identification = identification;

    // assign filtered hits to protein identification
    filtered_identification.setHits(filtered_protein_hits);
  }
Esempio n. 2
0
END_SECTION

START_SECTION(void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids))
{
	ProtXMLFile f;
	ProteinIdentification proteins;
	PeptideIdentification peptides;
	String prot_file;

  StringList ids = ListUtils::create<String>("16627578304933075941,13229490167902618598");

	// we do this twice, just to check that members are correctly reset etc..
	for (Int i=0;i<2;++i)
	{
		prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_1.protXML");
		f.load(prot_file, proteins, peptides);
		TEST_EQUAL(proteins.getIdentifier(), ids[i]);
		TEST_EQUAL(peptides.getIdentifier(), ids[i]);
	
		// groups	
		TEST_EQUAL(proteins.getProteinGroups().size(), 7);
		TEST_EQUAL(proteins.getProteinGroups()[0].probability, 0.9990);
		TEST_EQUAL(proteins.getProteinGroups()[0].accessions.size(), 1);
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions.size(), 2);
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions[0], 
							 "P01876|IGHA1_HUMAN");
		TEST_EQUAL(proteins.getProteinGroups()[3].accessions[1], 
							 "P01877|IGHA2_HUMAN");
		TEST_EQUAL(proteins.getProteinGroups()[6].probability, 0.2026);
		TEST_EQUAL(proteins.getProteinGroups()[6].accessions.size(), 1);

		TEST_EQUAL(proteins.getIndistinguishableProteins().size(), 7);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[0].accessions.size(), 1);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions.size(), 2);
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[0], 
							 "P01876|IGHA1_HUMAN");
		TEST_EQUAL(proteins.getIndistinguishableProteins()[3].accessions[1], 
							 "P01877|IGHA2_HUMAN");
		TEST_EQUAL(proteins.getIndistinguishableProteins()[6].accessions.size(), 1);

		// proteins
		TEST_EQUAL(proteins.getHits().size(), 9);
		TEST_EQUAL(proteins.getHits()[0].getAccession(), "P02787|TRFE_HUMAN");
		TEST_EQUAL(proteins.getHits()[0].getCoverage(), 8.6);
		TEST_EQUAL(proteins.getHits()[0].getScore(), 0.9990);
    // this one is indistinguishable... therefore it should have minimal infos
		TEST_EQUAL(proteins.getHits()[6].getAccession(), "P00739|HPTR_HUMAN");
		TEST_EQUAL(proteins.getHits()[6].getCoverage(), -1);
		TEST_EQUAL(proteins.getHits()[6].getScore(), -1);

		TEST_EQUAL(proteins.getHits()[8].getAccession(), "P04217|A1BG_HUMAN");
		TEST_EQUAL(proteins.getHits()[8].getCoverage(), 2.0);
		TEST_EQUAL(proteins.getHits()[8].getScore(), 0.2026);
	
		// peptides
		TEST_EQUAL(peptides.getHits().size(), 16);
		AASequence aa_seq("MYLGYEYVTAIR");
		TEST_EQUAL(peptides.getHits()[0].getSequence(), aa_seq);
		TEST_EQUAL(peptides.getHits()[0].getCharge(), 2);
		TEST_EQUAL(peptides.getHits()[0].getScore(), 0.8633);
		TEST_EQUAL(peptides.getHits()[0].getProteinAccessions().size(), 1);
		TEST_EQUAL(peptides.getHits()[0].getProteinAccessions()[0], "P02787|TRFE_HUMAN");
		TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_unique"), true);
		TEST_EQUAL(peptides.getHits()[0].getMetaValue("is_contributing"), true);
			
		// load 2 nd file and
		prot_file = OPENMS_GET_TEST_DATA_PATH("ProtXMLFile_input_2.protXML");
		
	}
}
Esempio n. 3
0
  void IDFilter::removeUnreferencedPeptideHits(const ProteinIdentification& identification,
                                               vector<PeptideIdentification>& peptide_identifications,
                                               bool delete_unreferenced_peptide_hits /* = false */)
  {
    const String& run_identifier = identification.getIdentifier();

    // build set of protein accessions
    set<String> all_prots;
    const vector<ProteinHit>& temp_protein_hits = identification.getHits();
    for (Size j = 0; j != temp_protein_hits.size(); ++j)
    {
      all_prots.insert(temp_protein_hits[j].getAccession());
    }

    vector<PeptideIdentification> filtered_peptide_identifications;
    // remove peptides which are not referenced
    for (Size i = 0; i != peptide_identifications.size(); ++i)
    {
      // run id of protein and peptide identification must match
      if (run_identifier == peptide_identifications[i].getIdentifier())
      {
        const vector<PeptideHit>& tmp_pep_hits = peptide_identifications[i].getHits();
        vector<PeptideHit> filtered_pep_hits;
        // check protein accessions of each peptide hit
        for (Size j = 0; j != tmp_pep_hits.size(); ++j)
        {
          vector<PeptideEvidence> hit_peptide_evidences = tmp_pep_hits[j].getPeptideEvidences();
          vector<PeptideEvidence> valid_peptide_evidence;

          for (vector<PeptideEvidence>::const_iterator pe_it = hit_peptide_evidences.begin(); pe_it != hit_peptide_evidences.end(); ++pe_it)
          {
            // find valid proteins
            if (all_prots.find(pe_it->getProteinAccession()) != all_prots.end())
            {
              valid_peptide_evidence.push_back(*pe_it);
            }
          }

          if (!valid_peptide_evidence.empty() || !delete_unreferenced_peptide_hits)
          {
            // if present, copy the hit
            filtered_pep_hits.push_back(tmp_pep_hits[j]);
            filtered_pep_hits.back().setPeptideEvidences(valid_peptide_evidence);
          }
        }
        // if the peptide has hits, we use it
        if (!filtered_pep_hits.empty())
        {
          filtered_peptide_identifications.push_back(peptide_identifications[i]);
          filtered_peptide_identifications.back().setHits(filtered_pep_hits);
        }
      }
      else    // peptide is from another run, let it pass the filter‏
      {
        filtered_peptide_identifications.push_back(peptide_identifications[i]);
      }
    }

    // exchange with new hits
    filtered_peptide_identifications.swap(peptide_identifications);
  }