C++ (Cpp) EnzymaticDigestion Examples

Programming Language: C++ (Cpp)

Examples at hotexamples.com: 6

C++ (Cpp) EnzymaticDigestion - 6 examples found. These are the top-rated real-world C++ (Cpp) examples of EnzymaticDigestion extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

setMissedCleavages(4)

setEnzyme(4)

digest(3)

getEnzymeByName(2)

getSpecificity(2)

peptideCount(2)

getEnzyme(1)

getEnzymeName(1)

getLogThreshold(1)

getMissedCleavages(1)

getSpecificityByName(1)

isLogModelEnabled(1)

isValidProduct(1)

setLogModelEnabled(1)

setLogThreshold(1)

setSpecificity(1)

Example #1

Show file

File: DigestSimulation.cpp Project: BioinformaticsArchive/OpenMS

  /// Converts the protein hits of the first protein identification stored in
  /// @p feature_map into peptide features that are appended to @p feature_map.
  ///
  /// If the "enzyme" parameter is "none", each protein becomes exactly one
  /// feature carrying the protein's sequence, intensity and meta values.
  /// Otherwise each protein is digested with the configured enzyme; identical
  /// peptides produced by different proteins are merged into one feature whose
  /// intensities are summed and which references every contributing protein.
  ///
  /// @param feature_map Provides the protein hits (read from
  ///        getProteinIdentifications()[0]; the presence of that entry is not
  ///        checked here) and receives the generated features.
  void DigestSimulation::digest(SimTypes::FeatureMapSim& feature_map)
  {
    LOG_INFO << "Digest Simulation ... started" << std::endl;

    if ((String)param_.getValue("enzyme") == String("none"))
    {
      // no digestion requested: convert all proteins into peptides 1:1

      // for each protein_hit in the FeatureMap
      for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
           protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
           ++protein_hit)
      {
        // generate a PeptideHit hit with the correct link to the protein
        PeptideHit pep_hit(1.0, 1, 0, AASequence::fromString(protein_hit->getSequence()));
        PeptideEvidence pe;
        pe.setProteinAccession(protein_hit->getAccession());
        pep_hit.addPeptideEvidence(pe);

        // add the PeptideHit to the PeptideIdentification
        PeptideIdentification pep_id;
        pep_id.insertHit(pep_hit);

        // generate Feature with correct Intensity and corresponding PeptideIdentification
        Feature f;
        f.getPeptideIdentifications().push_back(pep_id);
        f.setIntensity(protein_hit->getMetaValue("intensity"));

        // copy intensity meta-values and additional annotations from Protein to Feature
        StringList keys;
        protein_hit->getKeys(keys);
        for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
        {
          f.setMetaValue(*it_key, protein_hit->getMetaValue(*it_key));
        }

        // add Feature to SimTypes::FeatureMapSim
        feature_map.push_back(f);
      }

      return;
    }


    UInt min_peptide_length = param_.getValue("min_peptide_length");
    bool use_log_model = param_.getValue("model") == "trained" ? true : false;
    UInt missed_cleavages = param_.getValue("model_naive:missed_cleavages");
    double cleave_threshold = param_.getValue("model_trained:threshold");

    EnzymaticDigestion digestion;
    digestion.setEnzyme(digestion.getEnzymeByName((String)param_.getValue("enzyme")));
    digestion.setLogModelEnabled(use_log_model);
    digestion.setLogThreshold(cleave_threshold);

    std::vector<AASequence> digestion_products;

    // keep track of generated features (one per distinct peptide sequence)
    std::map<AASequence, Feature> generated_features;

    // Iterate through ProteinHits in the FeatureMap and digest them
    for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
         protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
         ++protein_hit)
    {
      // determine abundance of each digestion product (this is quite long now...)
      // we assume that each digestion product will have the same abundance
      // note: missed cleavages reduce overall abundance as they combine two (or more) single peptides

      // how many "atomic"(i.e. non-cleavable) peptides are created?
      digestion.setMissedCleavages(0);
      Size complete_digest_count = digestion.peptideCount(AASequence::fromString(protein_hit->getSequence()));
      // compute average number of "atomic" peptides summed from all digestion products
      Size number_atomic_whole = 0;
      Size number_of_digestion_products = 0;
      for (Size i = 0; (i <= missed_cleavages) && (i < complete_digest_count); ++i)
      {
        number_atomic_whole += (complete_digest_count - i) * (i + 1);
        number_of_digestion_products += (complete_digest_count - i);
      }

      // mean number of "atomic" peptides per digestion product is now: number_atomic_whole / number_of_digestion_products
      // -> thus abundance of a digestion product is: #proteins / avg#of"atomic"peptides
      // i.e.: protein->second / (number_atomic_whole / number_of_digestion_products)

      // per-peptide share of every "intensity*" meta value of this protein (at least 1)
      Map<String, SimTypes::SimIntensityType> intensities;
      StringList keys;
      protein_hit->getKeys(keys);
      for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
      {
        if (!it_key->hasPrefix("intensity"))
          continue;
        intensities[*it_key] = std::max(SimTypes::SimIntensityType(1), SimTypes::SimIntensityType(protein_hit->getMetaValue(*it_key))
                                        * SimTypes::SimIntensityType(number_of_digestion_products)
                                        / SimTypes::SimIntensityType(number_atomic_whole)); // order changed for numeric stability
      }

      // do real digest
      digestion.setMissedCleavages(missed_cleavages);
      digestion.digest(AASequence::fromString(protein_hit->getSequence()), digestion_products);

      for (std::vector<AASequence>::const_iterator dp_it = digestion_products.begin();
           dp_it != digestion_products.end();
           ++dp_it)
      {
        if (dp_it->size() < min_peptide_length)
          continue;

        // sum equal peptide's intensities
        // *dp_it -> peptide
        // Look the peptide up once; all further accesses go through 'feature'
        // instead of repeating the (AASequence-keyed) map lookup.
        std::map<AASequence, Feature>::iterator gf_it = generated_features.find(*dp_it);

        // If we see this Peptide the first time -> generate corresponding feature
        if (gf_it == generated_features.end())
        {
          PeptideHit pep_hit(1.0, 1, 0, *dp_it);

          PeptideIdentification pep_id;
          pep_id.insertHit(pep_hit);

          // create feature
          Feature f;
          f.getPeptideIdentifications().push_back(pep_id);
          // set intensity to 0 to avoid problems when summing up
          f.setIntensity(0.0);

          // copy all non-intensity meta values
          StringList lkeys;
          protein_hit->getKeys(lkeys);
          for (StringList::iterator key = lkeys.begin(); key != lkeys.end(); ++key)
          {
            if (!key->hasPrefix("intensity"))
            {
              f.setMetaValue(*key, protein_hit->getMetaValue(*key));
            }
          }

          // insert into map
          gf_it = generated_features.insert(std::make_pair(*dp_it, f)).first;
        }

        Feature& feature = gf_it->second;

        // sum up intensity values
        feature.setIntensity(feature.getIntensity() + intensities["intensity"]);
        // ... same for other intensities (iTRAQ...)
        for (Map<String, SimTypes::SimIntensityType>::const_iterator it_other = intensities.begin(); it_other != intensities.end(); ++it_other)
        {
          if (!feature.metaValueExists(it_other->first))
          {
            feature.setMetaValue(it_other->first, it_other->second);
          }
          else
          {
            feature.setMetaValue(it_other->first, SimTypes::SimIntensityType(feature.getMetaValue(it_other->first)) + it_other->second);
          }
        }

        // Link the current protein to the peptide hit. Only a protein that is
        // not referenced yet gets a new PeptideEvidence; re-adding evidences
        // for already known accessions would duplicate them every time the
        // peptide is encountered again.
        // NOTE(review): assumes addPeptideEvidence() does not deduplicate - confirm.
        const std::set<String> known_accessions = feature.getPeptideIdentifications()[0].getHits()[0].extractProteinAccessions();
        if (known_accessions.count(protein_hit->getAccession()) == 0)
        {
          std::vector<PeptideIdentification> pep_idents = feature.getPeptideIdentifications();
          std::vector<PeptideHit> pep_hits = pep_idents[0].getHits();

          PeptideEvidence pe;
          pe.setProteinAccession(protein_hit->getAccession());
          pep_hits[0].addPeptideEvidence(pe);

          pep_idents[0].setHits(pep_hits);
          feature.setPeptideIdentifications(pep_idents);
        }
      }
    }

    // add generated_features to FeatureMap
    for (std::map<AASequence, Feature>::iterator it_gf = generated_features.begin();
         it_gf != generated_features.end();
         ++it_gf)
    {
      // round up intensity
      (it_gf->second).setIntensity(ceil((it_gf->second).getIntensity()));
      feature_map.push_back(it_gf->second);
    }

  }

Example #2

Show file

File: IDFilter.cpp Project: FabianAicheler/OpenMS

  /// Tool entry point: loads protein and peptide identifications from the
  /// idXML file given by option "in", applies every filter that is enabled
  /// through the tool's options (in the order written below), cleans up
  /// dangling references, logs before/after statistics and stores the result
  /// to the file given by option "out".
  ///
  /// @return ILLEGAL_PARAMETERS if a negative bound is given for "length" or
  ///         "best:n_to_m_peptide_hits", EXECUTION_OK otherwise.
  ExitCodes main_(int, const char**)
  {
    String inputfile_name = getStringOption_("in");
    String outputfile_name = getStringOption_("out");

    vector<ProteinIdentification> proteins;
    vector<PeptideIdentification> peptides;
    IdXMLFile().load(inputfile_name, proteins, peptides);

    // remember the input sizes for the statistics printed at the end
    Size n_prot_ids = proteins.size();
    Size n_prot_hits = IDFilter::countHits(proteins);
    Size n_pep_ids = peptides.size();
    Size n_pep_hits = IDFilter::countHits(peptides);


    // Filtering peptide identification according to set criteria

    double rt_high = numeric_limits<double>::infinity(), rt_low = -rt_high;
    if (parseRange_(getStringOption_("precursor:rt"), rt_low, rt_high))
    {
      LOG_INFO << "Filtering peptide IDs by precursor RT..." << endl;
      IDFilter::filterPeptidesByRT(peptides, rt_low, rt_high);
    }

    double mz_high = numeric_limits<double>::infinity(), mz_low = -mz_high;
    if (parseRange_(getStringOption_("precursor:mz"), mz_low, mz_high))
    {
      // terminate the line like every other progress message in this function
      LOG_INFO << "Filtering peptide IDs by precursor m/z..." << endl;
      IDFilter::filterPeptidesByMZ(peptides, mz_low, mz_high);
    }


    // Filtering peptide hits according to set criteria

    if (getFlag_("unique"))
    {
      LOG_INFO << "Removing duplicate peptide hits..." << endl;
      IDFilter::removeDuplicatePeptideHits(peptides);
    }

    if (getFlag_("unique_per_protein"))
    {
      LOG_INFO << "Filtering peptides by unique match to a protein..." << endl;
      IDFilter::keepUniquePeptidesPerProtein(peptides);
    }

    double peptide_significance = getDoubleOption_("thresh:pep");
    if (peptide_significance > 0)
    {
      LOG_INFO << "Filtering by peptide significance threshold..." << endl;
      IDFilter::filterHitsBySignificance(peptides, peptide_significance);
    }

    double pred_rt_pv = getDoubleOption_("rt:p_value");
    if (pred_rt_pv > 0)
    {
      LOG_INFO << "Filtering by RT prediction p-value..." << endl;
      IDFilter::filterPeptidesByRTPredictPValue(
        peptides, "predicted_RT_p_value", pred_rt_pv);
    }

    double pred_rt_pv_1d = getDoubleOption_("rt:p_value_1st_dim");
    if (pred_rt_pv_1d > 0)
    {
      LOG_INFO << "Filtering by RT prediction p-value (first dim.)..." << endl;
      IDFilter::filterPeptidesByRTPredictPValue(
        peptides, "predicted_RT_p_value_first_dim", pred_rt_pv_1d);
    }

    String whitelist_fasta = getStringOption_("whitelist:proteins").trim();
    if (!whitelist_fasta.empty())
    {
      LOG_INFO << "Filtering by protein whitelisting (FASTA input)..." << endl;
      // load protein accessions from FASTA file:
      vector<FASTAFile::FASTAEntry> fasta;
      FASTAFile().load(whitelist_fasta, fasta);
      set<String> accessions;
      for (vector<FASTAFile::FASTAEntry>::iterator it = fasta.begin();
           it != fasta.end(); ++it)
      {
        accessions.insert(it->identifier);
      }
      IDFilter::keepHitsMatchingProteins(peptides, accessions);
      IDFilter::keepHitsMatchingProteins(proteins, accessions);
    }

    vector<String> whitelist_accessions =
      getStringList_("whitelist:protein_accessions");
    if (!whitelist_accessions.empty())
    {
      LOG_INFO << "Filtering by protein whitelisting (accessions input)..."
               << endl;
      set<String> accessions(whitelist_accessions.begin(),
                             whitelist_accessions.end());
      IDFilter::keepHitsMatchingProteins(peptides, accessions);
      IDFilter::keepHitsMatchingProteins(proteins, accessions);
    }

    String whitelist_peptides = getStringOption_("whitelist:peptides").trim();
    if (!whitelist_peptides.empty())
    {
      LOG_INFO << "Filtering by inclusion peptide whitelisting..." << endl;
      vector<PeptideIdentification> inclusion_peptides;
      vector<ProteinIdentification> inclusion_proteins; // ignored
      IdXMLFile().load(whitelist_peptides, inclusion_proteins,
                       inclusion_peptides);
      bool ignore_mods = getFlag_("whitelist:ignore_modifications");
      IDFilter::keepPeptidesWithMatchingSequences(peptides, inclusion_peptides,
                                                  ignore_mods);
    }

    vector<String> whitelist_mods = getStringList_("whitelist:modifications");
    if (!whitelist_mods.empty())
    {
      LOG_INFO << "Filtering peptide IDs by modification whitelisting..."
               << endl;
      set<String> good_mods(whitelist_mods.begin(), whitelist_mods.end());
      IDFilter::keepPeptidesWithMatchingModifications(peptides, good_mods);
    }

    String blacklist_fasta = getStringOption_("blacklist:proteins").trim();
    if (!blacklist_fasta.empty())
    {
      LOG_INFO << "Filtering by protein blacklisting (FASTA input)..." << endl;
      // load protein accessions from FASTA file:
      vector<FASTAFile::FASTAEntry> fasta;
      FASTAFile().load(blacklist_fasta, fasta);
      set<String> accessions;
      for (vector<FASTAFile::FASTAEntry>::iterator it = fasta.begin();
           it != fasta.end(); ++it)
      {
        accessions.insert(it->identifier);
      }
      IDFilter::removeHitsMatchingProteins(peptides, accessions);
      IDFilter::removeHitsMatchingProteins(proteins, accessions);
    }

    vector<String> blacklist_accessions =
      getStringList_("blacklist:protein_accessions");
    if (!blacklist_accessions.empty())
    {
      LOG_INFO << "Filtering by protein blacklisting (accessions input)..."
               << endl;
      set<String> accessions(blacklist_accessions.begin(),
                             blacklist_accessions.end());
      IDFilter::removeHitsMatchingProteins(peptides, accessions);
      IDFilter::removeHitsMatchingProteins(proteins, accessions);
    }

    String blacklist_peptides = getStringOption_("blacklist:peptides").trim();
    if (!blacklist_peptides.empty())
    {
      LOG_INFO << "Filtering by exclusion peptide blacklisting..." << endl;
      vector<PeptideIdentification> exclusion_peptides;
      vector<ProteinIdentification> exclusion_proteins; // ignored
      IdXMLFile().load(blacklist_peptides, exclusion_proteins,
                       exclusion_peptides);
      bool ignore_mods = getFlag_("blacklist:ignore_modifications");
      IDFilter::removePeptidesWithMatchingSequences(
        peptides, exclusion_peptides, ignore_mods);
    }

    vector<String> blacklist_mods = getStringList_("blacklist:modifications");
    if (!blacklist_mods.empty())
    {
      LOG_INFO << "Filtering peptide IDs by modification blacklisting..."
               << endl;
      set<String> bad_mods(blacklist_mods.begin(), blacklist_mods.end());
      IDFilter::removePeptidesWithMatchingModifications(peptides, bad_mods);
    }


    if (getFlag_("best:strict"))
    {
      LOG_INFO << "Filtering by best peptide hits..." << endl;
      IDFilter::keepBestPeptideHits(peptides, true);
    }


    Int min_length = 0, max_length = 0;
    if (parseRange_(getStringOption_("length"), min_length, max_length))
    {
      LOG_INFO << "Filtering by peptide length..." << endl;
      // reject negative bounds before they are converted to the unsigned Size
      if ((min_length < 0) || (max_length < 0))
      {
        LOG_ERROR << "Fatal error: negative values are not allowed for parameter 'length'" << endl;
        return ILLEGAL_PARAMETERS;
      }
      IDFilter::filterPeptidesByLength(peptides, Size(min_length),
                                       Size(max_length));
    }

    // Filter by digestion enzyme product

    String protein_fasta = getStringOption_("digest:fasta").trim();
    if (!protein_fasta.empty())
    {
      LOG_INFO << "Filtering peptides by digested protein (FASTA input)..." << endl;
      // load protein accessions from FASTA file:
      vector<FASTAFile::FASTAEntry> fasta;
      FASTAFile().load(protein_fasta, fasta);

      // Configure Enzymatic digestion; empty options keep the object's defaults
      EnzymaticDigestion digestion;
      String enzyme = getStringOption_("digest:enzyme").trim();
      if (!enzyme.empty())
      {
        digestion.setEnzyme(enzyme);
      }

      String specificity = getStringOption_("digest:specificity").trim();
      if (!specificity.empty())
      {
        digestion.setSpecificity(digestion.getSpecificityByName(specificity));
      }

      // a value below 0 means: do not restrict the number of missed cleavages
      Int missed_cleavages = getIntOption_("digest:missed_cleavages");
      bool ignore_missed_cleavages = true;
      if (missed_cleavages > -1)
      {
        ignore_missed_cleavages = false;
        // warn only when the specificity is NOT full, as the message states
        if (digestion.getSpecificity() != EnzymaticDigestion::SPEC_FULL)
        {
          LOG_WARN << "Specificity not full, missed_cleavages option is redundant" << endl;
        }
        digestion.setMissedCleavages(missed_cleavages);
      }

      bool methionine_cleavage = getFlag_("digest:methionine_cleavage");

      // Build the digest filter function
      IDFilter::DigestionFilter filter(fasta,
                                       digestion,
                                       ignore_missed_cleavages,
                                       methionine_cleavage);
      // Filter peptides
      filter.filterPeptideEvidences(peptides);
    }


    if (getFlag_("var_mods"))
    {
      LOG_INFO << "Filtering for variable modifications..." << endl;
      // gather possible variable modifications from search parameters:
      set<String> var_mods;
      for (vector<ProteinIdentification>::iterator prot_it = proteins.begin();
           prot_it != proteins.end(); ++prot_it)
      {
        const ProteinIdentification::SearchParameters& params =
          prot_it->getSearchParameters();
        for (vector<String>::const_iterator mod_it =
               params.variable_modifications.begin(); mod_it !=
               params.variable_modifications.end(); ++mod_it)
        {
          var_mods.insert(*mod_it);
        }
      }
      IDFilter::keepPeptidesWithMatchingModifications(peptides, var_mods);
    }

    double pep_score = getDoubleOption_("score:pep");
    // @TODO: what if 0 is a reasonable cut-off for some score?
    if (pep_score != 0)
    {
      LOG_INFO << "Filtering by peptide score..." << endl;
      IDFilter::filterHitsByScore(peptides, pep_score);
    }

    Int min_charge = numeric_limits<Int>::min(), max_charge =
      numeric_limits<Int>::max();
    if (parseRange_(getStringOption_("charge"), min_charge, max_charge))
    {
      LOG_INFO << "Filtering by peptide charge..." << endl;
      IDFilter::filterPeptidesByCharge(peptides, min_charge, max_charge);
    }

    Size best_n_pep = getIntOption_("best:n_peptide_hits");
    if (best_n_pep > 0)
    {
      LOG_INFO << "Filtering by best n peptide hits..." << endl;
      IDFilter::keepNBestHits(peptides, best_n_pep);
    }

    Int min_rank = 0, max_rank = 0;
    if (parseRange_(getStringOption_("best:n_to_m_peptide_hits"), min_rank,
                    max_rank))
    {
      LOG_INFO << "Filtering by peptide hit ranks..." << endl;
      if ((min_rank < 0) || (max_rank < 0))
      {
        LOG_ERROR << "Fatal error: negative values are not allowed for parameter 'best:n_to_m_peptide_hits'" << endl;
        return ILLEGAL_PARAMETERS;
      }
      IDFilter::filterHitsByRank(peptides, Size(min_rank), Size(max_rank));
    }

    double mz_error = getDoubleOption_("mz:error");
    if (mz_error > 0)
    {
      LOG_INFO << "Filtering by mass error..." << endl;
      bool unit_ppm = (getStringOption_("mz:unit") == "ppm");
      IDFilter::filterPeptidesByMZError(peptides, mz_error, unit_ppm);
    }


    // Filtering protein identifications according to set criteria

    double protein_significance = getDoubleOption_("thresh:prot");
    if (protein_significance > 0)
    {
      LOG_INFO << "Filtering by protein significance threshold..." << endl;
      IDFilter::filterHitsBySignificance(proteins, protein_significance);
    }

    double prot_score = getDoubleOption_("score:prot");
    // @TODO: what if 0 is a reasonable cut-off for some score?
    if (prot_score != 0)
    {
      LOG_INFO << "Filtering by protein score..." << endl;
      IDFilter::filterHitsByScore(proteins, prot_score);
    }

    Size best_n_prot = getIntOption_("best:n_protein_hits");
    if (best_n_prot > 0)
    {
      LOG_INFO << "Filtering by best n protein hits..." << endl;
      IDFilter::keepNBestHits(proteins, best_n_prot);
    }

    if (getFlag_("remove_decoys"))
    {
      LOG_INFO << "Removing decoy hits..." << endl;
      IDFilter::removeDecoyHits(peptides);
      IDFilter::removeDecoyHits(proteins);
    }


    // Clean-up:

    if (!getFlag_("keep_unreferenced_protein_hits"))
    {
      LOG_INFO << "Removing unreferenced protein hits..." << endl;
      IDFilter::removeUnreferencedProteins(proteins, peptides);
    }

    IDFilter::updateHitRanks(proteins);
    IDFilter::updateHitRanks(peptides);

    // remove non-existent protein references from peptides (and optionally:
    // remove peptides with no proteins):
    bool rm_pep = getFlag_("delete_unreferenced_peptide_hits");
    if (rm_pep) LOG_INFO << "Removing peptide hits without protein references..." << endl;
    IDFilter::updateProteinReferences(peptides, proteins, rm_pep);

    IDFilter::removeEmptyIdentifications(peptides);
    // we want to keep "empty" protein IDs because they contain search meta data

    // update protein groupings if necessary:
    for (vector<ProteinIdentification>::iterator prot_it = proteins.begin();
         prot_it != proteins.end(); ++prot_it)
    {
      bool valid = IDFilter::updateProteinGroups(prot_it->getProteinGroups(),
                                                 prot_it->getHits());
      if (!valid)
      {
        LOG_WARN << "Warning: While updating protein groups, some proteins were removed from groups that are still present. The new grouping (especially the group probabilities) may not be completely valid any more." << endl;
      }

      valid = IDFilter::updateProteinGroups(
        prot_it->getIndistinguishableProteins(), prot_it->getHits());
      if (!valid)
      {
        LOG_WARN << "Warning: While updating indistinguishable proteins, some proteins were removed from groups that are still present. The new grouping (especially the group probabilities) may not be completely valid any more." << endl;
      }
    }

    // some stats
    LOG_INFO << "Before filtering:\n"
             << n_prot_ids << " protein identification(s) with "
             << n_prot_hits << " protein hit(s),\n"
             << n_pep_ids << " peptide identification(s) with "
             << n_pep_hits << " peptides hit(s).\n"
             << "After filtering:\n"
             << proteins.size() << " protein identification(s) with "
             << IDFilter::countHits(proteins) << " protein hit(s),\n"
             << peptides.size() << " peptide identification(s) with "
             << IDFilter::countHits(peptides) << " peptides hit(s)." << endl;

    IdXMLFile().store(outputfile_name, proteins, peptides);

    return EXECUTION_OK;
  }

Example #3

Show file

File: ProteinResolver.C Project: BioITer/OpenMS

  /// Builds the bipartite protein/peptide graph from an in-silico digest of
  /// all entries in protein_data_ and collects its groups into @p isd_groups.
  ///
  /// @param protein_nodes One node per entry of protein_data_; nodes are
  ///        (re)initialised here. Indexed with the indices of protein_data_,
  ///        so it is assumed to hold at least protein_data_.size() elements
  ///        (not checked here - TODO confirm against callers).
  /// @param peptide_nodes Resized to the number of distinct unmodified peptide
  ///        sequences of at least "resolver:min_length" residues and filled.
  /// @param isd_groups Receives one ISDGroup per not-yet-traversed protein
  ///        node, populated via traversProtein_().
  void ProteinResolver::buildingISDGroups_(vector<ProteinEntry> & protein_nodes, vector<PeptideEntry> & peptide_nodes,
                                           vector<ISDGroup> & isd_groups)
  {
    EnzymaticDigestion digestor;
    String enzyme_name = param_.getValue("resolver:enzyme");
    digestor.setEnzyme(digestor.getEnzymeByName(enzyme_name));
    UInt min_size = param_.getValue("resolver:min_length");
    UInt missed_cleavages = param_.getValue("resolver:missed_cleavages");
    digestor.setMissedCleavages(missed_cleavages);


    //-------------------------------------------------------------
    // building ISD Groups
    //-------------------------------------------------------------

    vector<AASequence> temp_peptides;
    // unmodified peptide sequence -> indices of the proteins that produce it
    map<String, set<Size> > peptides;

    for (Size i = 0; i < protein_data_.size(); ++i)
    {
      protein_nodes[i].fasta_entry = &protein_data_[i];
      protein_nodes[i].traversed = false;
      protein_nodes[i].index = i;
      protein_nodes[i].protein_type = ProteinEntry::secondary;
      protein_nodes[i].weight = AASequence(protein_data_[i].sequence).getMonoWeight();
      protein_nodes[i].coverage = 0.;
      protein_nodes[i].number_of_experimental_peptides = 0;
      digestor.digest(AASequence(protein_data_[i].sequence), temp_peptides);
      for (Size j = 0; j < temp_peptides.size(); ++j)
      {
        if (temp_peptides[j].size() >= min_size)
        {
          peptides[temp_peptides[j].toUnmodifiedString()].insert(i);
        }
      }
    }
    // important to resize: the nodes below are linked by address, so the
    // vector must have its final size before any pointer into it is taken
    peptide_nodes.resize(peptides.size());
    vector<PeptideEntry>::iterator pep_node = peptide_nodes.begin();
    Size peptide_counter = 0;

    for (map<String, set<Size> >::iterator i  = peptides.begin(); i != peptides.end(); ++i, ++pep_node, ++peptide_counter)
    {
      pep_node->index = peptide_counter;
      pep_node->traversed = false;
      pep_node->sequence = (*i).first;
      pep_node->experimental = false;
      // connect the peptide node with every protein node it originates from
      for (set<Size>::iterator j = (*i).second.begin(); j != (*i).second.end(); ++j)
      {
        pep_node->proteins.push_back(&protein_nodes[*j]);
        protein_nodes[*j].peptides.push_back(&*pep_node);
      }
    }
    //ISDGraph constructed
    Size isd_group_counter = 0;
    for (vector<ProteinEntry>::iterator prot_node = protein_nodes.begin(); prot_node != protein_nodes.end(); ++prot_node)
    {
      // every protein that has not been traversed yet starts a new group
      if (!prot_node->traversed)
      {
        prot_node->traversed = true;
        ISDGroup group;
        group.index = isd_group_counter;
        ++isd_group_counter;
        traversProtein_(&*prot_node, group);
        isd_groups.push_back(group);
      }
    }
  }

Example #4

Show file

File: Digestor.C Project: aiche/open-ms-mirror

  /// Tool entry point: reads proteins from the FASTA file given by option
  /// "in", digests them in silico ("Trypsin") or keeps them whole ("none"),
  /// drops peptides outside [min_length, max_length] and writes the remaining
  /// peptides either as FASTA or as idXML, depending on the output type.
  ///
  /// @return PARSE_ERROR if the output file type cannot be determined,
  ///         ILLEGAL_PARAMETERS for an unsupported enzyme name,
  ///         EXECUTION_OK otherwise.
  ExitCodes main_(int, const char **)
  {
    vector<ProteinIdentification> protein_identifications;

    vector<PeptideIdentification> identifications;
    PeptideIdentification peptide_identification;
    DateTime date_time = DateTime::now();
    String date_time_string = date_time.get();
    peptide_identification.setIdentifier("In-silico_digestion" + date_time_string);

    protein_identifications.push_back(ProteinIdentification());
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String inputfile_name = getStringOption_("in");
    String outputfile_name = getStringOption_("out");

    // output file type: explicit option first, file name as fallback
    FileHandler fh;
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(outputfile_name);
      writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 2);
    }

    if (out_type == FileTypes::UNKNOWN)
    {
      LOG_ERROR << ("Error: Could not determine output file type!") << std::endl;
      return PARSE_ERROR;
    }

    Size min_size = getIntOption_("min_length");
    Size max_size = getIntOption_("max_length");
    Size missed_cleavages = getIntOption_("missed_cleavages");


    bool has_FASTA_output = (out_type == FileTypes::FASTA);

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    std::vector<FASTAFile::FASTAEntry> protein_data;
    FASTAFile().load(inputfile_name, protein_data);
    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    // This should be updated if more cleavage enzymes are available
    ProteinIdentification::SearchParameters search_parameters;
    String enzyme = getStringOption_("enzyme");
    EnzymaticDigestion digestor;
    if (enzyme == "Trypsin")
    {
      digestor.setEnzyme(EnzymaticDigestion::ENZYME_TRYPSIN);
      digestor.setMissedCleavages(missed_cleavages);
      search_parameters.enzyme = ProteinIdentification::TRYPSIN;
    }
    else if (enzyme == "none")
    {
      search_parameters.enzyme = ProteinIdentification::NO_ENZYME;
    }
    else
    {
      LOG_ERROR << "Internal error in Digestor, when evaluating enzyme name! Please report this!" << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    vector<String> protein_accessions(1);
    PeptideHit temp_peptide_hit;

    protein_identifications[0].setSearchParameters(search_parameters);
    protein_identifications[0].setDateTime(date_time);
    protein_identifications[0].setSearchEngine("In-silico digestion");
    protein_identifications[0].setIdentifier("In-silico_digestion" + date_time_string);

    std::vector<FASTAFile::FASTAEntry> all_peptides;

    Size dropped_bylength(0);   // stats for removing candidates

    for (Size i = 0; i < protein_data.size(); ++i)
    {
      // idXML output additionally records the protein itself and links the
      // peptide hits created below to its accession
      if (!has_FASTA_output)
      {
        protein_accessions[0] = protein_data[i].identifier;
        ProteinHit temp_protein_hit;
        temp_protein_hit.setSequence(protein_data[i].sequence);
        temp_protein_hit.setAccession(protein_accessions[0]);
        protein_identifications[0].insertHit(temp_protein_hit);
        temp_peptide_hit.setProteinAccessions(protein_accessions);
      }

      vector<AASequence> temp_peptides;
      if (enzyme == "none")
      {
        // no digestion: the whole protein is the only "peptide"
        temp_peptides.push_back(AASequence(protein_data[i].sequence));
      }
      else
      {
        digestor.digest(AASequence(protein_data[i].sequence), temp_peptides);
      }

      for (Size j = 0; j < temp_peptides.size(); ++j)
      {
        if ((temp_peptides[j].size() >= min_size) &&
            (temp_peptides[j].size() <= max_size))
        {
          if (!has_FASTA_output)
          {
            // one PeptideIdentification per peptide, each holding a single hit
            temp_peptide_hit.setSequence(temp_peptides[j]);
            peptide_identification.insertHit(temp_peptide_hit);
            identifications.push_back(peptide_identification);
            peptide_identification.setHits(std::vector<PeptideHit>());   // clear
          }
          else   // for FASTA file output
          {
            FASTAFile::FASTAEntry pep(protein_data[i].identifier, protein_data[i].description, temp_peptides[j].toString());
            all_peptides.push_back(pep);
          }
        }
        else
        {
          ++dropped_bylength;
        }
      }
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    if (has_FASTA_output)
    {
      FASTAFile().store(outputfile_name, all_peptides);
    }
    else
    {
      IdXMLFile().store(outputfile_name,
                        protein_identifications,
                        identifications);
    }

    Size pep_remaining_count = (has_FASTA_output ? all_peptides.size() : identifications.size());
    LOG_INFO << "Statistics:\n"
             << "  total #peptides after digestion:         " << pep_remaining_count + dropped_bylength << "\n"
             << "  removed #peptides (length restrictions): " << dropped_bylength << "\n"
             << "  remaining #peptides:                     " << pep_remaining_count << std::endl;

    return EXECUTION_OK;
  }

Example #5

Show file

File: EnzymaticDigestion_test.cpp Project: FabianAicheler/OpenMS

START_TEST(EnzymaticDigestion, "$Id$")

/////////////////////////////////////////////////////////////
    // e_ptr is shared by the constructor and destructor sections below;
    // e_nullPointer is the null value the allocation result is compared against.
    EnzymaticDigestion * e_ptr = 0;
    EnzymaticDigestion* e_nullPointer = 0;
    // Default constructor: heap allocation must yield a non-null object.
    START_SECTION((EnzymaticDigestion()))
    e_ptr = new EnzymaticDigestion;
    TEST_NOT_EQUAL(e_ptr, e_nullPointer)
END_SECTION

// Destructor: releases the object created in the constructor section.
START_SECTION([EXTRA] ~EnzymaticDigestion())
    delete e_ptr;
END_SECTION

// Copy constructor: a copy must report the same missed cleavages, enzyme name
// and specificity as the object it was created from.
START_SECTION((EnzymaticDigestion(const EnzymaticDigestion &rhs)))
    EnzymaticDigestion ed;
    // values chosen explicitly so the comparison below checks copied state
    ed.setMissedCleavages(1234);
    ed.setEnzyme("no cleavage");
    ed.setSpecificity(EnzymaticDigestion::SPEC_SEMI);
    
    EnzymaticDigestion ed2(ed);
    
    TEST_EQUAL(ed.getMissedCleavages(), ed2.getMissedCleavages());
    TEST_EQUAL(ed.getEnzymeName(), ed2.getEnzymeName());
    TEST_EQUAL(ed.getSpecificity(), ed2.getSpecificity());

END_SECTION

START_SECTION((EnzymaticDigestion & operator=(const EnzymaticDigestion &rhs)))
    EnzymaticDigestion ed;
    ed.setMissedCleavages(1234);

Example #6

Show file

File: EnzymaticDigestion_test.C Project: aiche/open-ms-mirror

/////////////////////////////////////////////////////////////

// e_ptr is shared by the constructor and destructor sections below;
// e_nullPointer is the null value the allocation result is compared against.
EnzymaticDigestion* e_ptr = 0;
EnzymaticDigestion* e_nullPointer = 0;
// Default constructor: heap allocation must yield a non-null object.
START_SECTION((EnzymaticDigestion()))
	e_ptr = new EnzymaticDigestion;
	TEST_NOT_EQUAL(e_ptr, e_nullPointer)
END_SECTION

// Destructor: releases the object created in the constructor section.
START_SECTION([EXTRA] ~EnzymaticDigestion())
	delete e_ptr;
END_SECTION

// Copy constructor: a copy must report the same missed cleavages, enzyme,
// specificity, log-model flag and log threshold as its source object.
START_SECTION((EnzymaticDigestion(const EnzymaticDigestion& rhs) ))
  EnzymaticDigestion ed;
  // values chosen explicitly so the comparison below checks copied state
  ed.setMissedCleavages(1234);
  // NOTE(review): SIZE_OF_ENZYMES looks like the enum's end marker rather than
  // a real enzyme - presumably used only as a distinctive value; confirm.
  ed.setEnzyme(EnzymaticDigestion::SIZE_OF_ENZYMES);
  ed.setSpecificity(EnzymaticDigestion::SPEC_SEMI);
  ed.setLogModelEnabled(true);
  ed.setLogThreshold(81231);
  
  EnzymaticDigestion ed2(ed);

  TEST_EQUAL(ed.getMissedCleavages(), ed2.getMissedCleavages());
  TEST_EQUAL(ed.getEnzyme(), ed2.getEnzyme());
  TEST_EQUAL(ed.getSpecificity(), ed2.getSpecificity());
  TEST_EQUAL(ed.isLogModelEnabled(), ed2.isLogModelEnabled());
  TEST_EQUAL(ed.getLogThreshold(), ed2.getLogThreshold());

END_SECTION