/**
  @brief Digests the protein hits attached to @p feature_map into peptide features.

  Only the hits of the first ProteinIdentification of the map are processed.
  If the map carries no ProteinIdentification there is nothing to digest and
  the function returns without touching the map.

  - Parameter "enzyme" == "none": every protein is turned into exactly one
    feature carrying the unmodified protein sequence, the protein accession,
    the "intensity" meta value as feature intensity and a copy of all meta values.
  - Otherwise: every protein is digested with the configured enzyme. Digestion
    products shorter than "min_peptide_length" are dropped. Products with the
    same sequence (from the same or from different proteins) are merged into
    one feature whose intensities are summed up; the final feature intensity
    is rounded up.

  @param feature_map Map providing the protein hits; receives the generated features.
*/
void DigestSimulation::digest(SimTypes::FeatureMapSim& feature_map)
{
  LOG_INFO << "Digest Simulation ... started" << std::endl;

  // Both code paths below index the first ProteinIdentification; without any
  // identification there are no proteins to digest.
  if (feature_map.getProteinIdentifications().empty())
  {
    return;
  }

  if ((String)param_.getValue("enzyme") == String("none"))
  {
    // no digestion: peptides = proteins, i.e. convert every protein into one peptide feature
    for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
         protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
         ++protein_hit)
    {
      // generate a PeptideHit with the correct link to the protein
      PeptideHit pep_hit(1.0, 1, 0, AASequence::fromString(protein_hit->getSequence()));
      PeptideEvidence pe;
      pe.setProteinAccession(protein_hit->getAccession());
      pep_hit.addPeptideEvidence(pe);

      // add the PeptideHit to the PeptideIdentification
      PeptideIdentification pep_id;
      pep_id.insertHit(pep_hit);

      // generate Feature with correct intensity and corresponding PeptideIdentification
      Feature f;
      f.getPeptideIdentifications().push_back(pep_id);
      f.setIntensity(protein_hit->getMetaValue("intensity"));

      // copy intensity meta values and additional annotations from protein to feature
      StringList keys;
      protein_hit->getKeys(keys);
      for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
      {
        f.setMetaValue(*it_key, protein_hit->getMetaValue(*it_key));
      }

      // add Feature to SimTypes::FeatureMapSim
      feature_map.push_back(f);
    }

    return;
  }

  const UInt min_peptide_length = param_.getValue("min_peptide_length");
  const bool use_log_model = (param_.getValue("model") == "trained");
  const UInt missed_cleavages = param_.getValue("model_naive:missed_cleavages");
  const double cleave_threshold = param_.getValue("model_trained:threshold");

  EnzymaticDigestion digestion;
  digestion.setEnzyme(digestion.getEnzymeByName((String)param_.getValue("enzyme")));
  digestion.setLogModelEnabled(use_log_model);
  digestion.setLogThreshold(cleave_threshold);

  std::vector<AASequence> digestion_products;

  // keep track of generated features (one per distinct peptide sequence)
  std::map<AASequence, Feature> generated_features;

  // iterate through the ProteinHits in the FeatureMap and digest them
  for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
       protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
       ++protein_hit)
  {
    // parse the protein sequence once; it is needed for counting and for digesting
    const AASequence protein_sequence = AASequence::fromString(protein_hit->getSequence());

    // Determine the abundance of each digestion product.
    // We assume that each digestion product will have the same abundance.
    // Note: missed cleavages reduce the overall abundance as they combine two (or more) single peptides.

    // how many "atomic" (i.e. non-cleavable) peptides are created?
    digestion.setMissedCleavages(0);
    const Size complete_digest_count = digestion.peptideCount(protein_sequence);

    // compute the number of "atomic" peptides summed over all digestion products
    Size number_atomic_whole = 0;
    Size number_of_digestion_products = 0;
    for (Size i = 0; (i <= missed_cleavages) && (i < complete_digest_count); ++i)
    {
      number_atomic_whole += (complete_digest_count - i) * (i + 1);
      number_of_digestion_products += (complete_digest_count - i);
    }

    // The mean number of "atomic" peptides per digestion product is
    //   number_atomic_whole / number_of_digestion_products
    // thus the abundance of one digestion product is
    //   protein intensity / (number_atomic_whole / number_of_digestion_products)
    // with a lower bound of 1.
    Map<String, SimTypes::SimIntensityType> intensities;
    StringList keys;
    protein_hit->getKeys(keys);
    for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
    {
      if (!it_key->hasPrefix("intensity"))
      {
        continue;
      }
      // multiplication before division for numeric stability
      intensities[*it_key] = std::max(SimTypes::SimIntensityType(1),
                                      SimTypes::SimIntensityType(protein_hit->getMetaValue(*it_key))
                                      * SimTypes::SimIntensityType(number_of_digestion_products)
                                      / SimTypes::SimIntensityType(number_atomic_whole));
    }

    // do the real digest
    digestion.setMissedCleavages(missed_cleavages);
    digestion.digest(protein_sequence, digestion_products);

    for (std::vector<AASequence>::const_iterator dp_it = digestion_products.begin();
         dp_it != digestion_products.end();
         ++dp_it)
    {
      if (dp_it->size() < min_peptide_length)
      {
        continue;
      }

      // look the peptide up once and work on the stored feature afterwards
      std::map<AASequence, Feature>::iterator feature_it = generated_features.find(*dp_it);
      if (feature_it == generated_features.end())
      {
        // first occurrence of this peptide -> generate the corresponding feature
        PeptideHit pep_hit(1.0, 1, 0, *dp_it);

        PeptideIdentification pep_id;
        pep_id.insertHit(pep_hit);

        Feature f;
        f.getPeptideIdentifications().push_back(pep_id);
        // start at 0 so that the summation below is well defined
        f.setIntensity(0.0);

        // copy all non-intensity meta values
        StringList lkeys;
        protein_hit->getKeys(lkeys);
        for (StringList::iterator key = lkeys.begin(); key != lkeys.end(); ++key)
        {
          if (!key->hasPrefix("intensity"))
          {
            f.setMetaValue(*key, protein_hit->getMetaValue(*key));
          }
        }

        feature_it = generated_features.insert(std::make_pair(*dp_it, f)).first;
      }
      Feature& feature = feature_it->second;

      // sum up intensity values
      feature.setIntensity(feature.getIntensity() + intensities["intensity"]);

      // ... same for the other intensities (iTRAQ...)
      for (Map<String, SimTypes::SimIntensityType>::const_iterator it_other = intensities.begin();
           it_other != intensities.end();
           ++it_other)
      {
        if (!feature.metaValueExists(it_other->first))
        {
          feature.setMetaValue(it_other->first, it_other->second);
        }
        else
        {
          feature.setMetaValue(it_other->first,
                               SimTypes::SimIntensityType(feature.getMetaValue(it_other->first)) + it_other->second);
        }
      }

      // Link the peptide to the current protein. The hit already carries one
      // evidence per previously seen protein, so only a not yet referenced
      // accession is appended; re-adding the known accessions would duplicate
      // their evidences every time the peptide is encountered again.
      const std::set<String> known_accessions = feature.getPeptideIdentifications()[0].getHits()[0].extractProteinAccessions();
      if (known_accessions.count(protein_hit->getAccession()) == 0)
      {
        std::vector<PeptideIdentification> pep_idents = feature.getPeptideIdentifications();
        std::vector<PeptideHit> pep_hits = pep_idents[0].getHits();

        PeptideEvidence pe;
        pe.setProteinAccession(protein_hit->getAccession());
        pep_hits[0].addPeptideEvidence(pe);

        pep_idents[0].setHits(pep_hits);
        feature.setPeptideIdentifications(pep_idents);
      }
    }
  }

  // add generated_features to the FeatureMap
  for (std::map<AASequence, Feature>::iterator it_gf = generated_features.begin();
       it_gf != generated_features.end();
       ++it_gf)
  {
    // round up intensity
    (it_gf->second).setIntensity(ceil((it_gf->second).getIntensity()));
    feature_map.push_back(it_gf->second);
  }
}
/**
  @brief Tool entry point: loads an idXML file, applies all requested filters and stores the result.

  Peptide-level filters run first, then protein-level filters, followed by a
  clean-up that keeps peptide and protein identifications consistent with
  each other. Counts before and after filtering are written to the log.

  @return ILLEGAL_PARAMETERS if a negative bound is given for 'length' or
          'best:n_to_m_peptide_hits', EXECUTION_OK otherwise.
*/
ExitCodes main_(int, const char**)
{
  String inputfile_name = getStringOption_("in");
  String outputfile_name = getStringOption_("out");

  vector<ProteinIdentification> proteins;
  vector<PeptideIdentification> peptides;
  IdXMLFile().load(inputfile_name, proteins, peptides);

  // remember the initial counts for the statistics at the end
  Size n_prot_ids = proteins.size();
  Size n_prot_hits = IDFilter::countHits(proteins);
  Size n_pep_ids = peptides.size();
  Size n_pep_hits = IDFilter::countHits(peptides);

  // Filtering peptide identification according to set criteria

  double rt_high = numeric_limits<double>::infinity(), rt_low = -rt_high;
  if (parseRange_(getStringOption_("precursor:rt"), rt_low, rt_high))
  {
    LOG_INFO << "Filtering peptide IDs by precursor RT..." << endl;
    IDFilter::filterPeptidesByRT(peptides, rt_low, rt_high);
  }

  double mz_high = numeric_limits<double>::infinity(), mz_low = -mz_high;
  if (parseRange_(getStringOption_("precursor:mz"), mz_low, mz_high))
  {
    // terminate the message like all other progress messages do
    LOG_INFO << "Filtering peptide IDs by precursor m/z..." << endl;
    IDFilter::filterPeptidesByMZ(peptides, mz_low, mz_high);
  }

  // Filtering peptide hits according to set criteria

  if (getFlag_("unique"))
  {
    LOG_INFO << "Removing duplicate peptide hits..." << endl;
    IDFilter::removeDuplicatePeptideHits(peptides);
  }

  if (getFlag_("unique_per_protein"))
  {
    LOG_INFO << "Filtering peptides by unique match to a protein..." << endl;
    IDFilter::keepUniquePeptidesPerProtein(peptides);
  }

  double peptide_significance = getDoubleOption_("thresh:pep");
  if (peptide_significance > 0)
  {
    LOG_INFO << "Filtering by peptide significance threshold..." << endl;
    IDFilter::filterHitsBySignificance(peptides, peptide_significance);
  }

  double pred_rt_pv = getDoubleOption_("rt:p_value");
  if (pred_rt_pv > 0)
  {
    LOG_INFO << "Filtering by RT prediction p-value..." << endl;
    IDFilter::filterPeptidesByRTPredictPValue(
      peptides, "predicted_RT_p_value", pred_rt_pv);
  }

  double pred_rt_pv_1d = getDoubleOption_("rt:p_value_1st_dim");
  if (pred_rt_pv_1d > 0)
  {
    LOG_INFO << "Filtering by RT prediction p-value (first dim.)..." << endl;
    IDFilter::filterPeptidesByRTPredictPValue(
      peptides, "predicted_RT_p_value_first_dim", pred_rt_pv_1d);
  }

  String whitelist_fasta = getStringOption_("whitelist:proteins").trim();
  if (!whitelist_fasta.empty())
  {
    LOG_INFO << "Filtering by protein whitelisting (FASTA input)..." << endl;
    // load protein accessions from FASTA file:
    vector<FASTAFile::FASTAEntry> fasta;
    FASTAFile().load(whitelist_fasta, fasta);
    set<String> accessions;
    for (vector<FASTAFile::FASTAEntry>::iterator it = fasta.begin();
         it != fasta.end(); ++it)
    {
      accessions.insert(it->identifier);
    }
    IDFilter::keepHitsMatchingProteins(peptides, accessions);
    IDFilter::keepHitsMatchingProteins(proteins, accessions);
  }

  vector<String> whitelist_accessions =
    getStringList_("whitelist:protein_accessions");
  if (!whitelist_accessions.empty())
  {
    LOG_INFO << "Filtering by protein whitelisting (accessions input)..." << endl;
    set<String> accessions(whitelist_accessions.begin(),
                           whitelist_accessions.end());
    IDFilter::keepHitsMatchingProteins(peptides, accessions);
    IDFilter::keepHitsMatchingProteins(proteins, accessions);
  }

  String whitelist_peptides = getStringOption_("whitelist:peptides").trim();
  if (!whitelist_peptides.empty())
  {
    LOG_INFO << "Filtering by inclusion peptide whitelisting..." << endl;
    vector<PeptideIdentification> inclusion_peptides;
    vector<ProteinIdentification> inclusion_proteins; // ignored
    IdXMLFile().load(whitelist_peptides, inclusion_proteins,
                     inclusion_peptides);
    bool ignore_mods = getFlag_("whitelist:ignore_modifications");
    IDFilter::keepPeptidesWithMatchingSequences(peptides, inclusion_peptides,
                                                ignore_mods);
  }

  vector<String> whitelist_mods = getStringList_("whitelist:modifications");
  if (!whitelist_mods.empty())
  {
    LOG_INFO << "Filtering peptide IDs by modification whitelisting..." << endl;
    set<String> good_mods(whitelist_mods.begin(), whitelist_mods.end());
    IDFilter::keepPeptidesWithMatchingModifications(peptides, good_mods);
  }

  String blacklist_fasta = getStringOption_("blacklist:proteins").trim();
  if (!blacklist_fasta.empty())
  {
    LOG_INFO << "Filtering by protein blacklisting (FASTA input)..." << endl;
    // load protein accessions from FASTA file:
    vector<FASTAFile::FASTAEntry> fasta;
    FASTAFile().load(blacklist_fasta, fasta);
    set<String> accessions;
    for (vector<FASTAFile::FASTAEntry>::iterator it = fasta.begin();
         it != fasta.end(); ++it)
    {
      accessions.insert(it->identifier);
    }
    IDFilter::removeHitsMatchingProteins(peptides, accessions);
    IDFilter::removeHitsMatchingProteins(proteins, accessions);
  }

  vector<String> blacklist_accessions =
    getStringList_("blacklist:protein_accessions");
  if (!blacklist_accessions.empty())
  {
    LOG_INFO << "Filtering by protein blacklisting (accessions input)..." << endl;
    set<String> accessions(blacklist_accessions.begin(),
                           blacklist_accessions.end());
    IDFilter::removeHitsMatchingProteins(peptides, accessions);
    IDFilter::removeHitsMatchingProteins(proteins, accessions);
  }

  String blacklist_peptides = getStringOption_("blacklist:peptides").trim();
  if (!blacklist_peptides.empty())
  {
    LOG_INFO << "Filtering by exclusion peptide blacklisting..." << endl;
    vector<PeptideIdentification> exclusion_peptides;
    vector<ProteinIdentification> exclusion_proteins; // ignored
    IdXMLFile().load(blacklist_peptides, exclusion_proteins,
                     exclusion_peptides);
    bool ignore_mods = getFlag_("blacklist:ignore_modifications");
    IDFilter::removePeptidesWithMatchingSequences(
      peptides, exclusion_peptides, ignore_mods);
  }

  vector<String> blacklist_mods = getStringList_("blacklist:modifications");
  if (!blacklist_mods.empty())
  {
    LOG_INFO << "Filtering peptide IDs by modification blacklisting..." << endl;
    set<String> bad_mods(blacklist_mods.begin(), blacklist_mods.end());
    IDFilter::removePeptidesWithMatchingModifications(peptides, bad_mods);
  }

  if (getFlag_("best:strict"))
  {
    LOG_INFO << "Filtering by best peptide hits..." << endl;
    IDFilter::keepBestPeptideHits(peptides, true);
  }

  Int min_length = 0, max_length = 0;
  if (parseRange_(getStringOption_("length"), min_length, max_length))
  {
    LOG_INFO << "Filtering by peptide length..." << endl;
    // the bounds are converted to an unsigned type below, so reject negatives first
    if ((min_length < 0) || (max_length < 0))
    {
      LOG_ERROR << "Fatal error: negative values are not allowed for parameter 'length'" << endl;
      return ILLEGAL_PARAMETERS;
    }
    IDFilter::filterPeptidesByLength(peptides, Size(min_length),
                                     Size(max_length));
  }

  // Filter by digestion enzyme product
  String protein_fasta = getStringOption_("digest:fasta").trim();
  if (!protein_fasta.empty())
  {
    LOG_INFO << "Filtering peptides by digested protein (FASTA input)..." << endl;
    // load protein accessions from FASTA file:
    vector<FASTAFile::FASTAEntry> fasta;
    FASTAFile().load(protein_fasta, fasta);

    // Configure Enzymatic digestion
    EnzymaticDigestion digestion;
    String enzyme = getStringOption_("digest:enzyme").trim();
    if (!enzyme.empty())
    {
      digestion.setEnzyme(enzyme);
    }

    String specificity = getStringOption_("digest:specificity").trim();
    if (!specificity.empty())
    {
      digestion.setSpecificity(digestion.getSpecificityByName(specificity));
    }

    // a value of -1 (or less) means: do not check the number of missed cleavages
    Int missed_cleavages = getIntOption_("digest:missed_cleavages");
    bool ignore_missed_cleavages = true;
    if (missed_cleavages > -1)
    {
      ignore_missed_cleavages = false;
      // warn only when the specificity is NOT full, as the message states
      if (digestion.getSpecificity() != EnzymaticDigestion::SPEC_FULL)
      {
        LOG_WARN << "Specificity not full, missed_cleavages option is redundant" << endl;
      }
      digestion.setMissedCleavages(missed_cleavages);
    }

    bool methionine_cleavage = getFlag_("digest:methionine_cleavage");

    // Build the digest filter function
    IDFilter::DigestionFilter filter(fasta, digestion,
                                     ignore_missed_cleavages,
                                     methionine_cleavage);
    // Filter peptides
    filter.filterPeptideEvidences(peptides);
  }

  if (getFlag_("var_mods"))
  {
    LOG_INFO << "Filtering for variable modifications..." << endl;
    // gather possible variable modifications from search parameters:
    set<String> var_mods;
    for (vector<ProteinIdentification>::iterator prot_it = proteins.begin();
         prot_it != proteins.end(); ++prot_it)
    {
      const ProteinIdentification::SearchParameters& params =
        prot_it->getSearchParameters();
      for (vector<String>::const_iterator mod_it =
             params.variable_modifications.begin();
           mod_it != params.variable_modifications.end(); ++mod_it)
      {
        var_mods.insert(*mod_it);
      }
    }
    IDFilter::keepPeptidesWithMatchingModifications(peptides, var_mods);
  }

  double pep_score = getDoubleOption_("score:pep");
  // @TODO: what if 0 is a reasonable cut-off for some score?
  if (pep_score != 0)
  {
    LOG_INFO << "Filtering by peptide score..." << endl;
    IDFilter::filterHitsByScore(peptides, pep_score);
  }

  Int min_charge = numeric_limits<Int>::min(),
      max_charge = numeric_limits<Int>::max();
  if (parseRange_(getStringOption_("charge"), min_charge, max_charge))
  {
    LOG_INFO << "Filtering by peptide charge..." << endl;
    IDFilter::filterPeptidesByCharge(peptides, min_charge, max_charge);
  }

  Size best_n_pep = getIntOption_("best:n_peptide_hits");
  if (best_n_pep > 0)
  {
    LOG_INFO << "Filtering by best n peptide hits..." << endl;
    IDFilter::keepNBestHits(peptides, best_n_pep);
  }

  Int min_rank = 0, max_rank = 0;
  if (parseRange_(getStringOption_("best:n_to_m_peptide_hits"), min_rank,
                  max_rank))
  {
    LOG_INFO << "Filtering by peptide hit ranks..." << endl;
    // the bounds are converted to an unsigned type below, so reject negatives first
    if ((min_rank < 0) || (max_rank < 0))
    {
      LOG_ERROR << "Fatal error: negative values are not allowed for parameter 'best:n_to_m_peptide_hits'" << endl;
      return ILLEGAL_PARAMETERS;
    }
    IDFilter::filterHitsByRank(peptides, Size(min_rank), Size(max_rank));
  }

  double mz_error = getDoubleOption_("mz:error");
  if (mz_error > 0)
  {
    LOG_INFO << "Filtering by mass error..." << endl;
    bool unit_ppm = (getStringOption_("mz:unit") == "ppm");
    IDFilter::filterPeptidesByMZError(peptides, mz_error, unit_ppm);
  }

  // Filtering protein identifications according to set criteria

  double protein_significance = getDoubleOption_("thresh:prot");
  if (protein_significance > 0)
  {
    LOG_INFO << "Filtering by protein significance threshold..." << endl;
    IDFilter::filterHitsBySignificance(proteins, protein_significance);
  }

  double prot_score = getDoubleOption_("score:prot");
  // @TODO: what if 0 is a reasonable cut-off for some score?
  if (prot_score != 0)
  {
    LOG_INFO << "Filtering by protein score..." << endl;
    IDFilter::filterHitsByScore(proteins, prot_score);
  }

  Size best_n_prot = getIntOption_("best:n_protein_hits");
  if (best_n_prot > 0)
  {
    LOG_INFO << "Filtering by best n protein hits..." << endl;
    IDFilter::keepNBestHits(proteins, best_n_prot);
  }

  if (getFlag_("remove_decoys"))
  {
    LOG_INFO << "Removing decoy hits..." << endl;
    IDFilter::removeDecoyHits(peptides);
    IDFilter::removeDecoyHits(proteins);
  }

  // Clean-up:

  if (!getFlag_("keep_unreferenced_protein_hits"))
  {
    LOG_INFO << "Removing unreferenced protein hits..." << endl;
    IDFilter::removeUnreferencedProteins(proteins, peptides);
  }

  IDFilter::updateHitRanks(proteins);
  IDFilter::updateHitRanks(peptides);

  // remove non-existent protein references from peptides (and optionally:
  // remove peptides with no proteins):
  bool rm_pep = getFlag_("delete_unreferenced_peptide_hits");
  if (rm_pep)
  {
    LOG_INFO << "Removing peptide hits without protein references..." << endl;
  }
  IDFilter::updateProteinReferences(peptides, proteins, rm_pep);

  IDFilter::removeEmptyIdentifications(peptides);
  // we want to keep "empty" protein IDs because they contain search meta data

  // update protein groupings if necessary:
  for (vector<ProteinIdentification>::iterator prot_it = proteins.begin();
       prot_it != proteins.end(); ++prot_it)
  {
    bool valid = IDFilter::updateProteinGroups(prot_it->getProteinGroups(),
                                               prot_it->getHits());
    if (!valid)
    {
      LOG_WARN << "Warning: While updating protein groups, some proteins were removed from groups that are still present. The new grouping (especially the group probabilities) may not be completely valid any more." << endl;
    }

    valid = IDFilter::updateProteinGroups(
      prot_it->getIndistinguishableProteins(), prot_it->getHits());
    if (!valid)
    {
      LOG_WARN << "Warning: While updating indistinguishable proteins, some proteins were removed from groups that are still present. The new grouping (especially the group probabilities) may not be completely valid any more." << endl;
    }
  }

  // some stats
  LOG_INFO << "Before filtering:\n"
           << n_prot_ids << " protein identification(s) with "
           << n_prot_hits << " protein hit(s),\n"
           << n_pep_ids << " peptide identification(s) with "
           << n_pep_hits << " peptides hit(s).\n"
           << "After filtering:\n"
           << proteins.size() << " protein identification(s) with "
           << IDFilter::countHits(proteins) << " protein hit(s),\n"
           << peptides.size() << " peptide identification(s) with "
           << IDFilter::countHits(peptides) << " peptides hit(s)." << endl;

  IdXMLFile().store(outputfile_name, proteins, peptides);

  return EXECUTION_OK;
}
void ProteinResolver::buildingISDGroups_(vector<ProteinEntry> & protein_nodes, vector<PeptideEntry> & peptide_nodes, vector<ISDGroup> & isd_groups) { EnzymaticDigestion digestor; String enzyme_name = param_.getValue("resolver:enzyme"); digestor.setEnzyme(digestor.getEnzymeByName(enzyme_name)); UInt min_size = param_.getValue("resolver:min_length"); UInt missed_cleavages = param_.getValue("resolver:missed_cleavages"); digestor.setMissedCleavages(missed_cleavages); //------------------------------------------------------------- // building ISD Groups //------------------------------------------------------------- vector<AASequence> temp_peptides; map<String, set<Size> > peptides; for (Size i = 0; i < protein_data_.size(); ++i) { protein_nodes[i].fasta_entry = &protein_data_[i]; protein_nodes[i].traversed = false; protein_nodes[i].index = i; protein_nodes[i].protein_type = ProteinEntry::secondary; protein_nodes[i].weight = AASequence(protein_data_[i].sequence).getMonoWeight(); protein_nodes[i].coverage = 0.; protein_nodes[i].number_of_experimental_peptides = 0; digestor.digest(AASequence(protein_data_[i].sequence), temp_peptides); for (Size j = 0; j < temp_peptides.size(); ++j) { if (temp_peptides[j].size() >= min_size) { peptides[temp_peptides[j].toUnmodifiedString()].insert(i); } } } // important to resize peptide_nodes.resize(peptides.size()); vector<PeptideEntry>::iterator pep_node = peptide_nodes.begin(); Size peptide_counter = 0; for (map<String, set<Size> >::iterator i = peptides.begin(); i != peptides.end(); ++i, ++pep_node, ++peptide_counter) { pep_node->index = peptide_counter; pep_node->traversed = false; pep_node->sequence = (*i).first; pep_node->experimental = false; for (set<Size>::iterator j = (*i).second.begin(); j != (*i).second.end(); ++j) { pep_node->proteins.push_back(&protein_nodes[*j]); protein_nodes[*j].peptides.push_back(&*pep_node); } } //ISDGraph constructed Size isd_group_counter = 0; Size i = 0; for (vector<ProteinEntry>::iterator prot_node 
= protein_nodes.begin(); prot_node != protein_nodes.end(); ++prot_node) { ++i; if (!prot_node->traversed) { prot_node->traversed = true; ISDGroup group; group.index = isd_group_counter; ++isd_group_counter; traversProtein_(&*prot_node, group); isd_groups.push_back(group); } } }
/**
  @brief Tool entry point: digests all proteins of a FASTA file in silico and writes the peptides.

  The output is written as FASTA or as idXML, depending on the "out_type"
  option or, if that does not name a known type, on the output file name.
  Peptides outside of [min_length, max_length] are dropped and counted.

  @return PARSE_ERROR if the output file type cannot be determined,
          ILLEGAL_PARAMETERS for an unsupported enzyme name,
          EXECUTION_OK otherwise.
*/
ExitCodes main_(int, const char **)
{
  vector<ProteinIdentification> protein_identifications;

  vector<PeptideIdentification> identifications;
  PeptideIdentification peptide_identification;
  DateTime date_time = DateTime::now();
  String date_time_string = date_time.get();
  peptide_identification.setIdentifier("In-silico_digestion" + date_time_string);

  // the single protein identification run that all generated hits belong to
  protein_identifications.push_back(ProteinIdentification());

  //-------------------------------------------------------------
  // parsing parameters
  //-------------------------------------------------------------
  String inputfile_name = getStringOption_("in");
  String outputfile_name = getStringOption_("out");

  // output file type: explicit option first, file name as fallback
  FileHandler fh;
  FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

  if (out_type == FileTypes::UNKNOWN)
  {
    out_type = fh.getTypeByFileName(outputfile_name);
    writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 2);
  }

  if (out_type == FileTypes::UNKNOWN)
  {
    LOG_ERROR << ("Error: Could not determine output file type!") << std::endl;
    return PARSE_ERROR;
  }

  Size min_size = getIntOption_("min_length");
  Size max_size = getIntOption_("max_length");
  Size missed_cleavages = getIntOption_("missed_cleavages");

  bool has_FASTA_output = (out_type == FileTypes::FASTA);

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  std::vector<FASTAFile::FASTAEntry> protein_data;
  FASTAFile().load(inputfile_name, protein_data);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------

  // This should be updated if more cleavage enzymes are available
  ProteinIdentification::SearchParameters search_parameters;
  String enzyme = getStringOption_("enzyme");
  EnzymaticDigestion digestor;
  if (enzyme == "Trypsin")
  {
    digestor.setEnzyme(EnzymaticDigestion::ENZYME_TRYPSIN);
    digestor.setMissedCleavages(missed_cleavages);
    search_parameters.enzyme = ProteinIdentification::TRYPSIN;
  }
  else if (enzyme == "none")
  {
    search_parameters.enzyme = ProteinIdentification::NO_ENZYME;
  }
  else
  {
    LOG_ERROR << "Internal error in Digestor, when evaluating enzyme name! Please report this!" << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  vector<String> protein_accessions(1);
  PeptideHit temp_peptide_hit;

  protein_identifications[0].setSearchParameters(search_parameters);
  protein_identifications[0].setDateTime(date_time);
  protein_identifications[0].setSearchEngine("In-silico digestion");
  protein_identifications[0].setIdentifier("In-silico_digestion" + date_time_string);

  std::vector<FASTAFile::FASTAEntry> all_peptides;

  Size dropped_bylength(0); // stats for removing candidates

  for (Size i = 0; i < protein_data.size(); ++i)
  {
    if (!has_FASTA_output)
    {
      // register the protein and let all peptide hits created below reference it
      protein_accessions[0] = protein_data[i].identifier;
      ProteinHit temp_protein_hit;
      temp_protein_hit.setSequence(protein_data[i].sequence);
      temp_protein_hit.setAccession(protein_accessions[0]);
      protein_identifications[0].insertHit(temp_protein_hit);
      temp_peptide_hit.setProteinAccessions(protein_accessions);
    }

    vector<AASequence> temp_peptides;
    if (enzyme == "none")
    {
      // no digestion: the whole protein is the only "peptide"
      temp_peptides.push_back(AASequence(protein_data[i].sequence));
    }
    else
    {
      digestor.digest(AASequence(protein_data[i].sequence), temp_peptides);
    }

    for (Size j = 0; j < temp_peptides.size(); ++j)
    {
      if ((temp_peptides[j].size() >= min_size) &&
          (temp_peptides[j].size() <= max_size))
      {
        if (!has_FASTA_output)
        {
          // one PeptideIdentification per peptide; the shared object is reused
          temp_peptide_hit.setSequence(temp_peptides[j]);
          peptide_identification.insertHit(temp_peptide_hit);
          identifications.push_back(peptide_identification);
          peptide_identification.setHits(std::vector<PeptideHit>()); // clear
        }
        else // for FASTA file output
        {
          // the peptide inherits identifier and description of its protein
          FASTAFile::FASTAEntry pep(protein_data[i].identifier, protein_data[i].description, temp_peptides[j].toString());
          all_peptides.push_back(pep);
        }
      }
      else
      {
        ++dropped_bylength;
      }
    }
  }

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------

  if (has_FASTA_output)
  {
    FASTAFile().store(outputfile_name, all_peptides);
  }
  else
  {
    IdXMLFile().store(outputfile_name,
                      protein_identifications,
                      identifications);
  }

  Size pep_remaining_count = (has_FASTA_output ? all_peptides.size() : identifications.size());
  LOG_INFO << "Statistics:\n"
           << "  total #peptides after digestion:         " << pep_remaining_count + dropped_bylength << "\n"
           << "  removed #peptides (length restrictions): " << dropped_bylength << "\n"
           << "  remaining #peptides:                     " << pep_remaining_count << std::endl;

  return EXECUTION_OK;
}
// Class test for EnzymaticDigestion (name-based enzyme API).
START_TEST(EnzymaticDigestion, "$Id$")

/////////////////////////////////////////////////////////////

// e_ptr is shared by the first two sections: it is allocated in the
// constructor section and released in the destructor section.
EnzymaticDigestion * e_ptr = 0;
EnzymaticDigestion* e_nullPointer = 0;
// default constructor: allocation must yield a non-null pointer
START_SECTION((EnzymaticDigestion()))
  e_ptr = new EnzymaticDigestion;
  TEST_NOT_EQUAL(e_ptr, e_nullPointer)
END_SECTION

// destructor: releases the object created in the section above
START_SECTION([EXTRA] ~EnzymaticDigestion())
  delete e_ptr;
END_SECTION

// copy constructor: set every property to a non-default looking value and
// check that the copy reports the same values
START_SECTION((EnzymaticDigestion(const EnzymaticDigestion &rhs)))
  EnzymaticDigestion ed;
  ed.setMissedCleavages(1234);
  ed.setEnzyme("no cleavage");
  ed.setSpecificity(EnzymaticDigestion::SPEC_SEMI);

  EnzymaticDigestion ed2(ed);

  TEST_EQUAL(ed.getMissedCleavages(), ed2.getMissedCleavages());
  TEST_EQUAL(ed.getEnzymeName(), ed2.getEnzymeName());
  TEST_EQUAL(ed.getSpecificity(), ed2.getSpecificity());
END_SECTION

// assignment operator
START_SECTION((EnzymaticDigestion & operator=(const EnzymaticDigestion &rhs)))
  EnzymaticDigestion ed;
  ed.setMissedCleavages(1234);
///////////////////////////////////////////////////////////// EnzymaticDigestion* e_ptr = 0; EnzymaticDigestion* e_nullPointer = 0; START_SECTION((EnzymaticDigestion())) e_ptr = new EnzymaticDigestion; TEST_NOT_EQUAL(e_ptr, e_nullPointer) END_SECTION START_SECTION([EXTRA] ~EnzymaticDigestion()) delete e_ptr; END_SECTION START_SECTION((EnzymaticDigestion(const EnzymaticDigestion& rhs) )) EnzymaticDigestion ed; ed.setMissedCleavages(1234); ed.setEnzyme(EnzymaticDigestion::SIZE_OF_ENZYMES); ed.setSpecificity(EnzymaticDigestion::SPEC_SEMI); ed.setLogModelEnabled(true); ed.setLogThreshold(81231); EnzymaticDigestion ed2(ed); TEST_EQUAL(ed.getMissedCleavages(), ed2.getMissedCleavages()); TEST_EQUAL(ed.getEnzyme(), ed2.getEnzyme()); TEST_EQUAL(ed.getSpecificity(), ed2.getSpecificity()); TEST_EQUAL(ed.isLogModelEnabled(), ed2.isLogModelEnabled()); TEST_EQUAL(ed.getLogThreshold(), ed2.getLogThreshold()); END_SECTION