/**
  @brief Shuffles the residues of a peptide until the result differs enough from the input.

  The positions reported by MRMDecoy::find_all_tryptic() (the K/P/R residues,
  according to the comments in this function) are kept in place; all other
  positions are permuted with a seeded Mersenne Twister. Modification
  locations are remapped so that each modification follows the residue it was
  attached to. The loop repeats while
  MRMDecoy::AASequenceIdentity(original, shuffled) is greater than
  @p identity_threshold and fewer than @p max_attempts attempts were made.

  Whenever the attempt counter satisfies attempts % 10 == 9, the working
  peptide itself is altered to make shuffling easier: either one residue that
  carries no modification is replaced by a random amino acid
  (@p replace_aa_instead_append == true), or two random amino acids are
  appended to the sequence (@p replace_aa_instead_append == false).

  @param peptide Peptide to shuffle (taken by value; the local copy is modified by the fallback step).
  @param identity_threshold Shuffling continues while the sequence identity is above this value.
  @param seed Seed for the random number generator; -1 seeds from time(0).
  @param max_attempts Upper bound on the number of shuffling attempts.
  @param replace_aa_instead_append Selects the fallback strategy described above.

  @return The candidate peptide held when the loop terminates.
*/
OpenMS::TargetedExperiment::Peptide MRMDecoy::shufflePeptide(
  OpenMS::TargetedExperiment::Peptide peptide, double identity_threshold, int seed,
  int max_attempts, bool replace_aa_instead_append)
{
#ifdef DEBUG_MRMDECOY
  std::cout << " shuffle peptide " << peptide.sequence << std::endl;
  seed = 41;
#endif
  if (seed == -1)
  {
    seed = time(0);
  }
  OpenMS::TargetedExperiment::Peptide shuffled = peptide;

  // NOTE(review): uni_dist is default-constructed. According to the
  // Boost.Random documentation the default range of boost::uniform_int<> is
  // [0, 9], so every argument-less pseudoRNG() call below yields 0..9 only and
  // the "pseudoRNG() % n" draws never reach indices >= 10. This is left
  // untouched on purpose: changing the draws would change the decoys generated
  // for a given seed.
  boost::mt19937 generator(seed);
  boost::uniform_int<> uni_dist;
  boost::variate_generator<boost::mt19937&, boost::uniform_int<> > pseudoRNG(generator, uni_dist);

  typedef std::vector<std::pair<std::string::size_type, std::string> > IndexType;
  IndexType idx = MRMDecoy::find_all_tryptic(peptide.sequence);

  // amino acids used by the fallback step (replacement / appending)
  std::string aa[] =
  {
    "A", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "M", "F", "S", "T", "W",
    "Y", "V"
  };
  int aa_size = 17;

  int attempts = 0;
  // loop: copy the original peptide, attempt to shuffle it and check whether
  // the difference is large enough
  while (MRMDecoy::AASequenceIdentity(peptide.sequence, shuffled.sequence) > identity_threshold &&
         attempts < max_attempts)
  {
    shuffled = peptide;
    std::vector<Size> peptide_index;
    for (Size i = 0; i < peptide.sequence.size(); i++)
    {
      peptide_index.push_back(i);
    }

    // we erase the indices where K/P/R are (from the back / in reverse order
    // to not delete indices we access later)
    for (IndexType::reverse_iterator it = idx.rbegin(); it != idx.rend(); ++it)
    {
      peptide_index.erase(peptide_index.begin() + it->first);
    }

    // shuffle the peptide index (without the K/P/R which we leave in place)
    // one could also use std::random_shuffle here but then the code becomes
    // untestable since the implementation of std::random_shuffle differs
    // between libc++ (llvm/mac-osx) and libstdc++ (gcc) and VS
    // see also https://code.google.com/p/chromium/issues/detail?id=358564
    // the actual code here for the shuffling is based on the implementation of
    // std::random_shuffle in libstdc++
    if (peptide_index.begin() != peptide_index.end())
    {
      for (std::vector<Size>::iterator pI_it = peptide_index.begin() + 1; pI_it != peptide_index.end(); ++pI_it)
      {
        // swap current position with random element from vector
        // swapping positions are random in range [0, current_position + 1)
        // which can be at most [0, n)
        std::iter_swap(pI_it, peptide_index.begin() + pseudoRNG((pI_it - peptide_index.begin()) + 1));
      }
    }

    // re-insert the missing K/P/R at the appropriate places
    for (IndexType::iterator it = idx.begin(); it != idx.end(); ++it)
    {
      peptide_index.insert(peptide_index.begin() + it->first, it->first);
    }

    // use the shuffled index to create the new peptide sequence and then to
    // place the modifications at their appropriate places (at the same,
    // shuffled AA where they were before).
    for (Size i = 0; i < peptide_index.size(); i++)
    {
      shuffled.sequence[i] = peptide.sequence[peptide_index[i]];
    }
    for (Size j = 0; j < shuffled.mods.size(); j++)
    {
      for (Size k = 0; k < peptide_index.size(); k++)
      {
        // C and N terminal mods are implicitly not shuffled because they live
        // at positions -1 and sequence.size()
        if (boost::numeric_cast<int>(peptide_index[k]) == shuffled.mods[j].location)
        {
          shuffled.mods[j].location = boost::numeric_cast<int>(k);
          break;
        }
      }
    }

#ifdef DEBUG_MRMDECOY
    for (Size j = 0; j < shuffled.mods.size(); j++)
    {
      std::cout << " position after shuffling " << shuffled.mods[j].location << " mass difference " << shuffled.mods[j].mono_mass_delta << std::endl;
    }
#endif

    ++attempts;

    // If our attempts have failed so far, we alter the peptide itself: either
    // one unmodified residue is replaced by a random AA, or two random AA are
    // appended to the sequence. Then we see whether we can achieve sufficient
    // shuffling with the altered sequence.
    if (attempts % 10 == 9)
    {
      if (replace_aa_instead_append)
      {
        OpenMS::AASequence shuffled_sequence = TargetedExperimentHelper::getAASequence(shuffled);
        int res_pos = (pseudoRNG() % aa_size);
        int pep_pos = -1;
        size_t pos_trials = 0;
        // try at most size() random positions until one is found that may be replaced
        while (pep_pos < 0 && pos_trials < shuffled_sequence.size())
        {
          pep_pos = (pseudoRNG() % shuffled_sequence.size());
          const int last_pos = static_cast<int>(shuffled_sequence.size() - 1);
          // Never replace a residue that carries a modification, nor a
          // terminal residue whose terminus is modified. The C-terminal
          // residue has to be guarded by the C-terminal modification check
          // (the N-terminal check used to be applied to both termini).
          if (shuffled_sequence[pep_pos].isModified() ||
              (shuffled_sequence.hasNTerminalModification() && pep_pos == 0) ||
              (shuffled_sequence.hasCTerminalModification() && pep_pos == last_pos))
          {
            pep_pos = -1;
          }
          else
          {
            if (pep_pos == 0)
            {
              shuffled_sequence = AASequence::fromString(aa[res_pos]) + shuffled_sequence.getSuffix(shuffled_sequence.size() - pep_pos - 1);
            }
            else if (pep_pos == last_pos)
            {
              shuffled_sequence = shuffled_sequence.getPrefix(pep_pos) + AASequence::fromString(aa[res_pos]);
            }
            else
            {
              shuffled_sequence = shuffled_sequence.getPrefix(pep_pos) + AASequence::fromString(aa[res_pos]) + shuffled_sequence.getSuffix(shuffled_sequence.size() - pep_pos - 1);
            }
          }
          ++pos_trials;
        }
        // only the plain sequence is taken over; the modifications stay in
        // shuffled.mods with their (already remapped) locations
        shuffled.sequence = shuffled_sequence.toUnmodifiedString();
        peptide = shuffled;
      }
      else
      {
        int pos = (pseudoRNG() % aa_size);
        peptide.sequence.append(aa[pos]);
        pos = (pseudoRNG() % aa_size);
        peptide.sequence.append(aa[pos]);
        // now make the shuffled peptide the same length as the new peptide
        shuffled = peptide;
      }
    }
  }

  return shuffled;
}