// Recursively enumerates the distinct orderings of the characters in 's'.
//
// Every ordering is prepended with 'prefix' and inserted into 'permutations'.
// The output set is only ever inserted into; entries already present are kept.
//
// prefix        characters already fixed by the enclosing recursion levels
// s             characters still to be placed (a by-value copy, modified locally)
// permutations  output set receiving prefix + <ordering of s>
void CompNovoIdentificationBase::permute_(String prefix, String s, set<String> & permutations)
{
  // zero or one remaining character: exactly one ordering is left
  if (s.size() <= 1)
  {
    permutations.insert(prefix + s);
    return;
  }

  // Characters already chosen for the current position. Choosing the same
  // character again leads to the same prefix and the same remainder, hence to
  // the same results, so such repeats are skipped.
  std::string tried;

  for (String::Iterator p = s.begin(); p != s.end(); ++p)
  {
    char c = *p;
    if (tried.find(c) != std::string::npos)
    {
      continue;
    }
    tried.push_back(c);

    // take 'c' out, permute the remainder, then put 'c' back at the same place
    p = s.erase(p);
    permute_(prefix + c, s, permutations);
    // insert() may invalidate iterators; continue from the iterator it returns
    p = s.insert(p, c);
  }
}
// divide and conquer algorithm of the sequencing void CompNovoIdentificationCID::getDecompositionsDAC_(set<String> & sequences, Size left, Size right, double peptide_weight, const PeakSpectrum & CID_spec, Map<double, CompNovoIonScoringCID::IonScore> & ion_scores) { static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); double offset_suffix(CID_spec[left].getPosition()[0] - oxonium_mass); double offset_prefix(peptide_weight - CID_spec[right].getPosition()[0]); #ifdef DAC_DEBUG static Int depth_(0); ++depth_; String tabs_(depth_, '\t'); cerr << tabs_ << "void getDecompositionsDAC(sequences[" << sequences.size() << "], " << left << ", " << right << ") "; cerr << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " diff="; #endif double diff = CID_spec[right].getPosition()[0] - CID_spec[left].getPosition()[0]; #ifdef DAC_DEBUG cerr << diff << endl; cerr << "offset_prefix=" << offset_prefix << ", offset_suffix=" << offset_suffix << endl; #endif if (subspec_to_sequences_.has(left) && subspec_to_sequences_[left].has(right)) { sequences = subspec_to_sequences_[left][right]; #ifdef DAC_DEBUG depth_--; cerr << tabs_ << "from cache DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << " " << left << " " << right << endl; #endif return; } // no further solutions possible? if (diff < min_aa_weight_) { #ifdef DAC_DEBUG depth_--; #endif return; } // no further division needed? 
if (diff <= max_decomp_weight_) { vector<MassDecomposition> decomps; // if we are at the C-terminus use precursor_mass_tolerance_ if (offset_prefix < precursor_mass_tolerance_) { Param decomp_param(mass_decomp_algorithm_.getParameters()); decomp_param.setValue("tolerance", precursor_mass_tolerance_); mass_decomp_algorithm_.setParameters(decomp_param); getDecompositions_(decomps, diff); decomp_param.setValue("tolerance", fragment_mass_tolerance_); mass_decomp_algorithm_.setParameters(decomp_param); } else { getDecompositions_(decomps, diff); } //filterDecomps_(decomps); #ifdef DAC_DEBUG cerr << tabs_ << "Found " << decomps.size() << " decomps" << endl; cerr << tabs_ << "Permuting..."; #endif //static Map<String, set<String> > permute_cache; for (vector<MassDecomposition>::const_iterator it = decomps.begin(); it != decomps.end(); ++it) { #ifdef DAC_DEBUG cerr << it->toString() << endl; #endif String exp_string = it->toExpandedString(); if (!permute_cache_.has(exp_string)) { permute_("", exp_string, sequences); permute_cache_[exp_string] = sequences; } else { sequences = permute_cache_[exp_string]; } } #ifdef DAC_DEBUG cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl; if (sequences.size() > max_subscore_number_) { cerr << tabs_ << "Reducing #sequences from " << sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix << ", suffix=" << offset_suffix << ")..."; } #endif // C-terminus if (offset_suffix <= precursor_mass_tolerance_) { filterPermuts_(sequences); } // reduce the sequences reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix); #ifdef DAC_DEBUG cerr << "Writing to cache " << left << " " << right << endl; #endif subspec_to_sequences_[left][right] = sequences; #ifdef DAC_DEBUG cerr << "ended" << endl; cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl; depth_--; 
#endif return; } // select suitable pivot peaks vector<Size> pivots; if (offset_suffix < precursor_mass_tolerance_ && offset_prefix < precursor_mass_tolerance_) { selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, true); } else { selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, false); } // run divide step #ifdef DAC_DEBUG cerr << tabs_ << "Selected " << pivots.size() << " pivot ions: "; for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it) { cerr << *it << "(" << CID_spec[*it].getPosition()[0] << ") "; } cerr << endl; #endif for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it) { set<String> seq1, seq2, new_sequences; // the smaller the 'gap' the greater the chance of not finding anything // so we we compute the smaller gap first double diff1(CID_spec[*it].getPosition()[0] - CID_spec[left].getPosition()[0]); double diff2(CID_spec[right].getPosition()[0] - CID_spec[*it].getPosition()[0]); if (diff1 < diff2) { getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores); if (seq1.empty()) { #ifdef DAC_DEBUG cerr << tabs_ << "first call produced 0 candidates (" << diff1 << ")" << endl; #endif continue; } getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores); } else { getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores); if (seq2.empty()) { #ifdef DAC_DEBUG cerr << tabs_ << "second call produced 0 candidates (" << diff2 << ")" << endl; #endif continue; } getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores); } #ifdef DAC_DEBUG cerr << tabs_ << "Found " << seq1.size() << " solutions (1) " << diff1 << endl; cerr << tabs_ << "Found " << seq2.size() << " solutions (2) " << diff2 << endl; cerr << tabs_ << "inserting " << seq1.size() * seq2.size() << " sequences" << endl; #endif // C-terminus if (offset_suffix <= fragment_mass_tolerance_) { filterPermuts_(seq1); } // test if we found 
enough sequence candidates if (seq1.empty() || seq2.empty()) { continue; } for (set<String>::const_iterator it1 = seq1.begin(); it1 != seq1.end(); ++it1) { for (set<String>::const_iterator it2 = seq2.begin(); it2 != seq2.end(); ++it2) { new_sequences.insert(*it2 + *it1); } } if (seq1.size() * seq2.size() > max_subscore_number_ /* && (offset_prefix > fragment_mass_tolerance_ || offset_suffix > fragment_mass_tolerance_)*/) { #ifdef DAC_DEBUG cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl; cerr << tabs_ << "Reducing #sequences from " << new_sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix << ", suffix=" << offset_suffix << ")..."; #endif if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_) { reducePermuts_(new_sequences, CID_spec, offset_prefix, offset_suffix); } #ifdef DAC_DEBUG for (set<String>::const_iterator it1 = new_sequences.begin(); it1 != new_sequences.end(); ++it1) { cerr << tabs_ << *it1 << endl; } cerr << endl; #endif } for (set<String>::const_iterator sit = new_sequences.begin(); sit != new_sequences.end(); ++sit) { sequences.insert(*sit); } } #ifdef DAC_DEBUG cerr << tabs_ << "Found sequences for " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << endl; for (set<String>::const_iterator sit = sequences.begin(); sit != sequences.end(); ++sit) { cerr << tabs_ << *sit << endl; } #endif // reduce the permuts once again to reduce complexity if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_) { reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix); } #ifdef DAC_DEBUG cerr << "Writing to cache " << left << " " << right << endl; #endif subspec_to_sequences_[left][right] = sequences; #ifdef DAC_DEBUG depth_--; cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << 
sequences.size() << endl; #endif return; }