// divide and conquer algorithm of the sequencing void CompNovoIdentificationCID::getDecompositionsDAC_(set<String> & sequences, Size left, Size right, double peptide_weight, const PeakSpectrum & CID_spec, Map<double, CompNovoIonScoringCID::IonScore> & ion_scores) { static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); double offset_suffix(CID_spec[left].getPosition()[0] - oxonium_mass); double offset_prefix(peptide_weight - CID_spec[right].getPosition()[0]); #ifdef DAC_DEBUG static Int depth_(0); ++depth_; String tabs_(depth_, '\t'); cerr << tabs_ << "void getDecompositionsDAC(sequences[" << sequences.size() << "], " << left << ", " << right << ") "; cerr << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " diff="; #endif double diff = CID_spec[right].getPosition()[0] - CID_spec[left].getPosition()[0]; #ifdef DAC_DEBUG cerr << diff << endl; cerr << "offset_prefix=" << offset_prefix << ", offset_suffix=" << offset_suffix << endl; #endif if (subspec_to_sequences_.has(left) && subspec_to_sequences_[left].has(right)) { sequences = subspec_to_sequences_[left][right]; #ifdef DAC_DEBUG depth_--; cerr << tabs_ << "from cache DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << " " << left << " " << right << endl; #endif return; } // no further solutions possible? if (diff < min_aa_weight_) { #ifdef DAC_DEBUG depth_--; #endif return; } // no further division needed? if (diff <= max_decomp_weight_) { vector<MassDecomposition> decomps; // if we are at the C-terminus use precursor_mass_tolerance_ if (offset_prefix < precursor_mass_tolerance_) { Param decomp_param(mass_decomp_algorithm_.getParameters()); decomp_param.setValue("tolerance", precursor_mass_tolerance_); mass_decomp_algorithm_.setParameters(decomp_param); getDecompositions_(decomps, diff); decomp_param.setValue("tolerance", fragment_mass_tolerance_); mass_decomp_algorithm_.setParameters(decomp_param); } else { getDecompositions_(decomps, diff); } //filterDecomps_(decomps); #ifdef DAC_DEBUG cerr << tabs_ << "Found " << decomps.size() << " decomps" << endl; cerr << tabs_ << "Permuting..."; #endif //static Map<String, set<String> > permute_cache; for (vector<MassDecomposition>::const_iterator it = decomps.begin(); it != decomps.end(); ++it) { #ifdef DAC_DEBUG cerr << it->toString() << endl; #endif String exp_string = it->toExpandedString(); if (!permute_cache_.has(exp_string)) { permute_("", exp_string, sequences); permute_cache_[exp_string] = sequences; } else { sequences = permute_cache_[exp_string]; } } #ifdef DAC_DEBUG cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl; if (sequences.size() > max_subscore_number_) { cerr << tabs_ << "Reducing #sequences from " << sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix << ", suffix=" << offset_suffix << ")..."; } #endif // C-terminus if (offset_suffix <= precursor_mass_tolerance_) { filterPermuts_(sequences); } // reduce the sequences reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix); #ifdef DAC_DEBUG cerr << "Writing to cache " << left << " " << right << endl; #endif subspec_to_sequences_[left][right] = sequences; #ifdef DAC_DEBUG cerr << "ended" << endl; cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl; depth_--; #endif return; } // select suitable pivot peaks vector<Size> pivots; if (offset_suffix < precursor_mass_tolerance_ && offset_prefix < precursor_mass_tolerance_) { selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, true); } else { selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, false); } // run divide step #ifdef DAC_DEBUG cerr << tabs_ << "Selected " << pivots.size() << " pivot ions: "; for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it) { cerr << *it << "(" << CID_spec[*it].getPosition()[0] << ") "; } cerr << endl; #endif for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it) { set<String> seq1, seq2, new_sequences; // the smaller the 'gap' the greater the chance of not finding anything // so we we compute the smaller gap first double diff1(CID_spec[*it].getPosition()[0] - CID_spec[left].getPosition()[0]); double diff2(CID_spec[right].getPosition()[0] - CID_spec[*it].getPosition()[0]); if (diff1 < diff2) { getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores); if (seq1.empty()) { #ifdef DAC_DEBUG cerr << tabs_ << "first call produced 0 candidates (" << diff1 << ")" << endl; #endif continue; } getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores); } else { getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores); if (seq2.empty()) { #ifdef DAC_DEBUG cerr << tabs_ << "second call produced 0 candidates (" << diff2 << ")" << endl; #endif continue; } getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores); } #ifdef DAC_DEBUG cerr << tabs_ << "Found " << seq1.size() << " solutions (1) " << diff1 << endl; cerr << tabs_ << "Found " << seq2.size() << " solutions (2) " << diff2 << endl; cerr << tabs_ << "inserting " << seq1.size() * seq2.size() << " sequences" << endl; #endif // C-terminus if (offset_suffix <= fragment_mass_tolerance_) { filterPermuts_(seq1); } // test if we found enough sequence candidates if (seq1.empty() || seq2.empty()) { continue; } for (set<String>::const_iterator it1 = seq1.begin(); it1 != seq1.end(); ++it1) { for (set<String>::const_iterator it2 = seq2.begin(); it2 != seq2.end(); ++it2) { new_sequences.insert(*it2 + *it1); } } if (seq1.size() * seq2.size() > max_subscore_number_ /* && (offset_prefix > fragment_mass_tolerance_ || offset_suffix > fragment_mass_tolerance_)*/) { #ifdef DAC_DEBUG cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl; cerr << tabs_ << "Reducing #sequences from " << new_sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix << ", suffix=" << offset_suffix << ")..."; #endif if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_) { reducePermuts_(new_sequences, CID_spec, offset_prefix, offset_suffix); } #ifdef DAC_DEBUG for (set<String>::const_iterator it1 = new_sequences.begin(); it1 != new_sequences.end(); ++it1) { cerr << tabs_ << *it1 << endl; } cerr << endl; #endif } for (set<String>::const_iterator sit = new_sequences.begin(); sit != new_sequences.end(); ++sit) { sequences.insert(*sit); } } #ifdef DAC_DEBUG cerr << tabs_ << "Found sequences for " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << endl; for (set<String>::const_iterator sit = sequences.begin(); sit != sequences.end(); ++sit) { cerr << tabs_ << *sit << endl; } #endif // reduce the permuts once again to reduce complexity if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_) { reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix); } #ifdef DAC_DEBUG cerr << "Writing to cache " << left << " " << right << endl; #endif subspec_to_sequences_[left][right] = sequences; #ifdef DAC_DEBUG depth_--; cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl; #endif return; }
void CompNovoIdentificationBase::updateMembers_() { // init residue mass table String residue_set(param_.getValue("residue_set")); set<const Residue *> residues = ResidueDB::getInstance()->getResidues(residue_set); for (set<const Residue *>::const_iterator it = residues.begin(); it != residues.end(); ++it) { aa_to_weight_[(*it)->getOneLetterCode()[0]] = (*it)->getMonoWeight(Residue::Internal); } max_number_aa_per_decomp_ = param_.getValue("max_number_aa_per_decomp"); tryptic_only_ = param_.getValue("tryptic_only").toBool(); fragment_mass_tolerance_ = (DoubleReal)param_.getValue("fragment_mass_tolerance"); max_number_pivot_ = param_.getValue("max_number_pivot"); decomp_weights_precision_ = (DoubleReal)param_.getValue("decomp_weights_precision"); min_mz_ = (DoubleReal)param_.getValue("min_mz"); max_mz_ = (DoubleReal)param_.getValue("max_mz"); max_decomp_weight_ = (DoubleReal)param_.getValue("max_decomp_weight"); max_subscore_number_ = param_.getValue("max_subscore_number"); max_isotope_ = param_.getValue("max_isotope"); name_to_residue_.clear(); residue_to_name_.clear(); // now handle the modifications ModificationDefinitionsSet mod_set((StringList)param_.getValue("fixed_modifications"), (StringList)param_.getValue("variable_modifications")); set<ModificationDefinition> fixed_mods = mod_set.getFixedModifications(); for (set<ModificationDefinition>::const_iterator it = fixed_mods.begin(); it != fixed_mods.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(it->getModification()); char aa = ' '; if (mod.getOrigin().size() != 1 || mod.getOrigin() == "X") { cerr << "Warning: cannot handle modification " << it->getModification() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl; continue; } else { aa = mod.getOrigin()[0]; } if (mod.getMonoMass() != 0) { aa_to_weight_[aa] = mod.getMonoMass(); } else { if (mod.getDiffMonoMass() != 0) { aa_to_weight_[aa] += mod.getDiffMonoMass(); } else { cerr << "Warning: cannot handle modification " << it->getModification() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl; continue; } } //cerr << "Setting fixed modification " << it->getModification() << " of amino acid '" << aa << "'; weight = " << aa_to_weight_[aa] << endl; const Residue * res = ResidueDB::getInstance()->getModifiedResidue(it->getModification()); name_to_residue_[aa] = res; residue_to_name_[res] = aa; } const StringList mod_names(StringList::create("a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z")); vector<String>::const_iterator actual_mod_name = mod_names.begin(); set<ModificationDefinition> var_mods = mod_set.getVariableModifications(); for (set<ModificationDefinition>::const_iterator it = var_mods.begin(); it != var_mods.end(); ++it) { ResidueModification mod = ModificationsDB::getInstance()->getModification(it->getModification()); char aa = (*actual_mod_name)[0]; char origin_aa = ' '; ++actual_mod_name; if (mod.getOrigin().size() != 1 || mod.getOrigin() == "X") { cerr << "CompNovoIdentificationBase: Warning: cannot handle modification " << it->getModification() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl; continue; } else { origin_aa = mod.getOrigin()[0]; } if (mod.getMonoMass() != 0) { aa_to_weight_[aa] = mod.getMonoMass(); } else { if (mod.getDiffMonoMass() != 0) { aa_to_weight_[aa] = aa_to_weight_[origin_aa] + mod.getDiffMonoMass(); } else { cerr << "CompNovoIdentificationBase: Warning: cannot handle modification " << it->getModification() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl; continue; } } //cerr << "Mapping variable modification " << it->getModification() << " to letter '" << aa << "' (@" << origin_aa << "); weight = " << aa_to_weight_[aa] << endl; const Residue * res = ResidueDB::getInstance()->getModifiedResidue(it->getModification()); name_to_residue_[aa] = res; residue_to_name_[res] = aa; } /* cerr << "Following masses are used for identification: " << endl; for (Map<char, DoubleReal>::const_iterator it = aa_to_weight_.begin(); it != aa_to_weight_.end(); ++it) { cerr << it->first << " " << precisionWrapper(it->second) << endl; }*/ initIsotopeDistributions_(); Param decomp_param(mass_decomp_algorithm_.getParameters()); decomp_param.setValue("tolerance", fragment_mass_tolerance_); decomp_param.setValue("fixed_modifications", (StringList)param_.getValue("fixed_modifications")); decomp_param.setValue("variable_modifications", (StringList)param_.getValue("variable_modifications")); mass_decomp_algorithm_.setParameters(decomp_param); min_aa_weight_ = numeric_limits<DoubleReal>::max(); for (Map<char, DoubleReal>::const_iterator it = aa_to_weight_.begin(); it != aa_to_weight_.end(); ++it) { if (min_aa_weight_ > it->second) { min_aa_weight_ = it->second; } } return; }