/**
  @brief Recursively generates every ordering of the characters in @p s.

  Each generated string is @p prefix followed by one ordering of @p s and is
  inserted into @p permutations. Because the output is a set, orderings that
  coincide (repeated characters in @p s) are stored only once.

  @param prefix        characters already fixed in front of the remaining ones
  @param s             remaining characters still to be ordered (taken by value and
                       modified locally while recursing)
  @param permutations  output set; results are added, existing entries are kept
*/
void CompNovoIdentificationBase::permute_(String prefix, String s, set<String> & permutations)
{
  if (s.size() <= 1)
  {
    // nothing left to reorder: the prefix plus the (possibly empty) rest is one result
    permutations.insert(prefix + s);
    return;
  }

  for (String::Iterator p = s.begin(); p != s.end(); ++p)
  {
    // take the current character out, permute the rest behind it ...
    char c = *p;
    p = s.erase(p);
    permute_(prefix + c, s, permutations);

    // ... and put it back at the same position. insert() may invalidate all
    // iterators into the string, so continue from the iterator it returns
    // (which refers to the re-inserted character) instead of the stale one.
    p = s.insert(p, c);
  }
}
示例#2
0
/**
  @brief Divide and conquer algorithm of the sequencing.

  Collects candidate sequences for the part of the spectrum between the peaks
  with index @p left and @p right and writes them to @p sequences.

  Steps, in order:
  - If a result for (@p left, @p right) is already stored in subspec_to_sequences_,
    @p sequences is overwritten with it and the function returns.
  - If the mass difference of the two peaks is below min_aa_weight_, the function
    returns without modifying @p sequences (nothing is cached in this case).
  - If the mass difference is at most max_decomp_weight_, the difference is
    decomposed directly, all permutations of each decomposition are added to
    @p sequences, the result is filtered/reduced and stored in the cache.
  - Otherwise pivot peaks are selected between @p left and @p right, both halves
    are solved recursively for each pivot, and the partial results are combined.

  @param sequences       output set of candidate sequences
  @param left            index of the left peak in @p CID_spec
  @param right           index of the right peak in @p CID_spec
  @param peptide_weight  weight of the whole peptide, used to compute the prefix offset
  @param CID_spec        spectrum the peak indices refer to
  @param ion_scores      ion scores forwarded to the pivot selection
*/
void CompNovoIdentificationCID::getDecompositionsDAC_(set<String> & sequences, Size left, Size right, double peptide_weight, const PeakSpectrum & CID_spec, Map<double, CompNovoIonScoringCID::IonScore> & ion_scores)
{
    static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight();
    double offset_suffix(CID_spec[left].getPosition()[0] - oxonium_mass);
    double offset_prefix(peptide_weight - CID_spec[right].getPosition()[0]);

#ifdef DAC_DEBUG
    static Int depth_(0);
    ++depth_;
    String tabs_(depth_, '\t');
    cerr << tabs_ << "void getDecompositionsDAC(sequences[" << sequences.size() << "], " << left << ", " << right << ") ";
    cerr << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " diff=";
#endif

    double diff = CID_spec[right].getPosition()[0] - CID_spec[left].getPosition()[0];

#ifdef DAC_DEBUG
    cerr << diff << endl;
    cerr << "offset_prefix=" << offset_prefix << ", offset_suffix=" << offset_suffix << endl;
#endif

    // already solved for this pair of peaks? then reuse the stored result
    if (subspec_to_sequences_.has(left) && subspec_to_sequences_[left].has(right))
    {
        sequences = subspec_to_sequences_[left][right];

#ifdef DAC_DEBUG
        depth_--;
        cerr << tabs_ << "from cache DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << " " << left << " " << right << endl;
#endif
        return;
    }

    // no further solutions possible?
    if (diff < min_aa_weight_)
    {
#ifdef DAC_DEBUG
        depth_--;
#endif
        return;
    }

    // no further division needed?
    if (diff <= max_decomp_weight_)
    {
        vector<MassDecomposition> decomps;

        // if we are at the C-terminus use precursor_mass_tolerance_
        if (offset_prefix < precursor_mass_tolerance_)
        {
            // temporarily switch the decomposition tolerance and restore the
            // fragment tolerance right after the decomposition
            Param decomp_param(mass_decomp_algorithm_.getParameters());
            decomp_param.setValue("tolerance", precursor_mass_tolerance_);
            mass_decomp_algorithm_.setParameters(decomp_param);
            getDecompositions_(decomps, diff);
            decomp_param.setValue("tolerance", fragment_mass_tolerance_);
            mass_decomp_algorithm_.setParameters(decomp_param);
        }
        else
        {
            getDecompositions_(decomps, diff);
        }
        //filterDecomps_(decomps);

#ifdef DAC_DEBUG
        cerr << tabs_ << "Found " << decomps.size() << " decomps" << endl;
        cerr << tabs_ << "Permuting...";
#endif

        for (vector<MassDecomposition>::const_iterator it = decomps.begin(); it != decomps.end(); ++it)
        {
#ifdef DAC_DEBUG
            cerr << it->toString() << endl;
#endif

            String exp_string = it->toExpandedString();

            // The cache entry must hold the permutations of exp_string only, and
            // a cache hit must be merged into (not replace) what earlier
            // decompositions of this loop already contributed.
            const bool already_cached = permute_cache_.has(exp_string);
            set<String> & cached_permutations = permute_cache_[exp_string];
            if (!already_cached)
            {
                permute_("", exp_string, cached_permutations);
            }
            sequences.insert(cached_permutations.begin(), cached_permutations.end());
        }

#ifdef DAC_DEBUG
        cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl;
        if (sequences.size() > max_subscore_number_)
        {
            cerr << tabs_ << "Reducing #sequences from " << sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix  << ", suffix=" << offset_suffix << ")...";
        }
#endif

        // C-terminus
        // NOTE(review): this branch compares against precursor_mass_tolerance_, the
        // equivalent check in the divide step uses fragment_mass_tolerance_ - confirm
        // which one is intended.
        if (offset_suffix <= precursor_mass_tolerance_)
        {
            filterPermuts_(sequences);
        }

        // reduce the sequences
        reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix);
#ifdef DAC_DEBUG
        cerr << "Writing to cache " << left << " " << right << endl;
#endif
        subspec_to_sequences_[left][right] = sequences;

#ifdef DAC_DEBUG
        cerr << "ended" << endl;
        cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl;
        depth_--;
#endif

        return;
    }

    // select suitable pivot peaks; the last argument is set only if both
    // offsets are below the precursor mass tolerance
    vector<Size> pivots;
    const bool both_offsets_within_tolerance = offset_suffix < precursor_mass_tolerance_ && offset_prefix < precursor_mass_tolerance_;
    selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, both_offsets_within_tolerance);

    // run divide step
#ifdef DAC_DEBUG
    cerr << tabs_ << "Selected " << pivots.size() << " pivot ions: ";
    for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it)
    {
        cerr << *it << "(" << CID_spec[*it].getPosition()[0] << ") ";
    }
    cerr << endl;
#endif

    for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it)
    {
        set<String> seq1, seq2, new_sequences;

        // the smaller the 'gap' the greater the chance of not finding anything
        // so we compute the smaller gap first
        double diff1(CID_spec[*it].getPosition()[0] - CID_spec[left].getPosition()[0]);
        double diff2(CID_spec[right].getPosition()[0] - CID_spec[*it].getPosition()[0]);

        if (diff1 < diff2)
        {
            getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores);
            if (seq1.empty())
            {
#ifdef DAC_DEBUG
                cerr << tabs_ << "first call produced 0 candidates (" << diff1 << ")" << endl;
#endif
                continue;
            }

            getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores);
        }
        else
        {
            getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores);
            if (seq2.empty())
            {
#ifdef DAC_DEBUG
                cerr << tabs_ << "second call produced 0 candidates (" << diff2 << ")" << endl;
#endif
                continue;
            }

            getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores);
        }

#ifdef DAC_DEBUG
        cerr << tabs_ << "Found " << seq1.size() << " solutions (1) " << diff1 << endl;
        cerr << tabs_ << "Found " << seq2.size() << " solutions (2) " << diff2 << endl;
        cerr << tabs_ << "inserting " << seq1.size() * seq2.size()  << " sequences" << endl;
#endif

        // C-terminus
        if (offset_suffix <= fragment_mass_tolerance_)
        {
            filterPermuts_(seq1);
        }

        // test if we found enough sequence candidates
        // (seq1 may have become empty through the filter above)
        if (seq1.empty() || seq2.empty())
        {
            continue;
        }

        // combine every right-hand candidate with every left-hand candidate;
        // the right-hand part (seq2) is written first
        for (set<String>::const_iterator it1 = seq1.begin(); it1 != seq1.end(); ++it1)
        {
            for (set<String>::const_iterator it2 = seq2.begin(); it2 != seq2.end(); ++it2)
            {
                new_sequences.insert(*it2 + *it1);
            }
        }

        if (seq1.size() * seq2.size() > max_subscore_number_ /* && (offset_prefix > fragment_mass_tolerance_ || offset_suffix > fragment_mass_tolerance_)*/)
        {
#ifdef DAC_DEBUG
            cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl;
            cerr << tabs_ << "Reducing #sequences from " << new_sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix  << ", suffix=" << offset_suffix << ")...";
#endif
            if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_)
            {
                reducePermuts_(new_sequences, CID_spec, offset_prefix, offset_suffix);
            }

#ifdef DAC_DEBUG
            for (set<String>::const_iterator it1 = new_sequences.begin(); it1 != new_sequences.end(); ++it1)
            {
                cerr << tabs_ << *it1 << endl;
            }
            cerr << endl;
#endif
        }

        // add the candidates found via this pivot to the overall result
        sequences.insert(new_sequences.begin(), new_sequences.end());
    }
#ifdef DAC_DEBUG
    cerr << tabs_ << "Found sequences for " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << endl;
    for (set<String>::const_iterator sit = sequences.begin(); sit != sequences.end(); ++sit)
    {
        cerr << tabs_ << *sit << endl;
    }
#endif

    // reduce the permuts once again to reduce complexity
    if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_)
    {
        reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix);
    }

#ifdef DAC_DEBUG
    cerr << "Writing to cache " << left << " " << right << endl;
#endif

    subspec_to_sequences_[left][right] = sequences;

#ifdef DAC_DEBUG
    depth_--;
    cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl;
#endif
}