void ConsensusIDAlgorithm::apply(vector<PeptideIdentification>& ids,
                                   Size number_of_runs)
  {
    // abort if no IDs present
    if (ids.empty())
    {
      return;
    }

    number_of_runs_ = (number_of_runs != 0) ? number_of_runs : ids.size();

    // prepare data here, so that it doesn't have to happen in each algorithm:
    for (vector<PeptideIdentification>::iterator pep_it = ids.begin(); 
         pep_it != ids.end(); ++pep_it)
    {
      pep_it->sort();
      if ((considered_hits_ > 0) &&
          (pep_it->getHits().size() > considered_hits_))
      {
        pep_it->getHits().resize(considered_hits_);
      }
    }
    // make sure there are no duplicated hits (by sequence):
    IDFilter::removeDuplicatePeptideHits(ids, true);

    SequenceGrouping results;
    apply_(ids, results); // actual (subclass-specific) processing

    String score_type = ids[0].getScoreType();
    bool higher_better = ids[0].isHigherScoreBetter();
    ids.clear();
    ids.resize(1);
    ids[0].setScoreType(score_type);
    ids[0].setHigherScoreBetter(higher_better);
    for (SequenceGrouping::iterator res_it = results.begin(); 
         res_it != results.end(); ++res_it)
    {
      OPENMS_PRECONDITION(!res_it->second.second.empty(),
                          "Consensus score for peptide required");
      PeptideHit hit;

      if (res_it->second.second.size() == 2)
      {
        // filter by "support" value:
        double support = res_it->second.second[1];
        if (support < min_support_) continue;
        hit.setMetaValue("consensus_support", support);
      }
      
      hit.setSequence(res_it->first);
      hit.setCharge(res_it->second.first);
      hit.setScore(res_it->second.second[0]);
      ids[0].insertHit(hit);
#ifdef DEBUG_ID_CONSENSUS
      LOG_DEBUG << " - Output hit: " << hit.getSequence() << " "
                << hit.getScore() << endl;
#endif
    }
    ids[0].assignRanks();
  }
Exemple #2
0
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec)
{
    //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0)
    //{
    //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl;
    //return;
    //}

    PeakSpectrum new_CID_spec(CID_spec);
    windowMower_(new_CID_spec, 0.3, 1);

    Param zhang_param;
    zhang_param = zhang_.getParameters();
    zhang_param.setValue("tolerance", fragment_mass_tolerance_);
    zhang_param.setValue("use_gaussian_factor", "true");
    zhang_param.setValue("use_linear_factor", "false");
    zhang_.setParameters(zhang_param);


    Normalizer normalizer;
    Param n_param(normalizer.getParameters());
    n_param.setValue("method", "to_one");
    normalizer.setParameters(n_param);
    normalizer.filterSpectrum(new_CID_spec);

    Size charge(2);
    double precursor_weight(0);     // [M+H]+
    if (!CID_spec.getPrecursors().empty())
    {
        // believe charge of spectrum?
        if (CID_spec.getPrecursors().begin()->getCharge() != 0)
        {
            charge = CID_spec.getPrecursors().begin()->getCharge();
        }
        else
        {
            // TODO estimate charge state
        }
        precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U);
    }

    //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl;

    // now delete all peaks that are right of the estimated precursor weight
    Size peak_counter(0);
    for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter)
    {
        if (it->getPosition()[0] > precursor_weight)
        {
            break;
        }
    }
    if (peak_counter < new_CID_spec.size())
    {
        new_CID_spec.resize(peak_counter);
    }


    static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight();

    Peak1D p;
    p.setIntensity(1);
    p.setPosition(oxonium_mass);

    new_CID_spec.push_back(p);

    p.setPosition(precursor_weight);
    new_CID_spec.push_back(p);

    // add complement to spectrum
    /*
    for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
    {
    // get m/z of complement
    double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U;

    // search if peaks are available that have similar m/z values
    Size count(0);
    bool found(false);
    for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count)
    {
    if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance)
    {
      // add peak intensity to corresponding peak in new_CID_spec
      new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity());
    }
    }
    if (!found)
    {
    // infer this peak
    Peak1D p;
    p.setIntensity(it1->getIntensity());
    p.setPosition(mz_comp);
    new_CID_spec.push_back(p);
    }
    }*/

    CompNovoIonScoringCID ion_scoring;
    Param ion_scoring_param(ion_scoring.getParameters());
    ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_);
    ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_);
    ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_);
    ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold"));
    ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score"));
    ion_scoring_param.setValue("max_isotope", max_isotope_);
    ion_scoring.setParameters(ion_scoring_param);

    Map<double, IonScore> ion_scores;
    ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge);

    new_CID_spec.sortByPosition();

    /*
    cerr << "Size of ion_scores " << ion_scores.size() << endl;
    for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        cerr << it->first << " " << it->second.score << endl;
    }*/

#ifdef WRITE_SCORED_SPEC
    PeakSpectrum filtered_spec(new_CID_spec);
    filtered_spec.clear();
    for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        Peak1D p;
        p.setIntensity(it->second.score);
        p.setPosition(it->first);
        filtered_spec.push_back(p);
    }
    DTAFile().store("spec_scored.dta", filtered_spec);
#endif

    set<String> sequences;
    getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores);

#ifdef SPIKE_IN
    sequences.insert("AFCVDGEGR");
    sequences.insert("APEFAAPWPDFVPR");
    sequences.insert("AVKQFEESQGR");
    sequences.insert("CCTESLVNR");
    sequences.insert("DAFLGSFLYEYSR");
    sequences.insert("DAIPENLPPLTADFAEDK");
    sequences.insert("DDNKVEDIWSFLSK");
    sequences.insert("DDPHACYSTVFDK");
    sequences.insert("DEYELLCLDGSR");
    sequences.insert("DGAESYKELSVLLPNR");
    sequences.insert("DGASCWCVDADGR");
    sequences.insert("DLFIPTCLETGEFAR");
    sequences.insert("DTHKSEIAHR");
    sequences.insert("DVCKNYQEAK");
    sequences.insert("EACFAVEGPK");
    sequences.insert("ECCHGDLLECADDR");
    sequences.insert("EFLGDKFYTVISSLK");
    sequences.insert("EFTPVLQADFQK");
    sequences.insert("ELFLDSGIFQPMLQGR");
    sequences.insert("ETYGDMADCCEK");
    sequences.insert("EVGCPSSSVQEMVSCLR");
    sequences.insert("EYEATLEECCAK");
    sequences.insert("FADLIQSGTFQLHLDSK");
    sequences.insert("FFSASCVPGATIEQK");
    sequences.insert("FLANVSTVLTSK");
    sequences.insert("FLSGSDYAIR");
    sequences.insert("FTASCPPSIK");
    sequences.insert("GAIEWEGIESGSVEQAVAK");
    sequences.insert("GDVAFIQHSTVEENTGGK");
    sequences.insert("GEPPSCAEDQSCPSER");
    sequences.insert("GEYVPTSLTAR");
    sequences.insert("GQEFTITGQKR");
    sequences.insert("GTFAALSELHCDK");
    sequences.insert("HLVDEPQNLIK");
    sequences.insert("HQDCLVTTLQTQPGAVR");
    sequences.insert("HTTVNENAPDQK");
    sequences.insert("ILDCGSPDTEVR");
    sequences.insert("KCPSPCQLQAER");
    sequences.insert("KGTEFTVNDLQGK");
    sequences.insert("KQTALVELLK");
    sequences.insert("KVPQVSTPTLVEVSR");
    sequences.insert("LALQFTTNAKR");
    sequences.insert("LCVLHEKTPVSEK");
    sequences.insert("LFTFHADICTLPDTEK");
    sequences.insert("LGEYGFQNALIVR");
    sequences.insert("LHVDPENFK");
    sequences.insert("LKECCDKPLLEK");
    sequences.insert("LKHLVDEPQNLIK");
    sequences.insert("LKPDPNTLCDEFK");
    sequences.insert("LLGNVLVVVLAR");
    sequences.insert("LLVVYPWTQR");
    sequences.insert("LRVDPVNFK");
    sequences.insert("LTDEELAFPPLSPSR");
    sequences.insert("LVNELTEFAK");
    sequences.insert("MFLSFPTTK");
    sequences.insert("MPCTEDYLSLILNR");
    sequences.insert("NAPYSGYSGAFHCLK");
    sequences.insert("NECFLSHKDDSPDLPK");
    sequences.insert("NEPNKVPACPGSCEEVK");
    sequences.insert("NLQMDDFELLCTDGR");
    sequences.insert("QAGVQAEPSPK");
    sequences.insert("RAPEFAAPWPDFVPR");
    sequences.insert("RHPEYAVSVLLR");
    sequences.insert("RPCFSALTPDETYVPK");
    sequences.insert("RSLLLAPEEGPVSQR");
    sequences.insert("SAFPPEPLLCSVQR");
    sequences.insert("SAGWNIPIGTLLHR");
    sequences.insert("SCWCVDEAGQK");
    sequences.insert("SGNPNYPHEFSR");
    sequences.insert("SHCIAEVEK");
    sequences.insert("SISSGFFECER");
    sequences.insert("SKYLASASTMDHAR");
    sequences.insert("SLHTLFGDELCK");
    sequences.insert("SLLLAPEEGPVSQR");
    sequences.insert("SPPQCSPDGAFRPVQCK");
    sequences.insert("SREGDPLAVYLK");
    sequences.insert("SRQIPQCPTSCER");
    sequences.insert("TAGTPVSIPVCDDSSVK");
    sequences.insert("TCVADESHAGCEK");
    sequences.insert("TQFGCLEGFGR");
    sequences.insert("TVMENFVAFVDK");
    sequences.insert("TYFPHFDLSHGSAQVK");
    sequences.insert("TYMLAFDVNDEK");
    sequences.insert("VDEVGGEALGR");
    sequences.insert("VDLLIGSSQDDGLINR");
    sequences.insert("VEDIWSFLSK");
    sequences.insert("VGGHAAEYGAEALER");
    sequences.insert("VGTRCCTKPESER");
    sequences.insert("VKVDEVGGEALGR");
    sequences.insert("VKVDLLIGSSQDDGLINR");
    sequences.insert("VLDSFSNGMK");
    sequences.insert("VLSAADKGNVK");
    sequences.insert("VPQVSTPTLVEVSR");
    sequences.insert("VTKCCTESLVNR");
    sequences.insert("VVAASDASQDALGCVK");
    sequences.insert("VVAGVANALAHR");
    sequences.insert("YICDNQDTISSK");
    sequences.insert("YLASASTMDHAR");
    sequences.insert("YNGVFQECCQAEDK");
#endif

    SpectrumAlignmentScore spectra_zhang;
    spectra_zhang.setParameters(zhang_param);

    vector<PeptideHit> hits;
    Size missed_cleavages = param_.getValue("missed_cleavages");
    for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it)
    {

        Size num_missed = countMissedCleavagesTryptic_(*it);
        if (missed_cleavages < num_missed)
        {
            //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl;
            continue;
        }
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, *it, charge);

        //normalizer.filterSpectrum(CID_sim_spec);

        double cid_score = zhang_(CID_sim_spec, CID_spec);

        PeptideHit hit;
        hit.setScore(cid_score);

        hit.setSequence(getModifiedAASequence_(*it));
        hit.setCharge((Int)charge);   //TODO unify charge interface: int or size?
        hits.push_back(hit);
        //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl;
    }

    // rescore the top hits
    id.setHits(hits);
    id.assignRanks();

    hits = id.getHits();

    SpectrumAlignmentScore alignment_score;
    Param align_param(alignment_score.getParameters());
    align_param.setValue("tolerance", fragment_mass_tolerance_);
    align_param.setValue("use_linear_factor", "true");
    alignment_score.setParameters(align_param);

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits");
    if (hits.size() > number_of_prescoring_hits)
    {
        hits.resize(number_of_prescoring_hits);
    }

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge);

        normalizer.filterSpectrum(CID_sim_spec);

        //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec);

        //double cid_score = spectra_zhang(CID_sim_spec, CID_spec);
        double cid_score = alignment_score(CID_sim_spec, CID_spec);

        //cerr << "Final: " << it->getSequence() << " " << cid_score << endl;

        it->setScore(cid_score);
    }

    id.setHits(hits);
    id.assignRanks();
    hits = id.getHits();

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_hits = param_.getValue("number_of_hits");
    if (id.getHits().size() > number_of_hits)
    {
        hits.resize(number_of_hits);
    }

    id.setHits(hits);
    id.assignRanks();

    return;
}