Beispiel #1
0
  // Common entry point of the consensus ID algorithms: prepares the input
  // identifications, hands them to the subclass-specific apply_() for
  // grouping/scoring, and finally replaces the contents of 'ids' with one
  // single identification that holds the consensus hits.
  //
  // 'ids': input identifications; on return the vector contains exactly one
  //        element (it is left untouched if it was empty on entry).
  // 'number_of_runs': number of ID runs to assume; zero means "use the
  //                   number of identifications passed in".
  void ConsensusIDAlgorithm::apply(vector<PeptideIdentification>& ids,
                                   Size number_of_runs)
  {
    if (ids.empty()) return; // nothing to do without any IDs

    if (number_of_runs == 0)
    {
      number_of_runs_ = ids.size();
    }
    else
    {
      number_of_runs_ = number_of_runs;
    }

    // shared preparation, done once here instead of in every algorithm:
    // sort each ID and, if a limit is configured, drop the surplus hits
    const bool limit_hits = (considered_hits_ > 0);
    for (Size i = 0; i < ids.size(); ++i)
    {
      PeptideIdentification& current = ids[i];
      current.sort();
      if (limit_hits && (current.getHits().size() > considered_hits_))
      {
        current.getHits().resize(considered_hits_);
      }
    }
    // make sure there are no duplicated hits (by sequence):
    IDFilter::removeDuplicatePeptideHits(ids, true);

    // the actual work is subclass-specific:
    SequenceGrouping results;
    apply_(ids, results);

    // keep the score properties of the first ID, then discard the input and
    // start over with one empty identification that receives the output
    const String score_type = ids[0].getScoreType();
    const bool higher_better = ids[0].isHigherScoreBetter();
    ids.clear();
    ids.resize(1);
    PeptideIdentification& consensus = ids[0];
    consensus.setScoreType(score_type);
    consensus.setHigherScoreBetter(higher_better);

    for (SequenceGrouping::iterator group = results.begin();
         group != results.end(); ++group)
    {
      // first value: consensus score; optional second value: "support"
      const vector<double>& values = group->second.second;
      OPENMS_PRECONDITION(!values.empty(),
                          "Consensus score for peptide required");

      // skip peptides whose support is below the required minimum:
      const bool has_support = (values.size() == 2);
      if (has_support && (values[1] < min_support_)) continue;

      PeptideHit hit;
      hit.setSequence(group->first);
      hit.setCharge(group->second.first);
      hit.setScore(values[0]);
      if (has_support)
      {
        hit.setMetaValue("consensus_support", values[1]);
      }
      consensus.insertHit(hit);
#ifdef DEBUG_ID_CONSENSUS
      LOG_DEBUG << " - Output hit: " << hit.getSequence() << " "
                << hit.getScore() << endl;
#endif
    }
    consensus.assignRanks();
  }
  // Groups the peptide hits of all identifications by sequence and fills
  // 'results' with one entry per sequence. After this function, the score
  // list of every entry holds exactly two values: the aggregate score
  // (as computed by getAggregateScore_()) and the "support", i.e. the number
  // of additional scores collected for the sequence divided by the number of
  // other IDs.
  //
  // 'ids': preprocessed input identifications (must not be empty, since the
  //        score orientation is read from the first element).
  // 'results': output mapping from sequence to (charge, [score, support]).
  void ConsensusIDAlgorithmIdentity::apply_(vector<PeptideIdentification>& ids,
                                            SequenceGrouping& results)
  {
    preprocess_(ids);

    // collect the scores of all hits, keyed by peptide sequence:
    for (Size id_index = 0; id_index < ids.size(); ++id_index)
    {
      vector<PeptideHit>& hits = ids[id_index].getHits();
      for (Size hit_index = 0; hit_index < hits.size(); ++hit_index)
      {
        PeptideHit& hit = hits[hit_index];
        const AASequence& seq = hit.getSequence();
        SequenceGrouping::iterator pos = results.find(seq);
        if (pos != results.end()) // sequence was seen before
        {
          compareChargeStates_(pos->second.first, hit.getCharge(),
                               pos->first);
          pos->second.second.push_back(hit.getScore());
        }
        else // first occurrence of this sequence
        {
          results[seq] = make_pair(hit.getCharge(),
                                   vector<double>(1, hit.getScore()));
        }
      }
    }

    // number of IDs other than the one a hit came from; depending on
    // 'count_empty_', this is based on the number of runs or of IDs given:
    Size n_ids = ids.size();
    if (count_empty_)
    {
      n_ids = number_of_runs_;
    }
    const Size n_other_ids = n_ids - 1;
    const bool higher_better = ids[0].isHigherScoreBetter();

    // replace the collected scores by aggregate score and support:
    for (SequenceGrouping::iterator group = results.begin();
         group != results.end(); ++group)
    {
      vector<double>& scores = group->second.second;
      const double score = getAggregateScore_(scores, higher_better);
      // 'n_other_ids' may be zero; the support is then defined to be one,
      // which avoids a division by zero:
      const double support =
        (n_other_ids > 0) ? (scores.size() - 1.0) / n_other_ids : 1.0;
      scores.assign(1, score);
      scores.push_back(support);
    }
  }