/**
  @brief Computes a consensus from @p ids and replaces the contents of @p ids with it.

  On return (for non-empty input) @p ids holds exactly one PeptideIdentification
  that carries the score type and score orientation of the first input ID (as
  read after the subclass-specific processing) and one hit per entry produced by
  apply_() that passed the support filter.

  @param ids Input identifications; overwritten with the single consensus ID.
             If empty, the function returns without doing anything.
  @param number_of_runs Value stored in 'number_of_runs_'; if zero, the number
                        of entries in @p ids is stored instead.
*/
void ConsensusIDAlgorithm::apply(vector<PeptideIdentification>& ids, Size number_of_runs)
{
  // nothing to do without any IDs
  if (ids.empty())
  {
    return;
  }

  // a run count of zero selects the number of IDs that were passed in
  if (number_of_runs != 0)
  {
    number_of_runs_ = number_of_runs;
  }
  else
  {
    number_of_runs_ = ids.size();
  }

  // shared preparation, done once here so that the individual algorithms
  // don't have to repeat it: sort every ID and, if a limit is set, drop all
  // hits beyond the first 'considered_hits_'
  for (Size i = 0; i < ids.size(); ++i)
  {
    PeptideIdentification& current = ids[i];
    current.sort();
    vector<PeptideHit>& hits = current.getHits();
    const bool limit_active = (considered_hits_ > 0);
    if (limit_active && (hits.size() > considered_hits_))
    {
      hits.resize(considered_hits_);
    }
  }

  // make sure there are no duplicated hits (by sequence)
  IDFilter::removeDuplicatePeptideHits(ids, true);

  // the actual, subclass-specific processing
  SequenceGrouping results;
  apply_(ids, results);

  // save the score properties of the first ID before the input is discarded
  const String score_type = ids[0].getScoreType();
  const bool higher_better = ids[0].isHigherScoreBetter();

  // replace the input by a single, initially empty consensus ID
  ids.clear();
  ids.resize(1);
  PeptideIdentification& consensus = ids[0];
  consensus.setScoreType(score_type);
  consensus.setHigherScoreBetter(higher_better);

  // turn every grouped result into a peptide hit of the consensus ID
  for (SequenceGrouping::iterator entry = results.begin(); entry != results.end(); ++entry)
  {
    OPENMS_PRECONDITION(!entry->second.second.empty(), "Consensus score for peptide required");

    // layout of the value list: [0] = consensus score, [1] = support (optional)
    const vector<double>& values = entry->second.second;
    const bool has_support = (values.size() == 2);
    double support = 0.0; // only meaningful if 'has_support' is set
    if (has_support)
    {
      support = values[1];
      // filter by "support" value
      if (support < min_support_)
      {
        continue;
      }
    }

    PeptideHit hit;
    if (has_support)
    {
      hit.setMetaValue("consensus_support", support);
    }
    hit.setSequence(entry->first);
    hit.setCharge(entry->second.first);
    hit.setScore(values[0]);
    consensus.insertHit(hit);
#ifdef DEBUG_ID_CONSENSUS
    LOG_DEBUG << " - Output hit: " << hit.getSequence() << " " << hit.getScore() << endl;
#endif
  }

  consensus.assignRanks();
}
void ConsensusIDAlgorithmIdentity::apply_(vector<PeptideIdentification>& ids, SequenceGrouping& results) { preprocess_(ids); // group peptide hits by sequence: for (vector<PeptideIdentification>::iterator pep_it = ids.begin(); pep_it != ids.end(); ++pep_it) { for (vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it) { const AASequence& seq = hit_it->getSequence(); SequenceGrouping::iterator pos = results.find(seq); if (pos == results.end()) // new sequence { results[seq] = make_pair(hit_it->getCharge(), vector<double>(1, hit_it->getScore())); } else // previously seen sequence { compareChargeStates_(pos->second.first, hit_it->getCharge(), pos->first); pos->second.second.push_back(hit_it->getScore()); } } } // calculate score and support, and update results with them: bool higher_better = ids[0].isHigherScoreBetter(); Size n_other_ids = (count_empty_ ? number_of_runs_ : ids.size()) - 1; for (SequenceGrouping::iterator res_it = results.begin(); res_it != results.end(); ++res_it) { double score = getAggregateScore_(res_it->second.second, higher_better); // if 'count_empty' is false, 'n_other_ids' may be zero, in which case // we define the support to be one to avoid a NaN: double support = 1.0; if (n_other_ids > 0) // the normal case { support = (res_it->second.second.size() - 1.0) / n_other_ids; } res_it->second.second.resize(2); res_it->second.second[0] = score; res_it->second.second[1] = support; } }