int main() { using namespace boost::lambda; static const int num_candidates=60; // divisible by 1,...,candidate::num_traits static const int N=20000; typedef strategy<num_candidates> strategy_t; typedef strategy_t::candidates_t candidates_t; typedef strategy_t::const_candidate_iterator const_candidate_iterator; std::vector<boost::shared_ptr<strategy_t> > strategies; for(int t=1;t<=candidate::num_traits;++t){ strategies.push_back(boost::shared_ptr<strategy_t>( new simple_strategy<num_candidates>(t))); } std::vector<boost::array<int,num_candidates> > histogram(strategies.size()); std::vector<double> acc_score(strategies.size()+1); for(int n=N;n--;){ typedef boost::array<candidate,num_candidates> candidates_t; typedef boost::array<const candidate*,num_candidates> candidates_ptr_t; const candidates_t candidates; candidates_ptr_t sorted_candidates; for(std::size_t i=candidates.size();i--;){ sorted_candidates[i]=&candidates[i]; } std::sort( sorted_candidates.begin(),sorted_candidates.end(), bind(&candidate::score,*_1) > bind(&candidate::score,*_2)); acc_score.back()+=sorted_candidates[0]->score; for(std::size_t t=0;t<strategies.size();++t){ const_candidate_iterator it=strategies[t]->select(candidates); acc_score[t]+=it->score; candidates_ptr_t::iterator itp= std::find_if( sorted_candidates.begin(),sorted_candidates.end(), _1==&*it); std::size_t choice=(std::size_t)(itp-sorted_candidates.begin()); ++(histogram[t][choice]); } } for(std::size_t t=0;t<strategies.size();++t){ std::cout<<strategies[t]->name()<<"\t"<<acc_score[t]/N<<std::endl; for(std::size_t i=0;i<histogram[t].size();++i){ std::cout<<histogram[t][i]<<"\t"; } std::cout<<std::endl; } std::cout<<"optimum_strategy\t"<<acc_score[strategies.size()]/N<<std::endl; }
/* Returns an iterator to the candidate, among the first n entries of
 * `candidates`, for which candidate::partial_score evaluated with t is
 * largest (as determined by std::max_element). n and t are defined outside
 * this method.
 */
virtual const_candidate_iterator select(const candidates_t& candidates)
{
  using namespace boost::lambda;

  // only the leading n candidates are considered
  const const_candidate_iterator first=candidates.begin();
  const const_candidate_iterator last=first+n;

  return std::max_element(
    first,last,
    bind(&candidate::partial_score,_1,t) <
    bind(&candidate::partial_score,_2,t));
}
void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs, statscores_t& scores) const { if (!m_score_data) { throw runtime_error("Score data not loaded"); } // calculate the score for the candidates if (m_score_data->size() == 0) { throw runtime_error("Score data is empty"); } if (candidates.size() == 0) { throw runtime_error("No candidates supplied"); } int numCounts = m_score_data->get(0,candidates[0]).size(); vector<int> totals(numCounts); for (size_t i = 0; i < candidates.size(); ++i) { ScoreStats stats = m_score_data->get(i,candidates[i]); if (stats.size() != totals.size()) { stringstream msg; msg << "Statistics for (" << "," << candidates[i] << ") have incorrect " << "number of fields. Found: " << stats.size() << " Expected: " << totals.size(); throw runtime_error(msg.str()); } for (size_t k = 0; k < totals.size(); ++k) { totals[k] += stats.get(k); } } scores.push_back(calculateScore(totals)); candidates_t last_candidates(candidates); // apply each of the diffs, and get new scores for (size_t i = 0; i < diffs.size(); ++i) { for (size_t j = 0; j < diffs[i].size(); ++j) { size_t sid = diffs[i][j].first; size_t nid = diffs[i][j].second; size_t last_nid = last_candidates[sid]; for (size_t k = 0; k < totals.size(); ++k) { int diff = m_score_data->get(sid,nid).get(k) - m_score_data->get(sid,last_nid).get(k); totals[k] += diff; } last_candidates[sid] = nid; } scores.push_back(calculateScore(totals)); } // Regularisation. This can either be none, or the min or average as described in // Cer, Jurafsky and Manning at WMT08. 
if (m_regularization_type == NONE || m_regularization_window <= 0) { // no regularisation return; } // window size specifies the +/- in each direction statscores_t raw_scores(scores); // copy scores for (size_t i = 0; i < scores.size(); ++i) { size_t start = 0; if (i >= m_regularization_window) { start = i - m_regularization_window; } const size_t end = min(scores.size(), i + m_regularization_window + 1); if (m_regularization_type == AVERAGE) { scores[i] = score_average(raw_scores,start,end); } else { scores[i] = score_min(raw_scores,start,end); } } }