Ejemplo n.º 1
0
int main()
{
  using namespace boost::lambda;

  static const int num_candidates=60; // divisible by 1,...,candidate::num_traits
  static const int N=20000;

  typedef strategy<num_candidates>             strategy_t;
  typedef strategy_t::candidates_t             candidates_t;
  typedef strategy_t::const_candidate_iterator const_candidate_iterator;
  
  std::vector<boost::shared_ptr<strategy_t> > strategies;
  for(int t=1;t<=candidate::num_traits;++t){
    strategies.push_back(boost::shared_ptr<strategy_t>(
      new simple_strategy<num_candidates>(t)));
  }

  std::vector<boost::array<int,num_candidates> > histogram(strategies.size());
  std::vector<double>                            acc_score(strategies.size()+1);

  for(int n=N;n--;){
    typedef boost::array<candidate,num_candidates>        candidates_t;
    typedef boost::array<const candidate*,num_candidates> candidates_ptr_t;

    const candidates_t candidates;
    candidates_ptr_t   sorted_candidates;
    for(std::size_t i=candidates.size();i--;){
      sorted_candidates[i]=&candidates[i];
    }
    std::sort(
      sorted_candidates.begin(),sorted_candidates.end(),
      bind(&candidate::score,*_1) > bind(&candidate::score,*_2));

    acc_score.back()+=sorted_candidates[0]->score;

    for(std::size_t t=0;t<strategies.size();++t){
      const_candidate_iterator it=strategies[t]->select(candidates);
      acc_score[t]+=it->score;

      candidates_ptr_t::iterator itp=
        std::find_if(
          sorted_candidates.begin(),sorted_candidates.end(),
          _1==&*it);
      std::size_t choice=(std::size_t)(itp-sorted_candidates.begin());
      ++(histogram[t][choice]);
    }
  }

  for(std::size_t t=0;t<strategies.size();++t){
    std::cout<<strategies[t]->name()<<"\t"<<acc_score[t]/N<<std::endl;
    for(std::size_t i=0;i<histogram[t].size();++i){
      std::cout<<histogram[t][i]<<"\t";
    }
    std::cout<<std::endl;
  }
  std::cout<<"optimum_strategy\t"<<acc_score[strategies.size()]/N<<std::endl;
}
Ejemplo n.º 2
0
/**
 * Scores an initial candidate selection and every selection reached by
 * applying the given diffs in order.
 *
 * The per-field statistics of the initial selection are summed into a totals
 * vector and passed to calculateScore(); the result is appended to `scores`.
 * Then, for each entry of `diffs`, every (index, new candidate) pair in it
 * replaces the candidate currently chosen at that index, the totals are
 * adjusted by the difference of the two statistics, and one further score is
 * appended.
 *
 * If regularisation is enabled (type other than NONE and a positive window),
 * every element of `scores` -- including any that were present on entry --
 * is replaced by the average or minimum of the raw scores within
 * +/- m_regularization_window positions.
 *
 * @param candidates chosen candidate for each index; must be non-empty.
 * @param diffs      sequence of change sets; each change is a pair
 *                   (index into candidates, replacement candidate).
 * @param scores     output; one score is appended for the initial selection
 *                   and one per element of `diffs`.
 * @throws runtime_error if the score data is missing or empty, if no
 *         candidates are supplied, if a candidate's statistics have a
 *         different number of fields than the first one, or if a diff refers
 *         to an index outside `candidates`.
 */
void  StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
                                   statscores_t& scores) const
{
  if (!m_score_data) {
    throw runtime_error("Score data not loaded");
  }
  // calculate the score for the candidates
  if (m_score_data->size() == 0) {
    throw runtime_error("Score data is empty");
  }
  if (candidates.size() == 0) {
    throw runtime_error("No candidates supplied");
  }

  // The first candidate's statistics fix the expected number of fields.
  const size_t numCounts = m_score_data->get(0,candidates[0]).size();
  vector<int> totals(numCounts);
  for (size_t i = 0; i < candidates.size(); ++i) {
    ScoreStats stats = m_score_data->get(i,candidates[i]);
    if (stats.size() != totals.size()) {
      stringstream msg;
      msg << "Statistics for (" << i << "," << candidates[i] << ") have incorrect "
          << "number of fields. Found: " << stats.size() << " Expected: "
          << totals.size();
      throw runtime_error(msg.str());
    }
    for (size_t k = 0; k < totals.size(); ++k) {
      totals[k] += stats.get(k);
    }
  }
  scores.push_back(calculateScore(totals));

  // Tracks the currently selected candidate per index while diffs are applied.
  candidates_t last_candidates(candidates);
  // apply each of the diffs, and get new scores
  for (size_t i = 0; i < diffs.size(); ++i) {
    for (size_t j = 0; j < diffs[i].size(); ++j) {
      size_t sid = diffs[i][j].first;
      size_t nid = diffs[i][j].second;
      // Reject out-of-range indices instead of reading past the end of
      // last_candidates.
      if (sid >= last_candidates.size()) {
        stringstream msg;
        msg << "Diff refers to candidate index " << sid << " but only "
            << last_candidates.size() << " candidates were supplied";
        throw runtime_error(msg.str());
      }
      size_t last_nid = last_candidates[sid];
      // Swap the old candidate's contribution for the new one, field by field.
      for (size_t k  = 0; k < totals.size(); ++k) {
        int diff = m_score_data->get(sid,nid).get(k)
                   - m_score_data->get(sid,last_nid).get(k);
        totals[k] += diff;
      }
      last_candidates[sid] = nid;
    }
    scores.push_back(calculateScore(totals));
  }

  // Regularisation. This can either be none, or the min or average as described in
  // Cer, Jurafsky and Manning at WMT08.
  if (m_regularization_type == NONE || m_regularization_window <= 0) {
    // no regularisation
    return;
  }

  // window size specifies the +/- in each direction
  // (known to be positive here, so the conversion to size_t is lossless)
  const size_t window = static_cast<size_t>(m_regularization_window);
  statscores_t raw_scores(scores);      // copy scores
  for (size_t i = 0; i < scores.size(); ++i) {
    // Clamp the half-open window [start, end) to the valid index range.
    const size_t start = (i >= window) ? i - window : 0;
    const size_t end = min(scores.size(), i + window + 1);
    if (m_regularization_type == AVERAGE) {
      scores[i] = score_average(raw_scores,start,end);
    } else {
      scores[i] = score_min(raw_scores,start,end);
    }
  }
}