Ejemplo n.º 1
0
/**
 * Equality comparison for two ScoreStats objects.
 *
 * The operands compare equal when both report the same size() and the
 * value returned by get(k) is the same in both for every position k.
 */
bool operator==(const ScoreStats& s1, const ScoreStats& s2)
{
  const size_t count = s1.size();

  // Differently sized statistics can never be equal.
  if (s2.size() != count) {
    return false;
  }

  // Walk both objects in lock-step and bail out at the first mismatch.
  size_t idx = 0;
  while (idx < count) {
    if (s1.get(idx) != s2.get(idx)) {
      return false;
    }
    ++idx;
  }

  return true;
}
Ejemplo n.º 2
0
// NOTE: (from the original author) this is really not the right place for
// this function.
//
// Computes an add-one smoothed, sentence-level BLEU-style score from `stats`.
//
// For each order j in [0, 4) the term log(stats[2j] + 1) - log(stats[2j+1] + 1)
// is accumulated and the sum is averaged over the four orders. A brevity
// term 1 - stats[8] / stats[1] is then added to the log score, but only when
// it is negative. The function returns exp() of the resulting log score.
//
// Presumably stats[2j] / stats[2j+1] are the matched / total n-gram counts of
// order j+1 and stats[8] is the reference length -- this layout is not
// visible here, confirm against the code that fills ScoreStats.
float sentenceLevelBleuPlusOne( ScoreStats &stats ) {
	const unsigned int bleu_order = 4;
	float logbleu = 0.0;

	// Sum the smoothed log-precisions, lowest order first.
	unsigned int order = 0;
	while (order < bleu_order) {
		logbleu += log(stats.get(2*order)+1) - log(stats.get(2*order+1)+1);
		++order;
	}
	logbleu = logbleu / bleu_order;

	// Only a negative brevity term penalises the score.
	const float brevity = 1.0 - static_cast<float>(stats.get(bleu_order*2))/stats.get(1);
	if (brevity < 0.0) {
		logbleu += brevity;
	}

	return exp(logbleu);
}
Ejemplo n.º 3
0
/**
 * Stores the combined score data and gives every sub-scorer its own slice.
 *
 * The statistics of all sub-scorers are laid out side by side inside each
 * ScoreStats of `data`. For every scorer in m_scorers a new ScoreData is
 * built that contains, for each entry and each n-best item of `data`, only
 * the NumberOfScores() consecutive values belonging to that scorer. The new
 * object is recorded in m_scorers_score_data and passed to the scorer via
 * its own setScoreData().
 *
 * @param data combined score data; the pointer is kept in m_score_data.
 */
void InterpolatedScorer::setScoreData(ScoreData* data)
{
  m_score_data = data;

  // Column at which the current sub-scorer's statistics begin.
  size_t offset = 0;

  for (ScopedVector<Scorer>::iterator scorerIt = m_scorers.begin();
       scorerIt != m_scorers.end(); ++scorerIt) {
    int scorerWidth = (*scorerIt)->NumberOfScores();
    ScoreData* slicedData = new ScoreData(*scorerIt);

    // One-past-the-last column owned by this sub-scorer.
    const size_t sliceEnd = size_t(scorerWidth + offset);

    for (size_t sent = 0; sent < data->size(); ++sent) {
      ScoreArray fullArray = data->get(sent);
      const size_t nbestSize = fullArray.size();

      ScoreArray slicedArray;
      for (size_t hyp = 0; hyp < nbestSize; ++hyp) {
        ScoreStats fullStats = data->get(sent, hyp);

        // Copy just this scorer's columns into a fresh ScoreStats.
        ScoreStats slicedStats;
        for (size_t col = offset; col < sliceEnd; ++col) {
          ScoreStatsType value = fullStats.get(col);
          slicedStats.add(value);
        }
        slicedArray.add(slicedStats);
      }

      // The array index is the decimal text of the entry's position.
      std::stringstream indexStream;
      indexStream << sent;
      std::string indexLabel = indexStream.str();
      slicedArray.setIndex(indexLabel);

      slicedData->add(slicedArray);
    }

    // NOTE: This class takes the ownership of the heap allocated
    // ScoreData objects to avoid the memory leak issues.
    m_scorers_score_data.push_back(slicedData);

    (*scorerIt)->setScoreData(slicedData);
    offset += scorerWidth;
  }
}
Ejemplo n.º 4
0
void  StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
                                   statscores_t& scores) const
{
  if (!m_score_data) {
    throw runtime_error("Score data not loaded");
  }
  // calculate the score for the candidates
  if (m_score_data->size() == 0) {
    throw runtime_error("Score data is empty");
  }
  if (candidates.size() == 0) {
    throw runtime_error("No candidates supplied");
  }
  int numCounts = m_score_data->get(0,candidates[0]).size();
  vector<int> totals(numCounts);
  for (size_t i = 0; i < candidates.size(); ++i) {
    ScoreStats stats = m_score_data->get(i,candidates[i]);
    if (stats.size() != totals.size()) {
      stringstream msg;
      msg << "Statistics for (" << "," << candidates[i] << ") have incorrect "
          << "number of fields. Found: " << stats.size() << " Expected: "
          << totals.size();
      throw runtime_error(msg.str());
    }
    for (size_t k = 0; k < totals.size(); ++k) {
      totals[k] += stats.get(k);
    }
  }
  scores.push_back(calculateScore(totals));

  candidates_t last_candidates(candidates);
  // apply each of the diffs, and get new scores
  for (size_t i = 0; i < diffs.size(); ++i) {
    for (size_t j = 0; j < diffs[i].size(); ++j) {
      size_t sid = diffs[i][j].first;
      size_t nid = diffs[i][j].second;
      size_t last_nid = last_candidates[sid];
      for (size_t k  = 0; k < totals.size(); ++k) {
        int diff = m_score_data->get(sid,nid).get(k)
                   - m_score_data->get(sid,last_nid).get(k);
        totals[k] += diff;
      }
      last_candidates[sid] = nid;
    }
    scores.push_back(calculateScore(totals));
  }

  // Regularisation. This can either be none, or the min or average as described in
  // Cer, Jurafsky and Manning at WMT08.
  if (m_regularization_type == NONE || m_regularization_window <= 0) {
    // no regularisation
    return;
  }

  // window size specifies the +/- in each direction
  statscores_t raw_scores(scores);      // copy scores
  for (size_t i = 0; i < scores.size(); ++i) {
    size_t start = 0;
    if (i >= m_regularization_window) {
      start = i - m_regularization_window;
    }
    const size_t end = min(scores.size(), i + m_regularization_window + 1);
    if (m_regularization_type == AVERAGE) {
      scores[i] = score_average(raw_scores,start,end);
    } else {
      scores[i] = score_min(raw_scores,start,end);
    }
  }
}