Пример #1
0
void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
  if (sid >= m_references.size()) {
    stringstream msg;
    msg << "Sentence id (" << sid << ") not found in reference set";
    throw runtime_error(msg.str());
  }
  NgramCounts testcounts;
  // stats for this line
  vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
  string sentence = preprocessSentence(text);
  const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder, true);

  const int reference_len = CalcReferenceLength(sid, length);
  stats.push_back(reference_len);

  //precision on each ngram type
  for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
       testcounts_it != testcounts.end(); ++testcounts_it) {
    const NgramCounts::Value guess = testcounts_it->second;
    const size_t len = testcounts_it->first.size();
    NgramCounts::Value correct = 0;

    NgramCounts::Value v = 0;
    if (m_references[sid]->get_counts()->Lookup(testcounts_it->first, &v)) {
      correct = min(v, guess);
    }
    stats[len * 2 - 2] += correct;
    stats[len * 2 - 1] += guess;
  }
  entry.set(stats);
}
Пример #2
0
void BleuScorer::DumpCounts(ostream* os,
                            const NgramCounts& counts) const {
    for (NgramCounts::const_iterator it = counts.begin();
            it != counts.end(); ++it) {
        *os << "(";
        const NgramCounts::Key& keys = it->first;
        for (size_t i = 0; i < keys.size(); ++i) {
            if (i != 0) {
                *os << " ";
            }
            *os << keys[i];
        }
        *os << ") : " << it->second << ", ";
    }
    *os << endl;
}
Пример #3
0
bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
{
  if (is == NULL) return false;

  string line;
  size_t sid = 0;
  while (getline(*is, line)) {
    line = preprocessSentence(line);
    if (file_id == 0) {
      Reference* ref = new Reference;
      m_references.push_back(ref);    // Take ownership of the Reference object.
    }
    if (m_references.size() <= sid) {
      cerr << "Reference " << file_id << "has too many sentences." << endl;
      return false;
    }
    NgramCounts counts;
    size_t length = CountNgrams(line, counts, kBleuNgramOrder);

    //for any counts larger than those already there, merge them in
    for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
      const NgramCounts::Key& ngram = ci->first;
      const NgramCounts::Value newcount = ci->second;

      NgramCounts::Value oldcount = 0;
      m_references[sid]->get_counts()->Lookup(ngram, &oldcount);
      if (newcount > oldcount) {
        m_references[sid]->get_counts()->operator[](ngram) = newcount;
      }
    }
    //add in the length
    m_references[sid]->push_back(length);
    if (sid > 0 && sid % 100 == 0) {
      TRACE_ERR(".");
    }
    ++sid;
  }
  return true;
}