Example #1
void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
  if (sid >= m_references.size()) {
    stringstream msg;
    msg << "Sentence id (" << sid << ") not found in reference set";
    throw runtime_error(msg.str());
  }
  NgramCounts testcounts;
  // stats for this line
  vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
  string sentence = preprocessSentence(text);
  const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder, true);

  const int reference_len = CalcReferenceLength(sid, length);
  stats.push_back(reference_len);

  //precision on each ngram type
  for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
       testcounts_it != testcounts.end(); ++testcounts_it) {
    const NgramCounts::Value guess = testcounts_it->second;
    const size_t len = testcounts_it->first.size();
    NgramCounts::Value correct = 0;

    NgramCounts::Value v = 0;
    if (m_references[sid]->get_counts()->Lookup(testcounts_it->first, &v)) {
      correct = min(v, guess);
    }
    stats[len * 2 - 2] += correct;
    stats[len * 2 - 1] += guess;
  }
  entry.set(stats);
}
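
The stats vector above is laid out as (matched, total) n-gram count pairs for orders 1..kBleuNgramOrder, followed by the reference length at the end. Below is a minimal sketch, not part of BleuScorer, of how such a vector could be turned into a smoothed sentence-level BLEU score; the sentenceBleu name and the add-one smoothing are illustrative assumptions.

#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical helper (not in BleuScorer): compute a smoothed sentence-level
// BLEU score from a stats vector laid out as prepareStats() fills it:
// stats[2*i] = matched (i+1)-grams, stats[2*i+1] = hypothesis (i+1)-grams,
// stats.back() = reference length.
double sentenceBleu(const std::vector<int>& stats, std::size_t ngramOrder)
{
  const std::size_t hypLength = stats[1];   // unigram total == hypothesis length
  const int refLength = stats.back();

  double logPrecisionSum = 0.0;
  for (std::size_t i = 0; i < ngramOrder; ++i) {
    const double correct = stats[2 * i] + 1.0;      // add-one smoothing (assumption)
    const double total   = stats[2 * i + 1] + 1.0;
    logPrecisionSum += std::log(correct / total);
  }

  // Brevity penalty: penalise hypotheses shorter than the reference.
  double brevityPenalty = 1.0;
  if (hypLength > 0 && hypLength < static_cast<std::size_t>(refLength)) {
    brevityPenalty = std::exp(1.0 - static_cast<double>(refLength) / hypLength);
  }
  return brevityPenalty * std::exp(logPrecisionSum / ngramOrder);
}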
Example #2
size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
                               unsigned int n, bool is_testing)
{
  assert(n > 0);
  vector<int> encoded_tokens;

  // When performing tokenization of a hypothesis translation, we don't have
  // to update the Scorer's word vocabulary. However, the tokenization of
  // reference translations requires modifying the vocabulary, which means
  // this procedure might be slower than the tokenization of a hypothesis
  // translation.
  if (is_testing) {
    TokenizeAndEncodeTesting(line, encoded_tokens);
  } else {
    TokenizeAndEncode(line, encoded_tokens);
  }
  const size_t len = encoded_tokens.size();
  vector<int> ngram;

  for (size_t k = 1; k <= n; ++k) {
    //ngram order longer than sentence - no point
    if (k > len) {
      continue;
    }
    for (size_t i = 0; i < len - k + 1; ++i) {
      ngram.clear();
      ngram.reserve(len);
      for (size_t j = i; j < i+k && j < len; ++j) {
        ngram.push_back(encoded_tokens[j]);
      }
      counts.Add(ngram);
    }
  }
  return len;
}
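
For reference, a standalone sketch of the same enumeration, with std::map<std::vector<int>, int> as a hypothetical stand-in for NgramCounts: for each order k up to n, slide a window of width k over the encoded tokens and count every window.

#include <cstddef>
#include <map>
#include <vector>

typedef std::vector<int> Ngram;

// Count all n-grams of orders 1..n over a sequence of encoded token ids.
std::size_t countNgrams(const std::vector<int>& tokens,
                        std::map<Ngram, int>& counts,
                        unsigned int n)
{
  const std::size_t len = tokens.size();
  for (std::size_t k = 1; k <= n && k <= len; ++k) {
    for (std::size_t i = 0; i + k <= len; ++i) {
      Ngram ngram(tokens.begin() + i, tokens.begin() + i + k);
      ++counts[ngram];    // value-initialised to 0 on first insertion
    }
  }
  return len;
}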
Example #3
void BleuScorer::DumpCounts(ostream* os,
                            const NgramCounts& counts) const
{
  for (NgramCounts::const_iterator it = counts.begin();
       it != counts.end(); ++it) {
    *os << "(";
    const NgramCounts::Key& keys = it->first;
    for (size_t i = 0; i < keys.size(); ++i) {
      if (i != 0) {
        *os << " ";
      }
      *os << keys[i];
    }
    *os << ") : " << it->second << ", ";
  }
  *os << endl;
}
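
The output format is a parenthesised list of encoded token ids followed by the count, e.g. "(3 17) : 2, (5) : 1, ". A self-contained sketch of the same formatting over a std::map stand-in (hypothetical, for illustration only):

#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

typedef std::vector<int> Ngram;

// Print each n-gram as "(id id ...) : count, ", mirroring DumpCounts above.
void dumpCounts(std::ostream& os, const std::map<Ngram, int>& counts)
{
  for (std::map<Ngram, int>::const_iterator it = counts.begin();
       it != counts.end(); ++it) {
    os << "(";
    for (std::size_t i = 0; i < it->first.size(); ++i) {
      if (i != 0) os << " ";
      os << it->first[i];
    }
    os << ") : " << it->second << ", ";
  }
  os << std::endl;
}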
Example #4
bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
{
  if (is == NULL) return false;

  string line;
  size_t sid = 0;
  while (getline(*is, line)) {
    line = preprocessSentence(line);
    if (file_id == 0) {
      Reference* ref = new Reference;
      m_references.push_back(ref);    // Take ownership of the Reference object.
    }
    if (m_references.size() <= sid) {
      cerr << "Reference " << file_id << "has too many sentences." << endl;
      return false;
    }
    NgramCounts counts;
    size_t length = CountNgrams(line, counts, kBleuNgramOrder);

    //for any counts larger than those already there, merge them in
    for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
      const NgramCounts::Key& ngram = ci->first;
      const NgramCounts::Value newcount = ci->second;

      NgramCounts::Value oldcount = 0;
      m_references[sid]->get_counts()->Lookup(ngram, &oldcount);
      if (newcount > oldcount) {
        m_references[sid]->get_counts()->operator[](ngram) = newcount;
      }
    }
    //add in the length
    m_references[sid]->push_back(length);
    if (sid > 0 && sid % 100 == 0) {
      TRACE_ERR(".");
    }
    ++sid;
  }
  return true;
}
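
The merge loop above keeps, for every n-gram, the maximum count observed in any single reference of the same sentence, which is what BLEU's clipped counts require. A minimal sketch of that step, again with std::map<std::vector<int>, int> as a hypothetical stand-in for the Reference counts:

#include <algorithm>
#include <map>
#include <vector>

typedef std::vector<int> Ngram;

// Merge one reference's counts into the accumulated counts for a sentence,
// keeping the per-n-gram maximum across references.
void mergeReferenceCounts(std::map<Ngram, int>& merged,
                          const std::map<Ngram, int>& oneReference)
{
  for (std::map<Ngram, int>::const_iterator ci = oneReference.begin();
       ci != oneReference.end(); ++ci) {
    int& stored = merged[ci->first];        // inserts 0 if the n-gram is new
    stored = std::max(stored, ci->second);
  }
}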