コード例 #1
0
void Bleu::addSentences(const string& candSentence, const vector<string>& refSentences) {
    if (refSentences.size() != refNum) {
        std::cout << "The reference sentences are not in the right number" << std::endl;
        exit(1);
    }

    vector<string> candSentenWords = sentenceTokenizer(candSentence);
    if (candSentenWords.size() < maxGramNum) {
        std::cout << "The candidate's words are less than the masx gram number" << std::endl;
        exit(1);
    }

    vector< vector<string> > refSentensWords;
    size_t bestRefLength = INT_MAX;
    for (auto& refSentence : refSentences) {
        if (refSentence == "") {
            std::cout << "The reference sentences is empty." << std::endl;
            exit(1);
        }
        vector<string> refSentenWords = sentenceTokenizer(refSentence);
        refSentensWords.push_back(refSentenWords);
        if (abs((int)candSentenWords.size() - (int)refSentenWords.size()) < (int)bestRefLength) {
            bestRefLength = refSentenWords.size();
        }
    }
    bestRefLengthSum += bestRefLength;

    for (size_t gramNum = 1; gramNum <= maxGramNum; gramNum++) {
        saveClippedHits(candSentenWords, refSentensWords, gramNum);
        candLengthSum[gramNum - 1] += candSentenWords.size() - gramNum + 1;
    }
}
コード例 #2
0
std::pair<IndexType, Intent> SentenceToIntentTranslator::translate(
    const std::string& sentence, const DictionaryModel& dictionaryModel) {
  IntentModel::Intent intent;

  std::vector<std::string> tokens;
  SentenceTokenizer sentenceTokenizer(dictionaryModel);
  sentenceTokenizer.tokenize(sentence, tokens);
  std::vector<int> entities =
      extractEntities(EntitiesMatcher::match(tokens, dictionaryModel));
  intent.entities = entities;

  IntentModel::EntityToNames entityToVariableNames;
  completeVariableNames(entities, dictionaryModel, entityToVariableNames);
  intent.entityToVariableNames = entityToVariableNames;

  INTENT_LOG_INFO() << "Translate sentence \"" + sentence +
                           "\" into intent with following entities [" +
                           logEntities(entities, dictionaryModel) + "].";

  std::string intentId = IntentEncoder::encode(entities);
  intent.intentId = intentId;
  intent.example = sentence;
  return std::pair<std::string, IntentModel::Intent>(intentId, intent);
}