void Bleu::addSentences(const string& candSentence, const vector<string>& refSentences) { if (refSentences.size() != refNum) { std::cout << "The reference sentences are not in the right number" << std::endl; exit(1); } vector<string> candSentenWords = sentenceTokenizer(candSentence); if (candSentenWords.size() < maxGramNum) { std::cout << "The candidate's words are less than the masx gram number" << std::endl; exit(1); } vector< vector<string> > refSentensWords; size_t bestRefLength = INT_MAX; for (auto& refSentence : refSentences) { if (refSentence == "") { std::cout << "The reference sentences is empty." << std::endl; exit(1); } vector<string> refSentenWords = sentenceTokenizer(refSentence); refSentensWords.push_back(refSentenWords); if (abs((int)candSentenWords.size() - (int)refSentenWords.size()) < (int)bestRefLength) { bestRefLength = refSentenWords.size(); } } bestRefLengthSum += bestRefLength; for (size_t gramNum = 1; gramNum <= maxGramNum; gramNum++) { saveClippedHits(candSentenWords, refSentensWords, gramNum); candLengthSum[gramNum - 1] += candSentenWords.size() - gramNum + 1; } }
std::pair<IndexType, Intent> SentenceToIntentTranslator::translate( const std::string& sentence, const DictionaryModel& dictionaryModel) { IntentModel::Intent intent; std::vector<std::string> tokens; SentenceTokenizer sentenceTokenizer(dictionaryModel); sentenceTokenizer.tokenize(sentence, tokens); std::vector<int> entities = extractEntities(EntitiesMatcher::match(tokens, dictionaryModel)); intent.entities = entities; IntentModel::EntityToNames entityToVariableNames; completeVariableNames(entities, dictionaryModel, entityToVariableNames); intent.entityToVariableNames = entityToVariableNames; INTENT_LOG_INFO() << "Translate sentence \"" + sentence + "\" into intent with following entities [" + logEntities(entities, dictionaryModel) + "]."; std::string intentId = IntentEncoder::encode(entities); intent.intentId = intentId; intent.example = sentence; return std::pair<std::string, IntentModel::Intent>(intentId, intent); }