size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
{
  size_t targetLength = 0;
  //terminals on this edge
  for (size_t i = 0; i < edge.Words().size(); ++i) {
    const Vocab::Entry* word = edge.Words()[i];
    if (word) ++targetLength;
  }
  //lengths already computed for the child vertices
  for (size_t i = 0; i < edge.Children().size(); ++i) {
    const VertexState& state = vertexStates_[edge.Children()[i]];
    targetLength += state.targetLength;
  }
  return targetLength;
}
void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
{
  //TODO: Maybe more efficient to absorb into the Score() method
  VertexState& vertexState = vertexStates_[vertexId];
  //cerr << "Updating state for " << vertexId << endl;

  //leftContext
  int wi = 0;
  const VertexState* childState = NULL;
  int contexti = 0; //index within child context
  int childi = 0;
  while (vertexState.leftContext.size() < (kBleuNgramOrder-1)) {
    if ((size_t)wi >= winnerEdge.Words().size()) break;
    const Vocab::Entry* word = winnerEdge.Words()[wi];
    if (word != NULL) {
      vertexState.leftContext.push_back(word);
      ++wi;
    } else {
      if (childState == NULL) {
        //start of child state
        childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
        contexti = 0;
      }
      if ((size_t)contexti < childState->leftContext.size()) {
        vertexState.leftContext.push_back(childState->leftContext[contexti++]);
      } else {
        //end of child context
        childState = NULL;
        ++wi;
      }
    }
  }

  //rightContext
  wi = winnerEdge.Words().size() - 1;
  childState = NULL;
  childi = winnerEdge.Children().size() - 1;
  while (vertexState.rightContext.size() < (kBleuNgramOrder-1)) {
    if (wi < 0) break;
    const Vocab::Entry* word = winnerEdge.Words()[wi];
    if (word != NULL) {
      vertexState.rightContext.push_back(word);
      --wi;
    } else {
      if (childState == NULL) {
        //start (ie rhs) of child state
        childState = &(vertexStates_[winnerEdge.Children()[childi--]]);
        contexti = childState->rightContext.size()-1;
      }
      if (contexti >= 0) {
        vertexState.rightContext.push_back(childState->rightContext[contexti--]);
      } else {
        //end (ie lhs) of child context
        childState = NULL;
        --wi;
      }
    }
  }
  reverse(vertexState.rightContext.begin(), vertexState.rightContext.end());

  //length + counts
  vertexState.targetLength = GetTargetLength(winnerEdge);
  vertexState.bleuStats = bleuStats;
}
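// Illustrative sketch (not part of the scorer itself): UpdateState() above keeps
// only the outermost kBleuNgramOrder-1 words of the winning subderivation as BLEU
// state. For an already-flattened yield the same idea reduces to taking a prefix
// and a suffix; the toy helper below (hypothetical names, std::string tokens
// instead of Vocab::Entry*) shows that reduction in isolation.
#include <algorithm>
#include <string>
#include <vector>

struct ToyBleuState {
  std::vector<std::string> leftContext;  //first ngramOrder-1 words of the yield
  std::vector<std::string> rightContext; //last ngramOrder-1 words of the yield
  size_t targetLength;
};

ToyBleuState MakeToyState(const std::vector<std::string>& yield, size_t ngramOrder)
{
  ToyBleuState state;
  state.targetLength = yield.size();
  const size_t ctx = ngramOrder - 1;
  for (size_t i = 0; i < yield.size() && i < ctx; ++i) {
    state.leftContext.push_back(yield[i]);
  }
  for (size_t i = yield.size(); i > 0 && state.rightContext.size() < ctx; --i) {
    state.rightContext.push_back(yield[i-1]);
  }
  //rightContext was filled back-to-front, so restore left-to-right order,
  //mirroring the reverse() call at the end of UpdateState()
  std::reverse(state.rightContext.begin(), state.rightContext.end());
  return state;
}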
FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
{
  NgramCounter ngramCounts;
  size_t childId = 0;
  size_t wordId = 0;
  size_t contextId = 0; //position within left or right context
  const VertexState* vertexState = NULL;
  bool inLeftContext = false;
  bool inRightContext = false;
  list<WordVec> openNgrams;
  const Vocab::Entry* currentWord = NULL;
  while (wordId < edge.Words().size()) {
    currentWord = edge.Words()[wordId];
    if (currentWord != NULL) {
      ++wordId;
    } else {
      if (!inLeftContext && !inRightContext) {
        //entering a vertex
        assert(!vertexState);
        vertexState = &(vertexStates_[edge.Children()[childId]]);
        ++childId;
        if (vertexState->leftContext.size()) {
          inLeftContext = true;
          contextId = 0;
          currentWord = vertexState->leftContext[contextId];
        } else {
          //empty context
          vertexState = NULL;
          ++wordId;
          continue;
        }
      } else {
        //already in a vertex
        ++contextId;
        if (inLeftContext && contextId < vertexState->leftContext.size()) {
          //still in left context
          currentWord = vertexState->leftContext[contextId];
        } else if (inLeftContext) {
          //at end of left context
          if (vertexState->leftContext.size() == kBleuNgramOrder-1) {
            //full size context, jump to right state
            openNgrams.clear();
            inLeftContext = false;
            inRightContext = true;
            contextId = 0;
            currentWord = vertexState->rightContext[contextId];
          } else {
            //short context, just ignore right context
            inLeftContext = false;
            vertexState = NULL;
            ++wordId;
            continue;
          }
        } else {
          //in right context
          if (contextId < vertexState->rightContext.size()) {
            currentWord = vertexState->rightContext[contextId];
          } else {
            //leaving vertex
            inRightContext = false;
            vertexState = NULL;
            ++wordId;
            continue;
          }
        }
      }
    }
    assert(currentWord);
    if (graph_.IsBoundary(currentWord)) continue;
    openNgrams.push_front(WordVec());
    openNgrams.front().reserve(kBleuNgramOrder);
    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
      k->push_back(currentWord);
      //Only insert ngrams that cross boundaries
      if (!vertexState || (inLeftContext && k->size() > contextId+1)) ++ngramCounts[*k];
    }
    if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
  }

  //Collect matches
  //This edge
  //cerr << "edge ngrams" << endl;
  UpdateMatches(ngramCounts, bleuStats);

  //Child vertexes
  for (size_t i = 0; i < edge.Children().size(); ++i) {
    //cerr << "vertex ngrams " << edge.Children()[i] << endl;
    for (size_t j = 0; j < bleuStats.size(); ++j) {
      bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
    }
  }

  FeatureStatsType sourceLength = head.SourceCovered();
  size_t referenceLength = references_.Length(sentenceId_);
  FeatureStatsType effectiveReferenceLength =
    sourceLength / totalSourceLength_ * referenceLength;

  bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
  //backgroundBleu_[backgroundBleu_.size()-1] =
  //  backgroundRefLength_ * sourceLength / totalSourceLength_;
  FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);

  return bleu;
}
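// Illustrative sketch (not part of the scorer itself): the Score() loop above
// counts n-grams by keeping a list of "open" n-grams, extending each one with
// the current word and retiring the oldest once it reaches kBleuNgramOrder.
// The toy function below (hypothetical name, std::string tokens and a std::map
// in place of NgramCounter) demonstrates that counting technique on a plain
// token sequence, without the boundary-crossing filter used above.
#include <list>
#include <map>
#include <string>
#include <vector>

std::map<std::vector<std::string>, int>
ToyCountNgrams(const std::vector<std::string>& words, size_t ngramOrder)
{
  std::map<std::vector<std::string>, int> counts;
  std::list<std::vector<std::string> > openNgrams;
  for (size_t i = 0; i < words.size(); ++i) {
    openNgrams.push_front(std::vector<std::string>()); //a new n-gram starts at every word
    for (std::list<std::vector<std::string> >::iterator k = openNgrams.begin();
         k != openNgrams.end(); ++k) {
      k->push_back(words[i]); //extend every open n-gram with the current word
      ++counts[*k];           //record n-grams of length 1..ngramOrder
    }
    if (openNgrams.size() >= ngramOrder) openNgrams.pop_back(); //drop the longest
  }
  return counts;
}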