Пример #1
0
size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
{
    size_t targetLength = 0;
    for (size_t i = 0; i < edge.Words().size(); ++i) {
        const Vocab::Entry* word = edge.Words()[i];
        if (word) ++targetLength;
    }
    for (size_t i = 0; i < edge.Children().size(); ++i) {
        const VertexState& state = vertexStates_[edge.Children()[i]];
        targetLength += state.targetLength;
    }
    return targetLength;
}
Пример #2
0
void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
{
    //TODO: Maybe more efficient to absorb into the Score() method
    VertexState& vertexState = vertexStates_[vertexId];
    //cerr << "Updating state for " << vertexId << endl;

    //leftContext
    int wi = 0;
    const VertexState* childState = NULL;
    int contexti = 0; //index within child context
    int childi = 0;
    while (vertexState.leftContext.size() < (kBleuNgramOrder-1)) {
        if ((size_t)wi >= winnerEdge.Words().size()) break;
        const Vocab::Entry* word = winnerEdge.Words()[wi];
        if (word != NULL) {
            vertexState.leftContext.push_back(word);
            ++wi;
        } else {
            if (childState == NULL) {
                //start of child state
                childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
                contexti = 0;
            }
            if ((size_t)contexti < childState->leftContext.size()) {
                vertexState.leftContext.push_back(childState->leftContext[contexti++]);
            } else {
                //end of child context
                childState = NULL;
                ++wi;
            }
        }
    }

    //rightContext
    wi = winnerEdge.Words().size() - 1;
    childState = NULL;
    childi = winnerEdge.Children().size() - 1;
    while (vertexState.rightContext.size() < (kBleuNgramOrder-1)) {
        if (wi < 0) break;
        const Vocab::Entry* word = winnerEdge.Words()[wi];
        if (word != NULL) {
            vertexState.rightContext.push_back(word);
            --wi;
        } else {
            if (childState == NULL) {
                //start (ie rhs) of child state
                childState = &(vertexStates_[winnerEdge.Children()[childi--]]);
                contexti = childState->rightContext.size()-1;
            }
            if (contexti >= 0) {
                vertexState.rightContext.push_back(childState->rightContext[contexti--]);
            } else {
                //end (ie lhs) of child context
                childState = NULL;
                --wi;
            }
        }
    }
    reverse(vertexState.rightContext.begin(), vertexState.rightContext.end());

    //length + counts
    vertexState.targetLength = GetTargetLength(winnerEdge);
    vertexState.bleuStats = bleuStats;
}
Пример #3
0
// Scores an edge.
//
// Steps:
//   1. Walk edge.Words() and count the n-grams (up to kBleuNgramOrder words)
//      that this edge introduces. A NULL entry in edge.Words() stands for the
//      next vertex in edge.Children(); for it, the words of that vertex's
//      stored leftContext (and possibly rightContext) are visited instead.
//   2. Pass the counted n-grams and bleuStats to UpdateMatches().
//   3. Add the bleuStats stored for every child vertex, element by element.
//   4. Overwrite the last element of bleuStats with the effective reference
//      length and return sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_).
//
// Parameters:
//   edge      - edge to score.
//   head      - vertex whose SourceCovered() value scales the reference length.
//   bleuStats - in/out statistics vector. Must be non-empty (its last element
//               is written), and each child's stored bleuStats must have at
//               least as many elements.
//               NOTE(review): presumably zeroed by the caller before the call - confirm.
//
// Returns the value computed by sentenceLevelBackgroundBleu().
FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
{
    NgramCounter ngramCounts;
    size_t childId = 0; //next entry of edge.Children() to enter
    size_t wordId = 0; //current position in edge.Words()
    size_t contextId = 0; //position within left or right context
    const VertexState* vertexState = NULL; //state of the child being traversed, NULL when outside any child
    bool inLeftContext = false;
    bool inRightContext = false;
    //n-grams still being extended, newest (shortest) at the front
    list<WordVec> openNgrams;
    const Vocab::Entry* currentWord = NULL;
    //Each iteration either selects one word to process (currentWord) or
    //finishes with a child vertex and moves on via 'continue'. wordId only
    //advances past a NULL placeholder once its child has been fully handled.
    while (wordId < edge.Words().size()) {
        currentWord = edge.Words()[wordId];
        if (currentWord != NULL) {
            //ordinary word on the edge
            ++wordId;
        } else {
            //placeholder for a child vertex
            if (!inLeftContext && !inRightContext) {
                //entering a vertex
                assert(!vertexState);
                vertexState = &(vertexStates_[edge.Children()[childId]]);
                ++childId;
                if (vertexState->leftContext.size()) {
                    inLeftContext = true;
                    contextId = 0;
                    currentWord = vertexState->leftContext[contextId];
                } else {
                    //empty context
                    //the child contributes no words; open n-grams are kept as they are
                    vertexState = NULL;
                    ++wordId;
                    continue;
                }
            } else {
                //already in a vertex
                ++contextId;
                if (inLeftContext && contextId < vertexState->leftContext.size()) {
                    //still in left context
                    currentWord = vertexState->leftContext[contextId];
                } else if (inLeftContext) {
                    //at end of left context
                    if (vertexState->leftContext.size() == kBleuNgramOrder-1) {
                        //full size context, jump to right state
                        //open n-grams are dropped here, so none spans from the
                        //left context into the right context
                        //NOTE(review): presumably because the words in between
                        //are not available at this point - confirm
                        openNgrams.clear();
                        inLeftContext = false;
                        inRightContext = true;
                        contextId = 0;
                        currentWord = vertexState->rightContext[contextId];
                    } else {
                        //short context, just ignore right context
                        //NOTE(review): presumably a left context shorter than
                        //kBleuNgramOrder-1 already holds all the child's words - confirm
                        inLeftContext = false;
                        vertexState = NULL;
                        ++wordId;
                        continue;
                    }
                } else {
                    //in right context
                    if (contextId < vertexState->rightContext.size()) {
                        currentWord = vertexState->rightContext[contextId];
                    } else {
                        //leaving vertex
                        inRightContext = false;
                        vertexState = NULL;
                        ++wordId;
                        continue;
                    }
                }
            }
        }
        assert(currentWord);
        //boundary words are not added to any n-gram
        if (graph_.IsBoundary(currentWord)) continue;
        //open a new n-gram starting at this word, then extend every open n-gram by it
        openNgrams.push_front(WordVec());
        openNgrams.front().reserve(kBleuNgramOrder);
        for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
            k->push_back(currentWord);
            //Only insert ngrams that cross boundaries
            //Counted when outside any child vertex, or when inside a left
            //context and the n-gram is longer than the number of context
            //positions visited so far (contextId+1), i.e. it also contains
            //words from before the child. N-grams extended inside a right
            //context are not counted at that point.
            if (!vertexState || (inLeftContext && k->size() > contextId+1)) ++ngramCounts[*k];
        }
        //once kBleuNgramOrder n-grams are open, drop the oldest one (at the back)
        //so that it is not extended any further
        if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
    }

    //Collect matches
    //This edge
    //cerr << "edge ngrams" << endl;
    UpdateMatches(ngramCounts, bleuStats);

    //Child vertexes
    //add the statistics stored in each child's state, element by element
    for (size_t i = 0; i < edge.Children().size(); ++i) {
        //cerr << "vertex ngrams " << edge.Children()[i] << endl;
        for (size_t j = 0; j < bleuStats.size(); ++j) {
            bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
        }
    }


    //Effective reference length: the reference length of the current sentence
    //scaled by sourceLength / totalSourceLength_.
    FeatureStatsType sourceLength = head.SourceCovered();
    size_t referenceLength = references_.Length(sentenceId_);
    FeatureStatsType effectiveReferenceLength =
        sourceLength / totalSourceLength_ * referenceLength;

    //the last slot of bleuStats holds the reference length
    bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
    //backgroundBleu_[backgroundBleu_.size()-1] =
    //  backgroundRefLength_ * sourceLength / totalSourceLength_;
    FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);

    return bleu;
}