Ejemplo n.º 1
0
void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
{
    //cerr << line << endl;
    NgramCounter ngramCounts;
    list<WordVec> openNgrams;
    size_t length = 0;
    //tokenize & count
    for (util::TokenIter<util::SingleCharacter, true> j(line, util::SingleCharacter(' ')); j; ++j) {
        const Vocab::Entry* nextTok = &(vocab.FindOrAdd(*j));
        ++length;
        openNgrams.push_front(WordVec());
        for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
            k->push_back(nextTok);
            ++ngramCounts[*k];
        }
        if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
    }

    //merge into overall ngram map
    for (NgramCounter::const_iterator ni = ngramCounts.begin();
            ni != ngramCounts.end(); ++ni) {
        size_t count = ni->second;
        //cerr << *ni << " " << count <<  endl;
        if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
        NgramMap::iterator totalsIter = ngramCounts_[sentenceId].find(ni->first);
        if (totalsIter == ngramCounts_[sentenceId].end()) {
            ngramCounts_[sentenceId][ni->first] = pair<size_t,size_t>(count,count);
        } else {
            ngramCounts_[sentenceId][ni->first].first = max(count, ngramCounts_[sentenceId][ni->first].first); //clip
            ngramCounts_[sentenceId][ni->first].second += count; //no clip
        }
    }
    //length
    if (lengths_.size() <= sentenceId) lengths_.resize(sentenceId+1);
    //TODO - length strategy - this is MIN
    if (!lengths_[sentenceId]) {
        lengths_[sentenceId] = length;
    } else {
        lengths_[sentenceId] = min(length,lengths_[sentenceId]);
    }
    //cerr << endl;

}
Ejemplo n.º 2
0
// Computes the (background-smoothed) sentence-level BLEU for the hypothesis
// fragment formed by this edge together with its child vertices, accumulating
// the raw ngram statistics into bleuStats.
//
// edge.Words() uses NULL entries as placeholders for child vertices: when one
// is met, the child's stored left context (and, for full-width contexts, its
// right context) is spliced into the word stream so that ngrams crossing the
// edge/vertex boundary can be counted. Ngrams wholly inside a child were
// already counted in that child's own bleuStats and are skipped here.
//
// edge:      the incoming edge being scored.
// head:      head vertex; supplies the covered source length.
// bleuStats: in/out vector of size kBleuNgramOrder*2+1 (match/total per order,
//            plus effective reference length in the last slot).
// Returns the BLEU score (sentenceLevelBackgroundBleu of the filled stats).
FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
{
    NgramCounter ngramCounts;
    size_t childId = 0;
    size_t wordId = 0;
    size_t contextId = 0; //position within left or right context
    const VertexState* vertexState = NULL;   // non-NULL while traversing a child's context
    bool inLeftContext = false;
    bool inRightContext = false;
    list<WordVec> openNgrams;                // ngrams still being extended, newest first
    const Vocab::Entry* currentWord = NULL;
    while (wordId < edge.Words().size()) {
        currentWord = edge.Words()[wordId];
        if (currentWord != NULL) {
            // Ordinary word on the edge itself; consume it.
            ++wordId;
        } else {
            // NULL marks a child-vertex position.
            if (!inLeftContext && !inRightContext) {
                //entering a vertex
                assert(!vertexState);
                vertexState = &(vertexStates_[edge.Children()[childId]]);
                ++childId;
                if (vertexState->leftContext.size()) {
                    inLeftContext = true;
                    contextId = 0;
                    currentWord = vertexState->leftContext[contextId];
                } else {
                    //empty context
                    vertexState = NULL;
                    ++wordId;
                    continue;
                }
            } else {
                //already in a vertex
                ++contextId;
                if (inLeftContext && contextId < vertexState->leftContext.size()) {
                    //still in left context
                    currentWord = vertexState->leftContext[contextId];
                } else if (inLeftContext) {
                    //at end of left context
                    if (vertexState->leftContext.size() == kBleuNgramOrder-1) {
                        //full size context, jump to right state
                        // Interior ngrams were counted in the child; restart
                        // ngram collection from its right context.
                        openNgrams.clear();
                        inLeftContext = false;
                        inRightContext = true;
                        contextId = 0;
                        currentWord = vertexState->rightContext[contextId];
                    } else {
                        //short context, just ignore right context
                        // (a short left context means the whole child span was
                        // shorter than kBleuNgramOrder-1, so left == right)
                        inLeftContext = false;
                        vertexState = NULL;
                        ++wordId;
                        continue;
                    }
                } else {
                    //in right context
                    if (contextId < vertexState->rightContext.size()) {
                        currentWord = vertexState->rightContext[contextId];
                    } else {
                        //leaving vertex
                        inRightContext = false;
                        vertexState = NULL;
                        ++wordId;
                        continue;
                    }
                }
            }
        }
        assert(currentWord);
        // Sentence-boundary markers never participate in ngrams.
        if (graph_.IsBoundary(currentWord)) continue;
        openNgrams.push_front(WordVec());
        openNgrams.front().reserve(kBleuNgramOrder);
        for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
            k->push_back(currentWord);
            //Only insert ngrams that cross boundaries
            // i.e. ngrams fully on the edge, or ngrams that start before the
            // child's left context (longer than the context position reached).
            if (!vertexState || (inLeftContext && k->size() > contextId+1)) ++ngramCounts[*k];
        }
        // Cap open ngrams at the maximum order.
        if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
    }

    //Collect matches
    //This edge
    //cerr << "edge ngrams" << endl;
    UpdateMatches(ngramCounts, bleuStats);

    //Child vertexes
    // Add in the statistics already accumulated for each child subtree.
    for (size_t i = 0; i < edge.Children().size(); ++i) {
        //cerr << "vertex ngrams " << edge.Children()[i] << endl;
        for (size_t j = 0; j < bleuStats.size(); ++j) {
            bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
        }
    }


    // Scale the reference length by the fraction of the source covered so far,
    // so partial hypotheses are compared against a proportional target length.
    FeatureStatsType sourceLength = head.SourceCovered();
    size_t referenceLength = references_.Length(sentenceId_);
    FeatureStatsType effectiveReferenceLength =
        sourceLength / totalSourceLength_ * referenceLength;

    bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
    //backgroundBleu_[backgroundBleu_.size()-1] =
    //  backgroundRefLength_ * sourceLength / totalSourceLength_;
    FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);

    return bleu;
}
Ejemplo n.º 3
0
// Viterbi search over a topologically sorted hypergraph: for every vertex,
// keeps the best incoming edge under (model score + bleuWeight * BLEU), then
// follows the back pointers from the final vertex to extract the best
// hypothesis, and finally recomputes its clipped BLEU statistics from scratch.
//
// graph:          the hypergraph (assumed topologically sorted — see the
//                 commented-out UTIL_THROW_IF below).
// weights:        feature weights for the model score.
// bleuWeight:     weight on the BLEU term; 0 disables BLEU scoring entirely.
// references:     reference ngram counts/lengths, indexed by sentenceId.
// sentenceId:     which sentence's references to score against.
// backgroundBleu: background (smoothing) counts for sentence-level BLEU.
// bestHypo:       out-parameter receiving the best hypothesis text, feature
//                 vector and BLEU stats.
void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu,  HgHypothesis* bestHypo)
{
    // backPointers[v] = (best incoming edge, best model score) for vertex v.
    BackPointer init(NULL,kMinScore);
    vector<BackPointer> backPointers(graph.VertexSize(),init);
    HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu);
    vector<FeatureStatsType> winnerStats(kBleuNgramOrder*2+1);
    for (size_t vi = 0; vi < graph.VertexSize(); ++vi) {
//    cerr << "vertex id " << vi <<  endl;
        FeatureStatsType winnerScore = kMinScore;
        const Vertex& vertex = graph.GetVertex(vi);
        const vector<const Edge*>& incoming = vertex.GetIncoming();
        if (!incoming.size()) {
            //UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
            //If no incoming edges, vertex is a dead end
            backPointers[vi].first = NULL;
            backPointers[vi].second = kMinScore;
        } else {
            //cerr << "\nVertex: " << vi << endl;
            for (size_t ei = 0; ei < incoming.size(); ++ei) {
                //cerr << "edge id " << ei << endl;
                FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
                // Accumulate the best scores of all child vertices; the max with
                // kMinScore clamps underflow from unreachable children.
                for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
                    size_t childId = incoming[ei]->Children()[i];
                    //UTIL_THROW_IF(backPointers[childId].second == kMinScore,
                    //  HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
                    incomingScore = max(incomingScore + backPointers[childId].second, kMinScore);
                }
                vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
                // cerr << "Score: " << incomingScore << " Bleu: ";
                // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
                FeatureStatsType totalScore = incomingScore;
                if (bleuWeight) {
                    FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
                    // NaN can arise from degenerate stats; warn and treat as 0
                    // rather than poisoning the comparison below.
                    if (isnan(bleuScore)) {
                        cerr << "WARN: bleu score undefined" << endl;
                        cerr << "\tVertex id : " << vi << endl;
                        cerr << "\tBleu stats : ";
                        for (size_t i = 0; i < bleuStats.size(); ++i) {
                            cerr << bleuStats[i] << ",";
                        }
                        cerr << endl;
                        bleuScore = 0;
                    }
                    //UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
                    totalScore += bleuWeight * bleuScore;
                    //  cerr << bleuScore << " Total: " << incomingScore << endl << endl;
                    //cerr << "is " << incomingScore << " bs " << bleuScore << endl;
                }
                if (totalScore >= winnerScore) {
                    //We only store the feature score (not the bleu score) with the vertex,
                    //since the bleu score is always cumulative, ie from counts for the whole span.
                    winnerScore = totalScore;
                    backPointers[vi].first = incoming[ei];
                    backPointers[vi].second = incomingScore;
                    winnerStats = bleuStats;
                }
            }
            //update with winner
            //if (bleuWeight) {
            //TODO: Not sure if we need this when computing max-model solution
            if (backPointers[vi].first) {
                bleuScorer.UpdateState(*(backPointers[vi].first), vi, winnerStats);
            }

        }
//    cerr  << "backpointer[" << vi << "] = (" << backPointers[vi].first << "," << backPointers[vi].second << ")" << endl;
    }

    //expand back pointers
    // The last vertex is the goal (topological order).
    GetBestHypothesis(graph.VertexSize()-1, graph, backPointers, bestHypo);

    //bleu stats and fv

    //Need the actual (clipped) stats
    //TODO: This repeats code in bleu scorer - factor out
    // Re-count all ngrams of the extracted text (boundaries excluded), then
    // fill stats as (clipped matches, total) per order plus reference length.
    bestHypo->bleuStats.resize(kBleuNgramOrder*2+1);
    NgramCounter counts;
    list<WordVec> openNgrams;
    for (size_t i = 0; i < bestHypo->text.size(); ++i) {
        const Vocab::Entry* entry = bestHypo->text[i];
        if (graph.IsBoundary(entry)) continue;
        openNgrams.push_front(WordVec());
        for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
            k->push_back(entry);
            ++counts[*k];
        }
        if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
    }
    for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
        size_t order = ngi->first.size();
        size_t count = ngi->second;
        // Layout: [2*(order-1)] = clipped matches, [2*(order-1)+1] = total ngrams.
        bestHypo->bleuStats[(order-1)*2 + 1] += count;
        bestHypo->bleuStats[(order-1) * 2] += min(count, references.NgramMatches(sentenceId,ngi->first,true));
    }
    bestHypo->bleuStats[kBleuNgramOrder*2] = references.Length(sentenceId);
}
Ejemplo n.º 4
0
int set_LL_data_IQ(int deviceID, int channelNum, int length, unsigned short* addr, unsigned short* count,
					unsigned short* trigger1, unsigned short * trigger2, unsigned short* repeat){
	//Convert data pointers to vectors and passed through
	return APSRack_.set_LL_data(deviceID, channelNum, WordVec(addr, addr+length), WordVec(count, count+length),
			WordVec(trigger1, trigger1+length), WordVec(trigger2, trigger2+length), WordVec(repeat, repeat+length));
}