size_t Phrase::Find(const Phrase &sought, int maxUnknown) const { if (GetSize() < sought.GetSize()) { // sought phrase too big return NOT_FOUND; } size_t maxStartPos = GetSize() - sought.GetSize(); for (size_t startThisPos = 0; startThisPos <= maxStartPos; ++startThisPos) { size_t thisPos = startThisPos; int currUnknowns = 0; size_t soughtPos; for (soughtPos = 0; soughtPos < sought.GetSize(); ++soughtPos) { const Word &soughtWord = sought.GetWord(soughtPos); const Word &thisWord = GetWord(thisPos); if (soughtWord == thisWord) { ++thisPos; } else if (soughtWord.IsOOV() && (maxUnknown < 0 || currUnknowns < maxUnknown)) { // the output has an OOV word. Allow a certain number of OOVs ++currUnknowns; ++thisPos; } else { break; } } if (soughtPos == sought.GetSize()) { return startThisPos; } } return NOT_FOUND; }
/**
 * Build the lookup key for the phrase pair (f, e).
 *
 * The source part is added only when m_FactorsF is non-empty and the target
 * part only when m_FactorsE is non-empty. When both parts are present they
 * are separated by PrefixTreeMap::MagicWord.
 */
IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
    const Phrase& e) const
{
  IPhrase key;
  std::vector<std::string> words;

  if (!m_FactorsF.empty()) {
    for (size_t pos = 0; pos < f.GetSize(); ++pos) {
      words.push_back(f.GetWord(pos).GetString(m_FactorsF, false));
    }
    auxAppend(key, m_Table->ConvertPhrase(words, SourceVocId));
    words.clear();
  }

  if (!m_FactorsE.empty()) {
    if (!key.empty()) {
      key.push_back(PrefixTreeMap::MagicWord);
    }
    for (size_t pos = 0; pos < e.GetSize(); ++pos) {
      words.push_back(e.GetWord(pos).GetString(m_FactorsE, false));
    }
    auxAppend(key, m_Table->ConvertPhrase(words, TargetVocId));
  }

  return key;
}
/**
 * Pick the score of the candidate whose context matches `context`.
 *
 * Without context factors there may be at most one candidate and its score
 * (or an empty Scores) is returned. Otherwise the context is converted to
 * ids and every suffix of it, from the full context down to the empty one,
 * is compared against the candidates' phrase 0; the first hit wins.
 *
 * \return score vector of the matching candidate, or an empty Scores
 */
Scores LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands,
    const Phrase& context)
{
  if(m_FactorsC.empty()) {
    CHECK(cands.size() <= 1);
    return (1 == cands.size())?(cands[0].GetScore(0)):(Scores());
  }

  std::vector<std::string> cvec;
  for(size_t i = 0; i < context.GetSize(); ++i) {
    cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
  }
  IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);

  // Walk the suffixes of c using c's own bounds. The loop stops once the
  // empty suffix (start == c.end()) has been tried, so the iterator is never
  // advanced past the end.
  // NOTE(review): assumes ConvertPhrase yields one id per input word, so this
  // tries the same suffixes as counting up to context.GetSize() - confirm.
  IPhrase sub_c;
  IPhrase::iterator start = c.begin();
  while (true) {
    sub_c.assign(start, c.end());
    for(size_t cand = 0; cand < cands.size(); ++cand) {
      if(cands[cand].GetPhrase(0) == sub_c) {
        return cands[cand].GetScore(0);
      }
    }
    if (start == c.end()) {
      break;
    }
    ++start;
  }
  return Scores();
}
int Phrase::Compare(const Phrase &compare) const { int ret = 0; for (size_t pos = 0; pos < GetSize(); ++pos) { if (pos >= compare.GetSize()) { // we're bigger than the other. Put 1st ret = -1; break; } const Word &thisWord = GetWord(pos) ,&compareWord = compare.GetWord(pos); int wordRet = thisWord.Compare(compareWord); if (wordRet != 0) { ret = wordRet; break; } } if (ret == 0) { CHECK(compare.GetSize() >= GetSize()); ret = (compare.GetSize() > GetSize()) ? 1 : 0; } return ret; }
std::vector<float> LexicalReorderingTableMemory::GetScore(const Phrase& f, const Phrase& e, const Phrase& c) { //rather complicated because of const can't use []... as [] might enter new things into std::map //also can't have to be careful with words range if c is empty can't use c.GetSize()-1 will underflow and be large TableType::const_iterator r; std::string key; if(0 == c.GetSize()) { key = MakeKey(f,e,c); r = m_Table.find(key); if(m_Table.end() != r) { return r->second; } } else { //right try from large to smaller context for(size_t i = 0; i <= c.GetSize(); ++i) { Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1))); key = MakeKey(f,e,sub_c); r = m_Table.find(key); if(m_Table.end() != r) { return r->second; } } } return Scores(); }
/**
 * Write the single best translation for the current input to `collector`.
 *
 * Does nothing when `collector` is null. When no best hyperedge exists an
 * empty line is written (preceded by "0 " if hypothesis scores are
 * requested). Otherwise the first and last word of the best yield are
 * removed before printing.
 */
void Manager::OutputBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  std::ostringstream out;
  FixPrecision(out);

  const SHyperedge *best = GetBestSHyperedge();
  if (best != NULL) {
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << best->label.score << " ";
    }
    Phrase yield = GetOneBestTargetYield(*best);
    // delete 1st & last
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
  } else {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << "0 ";
    }
  }
  // both cases terminate the output line
  out << '\n';

  collector->Write(m_source.GetTranslationId(), out.str());
}
/**
 * Pick the score of the candidate whose context matches `context`.
 *
 * Without context factors there may be at most one candidate (otherwise an
 * exception is thrown) and its score, or an empty Scores, is returned.
 * Otherwise the context is converted to ids and every suffix of it, from the
 * full context down to the empty one, is compared against the candidates'
 * phrase 0; the first hit wins.
 *
 * \return score vector of the matching candidate, or an empty Scores
 */
Scores
LexicalReorderingTableTree::
auxFindScoreForContext(const Candidates& cands, const Phrase& context)
{
  if(m_FactorsC.empty()) {
    UTIL_THROW_IF2(cands.size() > 1, "Error");
    return (cands.size() == 1) ? cands[0].GetScore(0) : Scores();
  }

  std::vector<std::string> cvec;
  for(size_t i = 0; i < context.GetSize(); ++i)
    cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
  IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);

  // Walk the suffixes of c using c's own bounds. The loop stops once the
  // empty suffix (start == c.end()) has been tried, so the iterator is never
  // advanced past the end.
  // NOTE(review): assumes ConvertPhrase yields one id per input word, so this
  // tries the same suffixes as counting up to context.GetSize() - confirm.
  IPhrase sub_c;
  IPhrase::iterator start = c.begin();
  while (true) {
    sub_c.assign(start, c.end());
    for(size_t cand = 0; cand < cands.size(); ++cand) {
      if(cands[cand].GetPhrase(0) == sub_c)
        return cands[cand].GetScore(0);
    }
    if (start == c.end())
      break;
    ++start;
  }
  return Scores();
}
/**
 * Return the reordering scores for (f, e) in context c.
 *
 * The in-memory cache is consulted first, even when caching is switched off,
 * since it may still hold entries. On a miss the candidates are fetched from
 * the table and, if caching is enabled, stored in the cache slot that was
 * reserved for this key.
 *
 * \return matching scores, or an empty Scores when the key is not proper or
 *         nothing is found
 */
Scores
LexicalReorderingTableTree::
GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
{
  // NOTE: no check for c as c might be empty, e.g. start of sentence.
  // phi: no diagnostic is printed, since e may be empty (drop-unknown).
  const bool missingF = !m_FactorsF.empty() && 0 == f.GetSize();
  const bool missingE = !m_FactorsE.empty() && 0 == e.GetSize();
  if (missingF || missingE) {
    // not a proper key
    return Scores();
  }

  CacheType::iterator slot;
  if (m_UseCache) {
    // reserve a slot; if the key was already there, answer from the cache
    std::pair<CacheType::iterator, bool> inserted =
      m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates()));
    if (!inserted.second) {
      return auxFindScoreForContext((inserted.first)->second, c);
    }
    slot = inserted.first;
  } else {
    // although we might not be caching now, cache might be non-empty!
    slot = m_Cache.find(MakeCacheKey(f,e));
    if (slot != m_Cache.end()) {
      return auxFindScoreForContext(slot->second, c);
    }
  }

  // not in cache => go to file...
  Candidates cands;
  m_Table->GetCandidates(MakeTableKey(f,e), &cands);
  if (cands.empty()) {
    return Scores();
  }
  if (m_UseCache) {
    slot->second = cands;
  }

  if (!m_FactorsC.empty()) {
    return auxFindScoreForContext(cands, c);
  }
  UTIL_THROW_IF2(1 != cands.size(), "Error");
  return cands[0].GetScore(0);
}
/**
 * Calculate real sentence Bleu score of complete translation.
 *
 * \param translation  the full output phrase; an empty phrase scores 0
 * \return the value of CalculateBleu(BleuScoreState*) for a state built from
 *         the (normalised) translation
 */
float BleuScoreFeature::CalculateBleu(Phrase translation) const
{
  if (translation.GetSize() == 0)
    return 0.0;

  Phrase normTranslation = translation;
  // remove start and end symbol for chart decoding
  if (m_cur_source_length != m_cur_norm_source_length) {
    // NOTE(review): GetSize()-2 underflows for a one-word phrase - confirm
    // that chart output always carries both boundary symbols here.
    WordsRange range(1, translation.GetSize()-2);
    normTranslation = translation.GetSubString(range);
  }

  // The state is only needed for the duration of this call, so it is an
  // automatic object instead of a heap allocation that is never released.
  // NOTE(review): assumes CalculateBleu(BleuScoreState*) does not take
  // ownership of or retain the pointer - confirm.
  BleuScoreState state;

  // get ngram matches for translation
  GetClippedNgramMatchesAndCounts(normTranslation,
                                  m_cur_ref_ngrams,
                                  state.m_ngram_counts,
                                  state.m_ngram_matches,
                                  0); // number of words in previous states

  // set state variables
  state.m_words = normTranslation;
  state.m_source_length = m_cur_norm_source_length;
  state.m_target_length = normTranslation.GetSize();
  state.m_scaled_ref_length = m_cur_ref_length;

  // Calculate bleu.
  return CalculateBleu(&state);
}
std::vector<float> LexicalReorderingTableCompact::GetScore(const Phrase& f, const Phrase& e, const Phrase& c) { std::string key; Scores scores; if(0 == c.GetSize()) key = MakeKey(f, e, c); else for(size_t i = 0; i <= c.GetSize(); ++i) { Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1))); key = MakeKey(f,e,sub_c); } size_t index = m_hash[key]; if(m_hash.GetSize() != index) { std::string scoresString; if(m_inMemory) scoresString = m_scoresMemory[index]; else scoresString = m_scoresMapped[index]; BitWrapper<> bitStream(scoresString); for(size_t i = 0; i < m_numScoreComponent; i++) scores.push_back(m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream)); return scores; } return Scores(); }
/**
 * Copy constructor: creates a newly allocated copy of every word of `copy`.
 *
 * The parameter had been garbled to a copyright sign; it is `&copy`.
 */
Phrase::Phrase(const Phrase &copy)
  :m_words(copy.GetSize())
{
  for (size_t pos = 0; pos < copy.GetSize(); ++pos) {
    const Word &oldWord = copy.GetWord(pos);
    // NOTE(review): m_words stores raw Word pointers; presumably they are
    // released by the destructor - confirm.
    Word *newWord = new Word(oldWord);
    m_words[pos] = newWord;
  }
}
/**
 * Score a rule's phrase in isolation with the language model.
 *
 * Output parameters:
 *  - fullScore:  transformed total of everything the scorer returned
 *  - ngramScore: transformed part of the total accumulated after the first
 *                min(Order()-1, size) positions
 *  - oovCount:   number of terminals whose LM index is 0
 *
 * Each non-terminal finishes the running scorer, adds its result to the
 * total and resets it.
 */
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
                             float &ngramScore, std::size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (phrase.GetSize() == 0) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  // a leading begin-of-sentence marker is consumed by the scorer itself
  size_t cursor = 0;
  if (m_bos == phrase[0][m_factorType]) {
    scorer.BeginSentence();
    cursor = 1;
  }

  const size_t ngramBoundary = m_ngram->Order() - 1;
  const size_t boundaryEnd = std::min(ngramBoundary, phrase.GetSize());

  // positions before the boundary
  while (cursor < boundaryEnd) {
    const SCFG::Word &word = phrase[cursor];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      const lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
    ++cursor;
  }
  const float before_boundary = fullScore + scorer.Finish();

  // remaining positions
  while (cursor < phrase.GetSize()) {
    const SCFG::Word &word = phrase[cursor];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      const lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
    ++cursor;
  }
  fullScore += scorer.Finish();

  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
/**
 * Pre-calculate the n-gram probabilities for the words in the specified phrase.
 *
 * Note that when this method is called, we do not have access to the context
 * in which this phrase will eventually be applied.
 *
 * In other words, we know what words are in this phrase,
 * but we do not know what words will come before or after this phrase.
 *
 * The parameters fullScore, ngramScore, and oovCount are all output parameters.
 *
 * The value stored in oovCount is the number of words in the phrase
 * that are not in the language model's vocabulary.
 *
 * The sum of the ngram scores for all words in this phrase are stored in fullScore.
 *
 * The value stored in ngramScore is similar, but only full-order ngram scores are included.
 *
 * This is best shown by example:
 *
 * Assume a trigram backward language model and a phrase "a b c d e f g"
 *
 * fullScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 *
 * ngramScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 *
 * NOTE(review): the two lists above are identical, which contradicts the
 * statement that ngramScore only includes full-order ngram scores. The code
 * computes ngramScore as (final score - score recorded when the loop reaches
 * position == boundary); verify which terms that actually covers and correct
 * the example accordingly.
 *
 * Throws util::Exception if the phrase starts with the begin-of-sentence
 * factor or contains a non-terminal.
 */
template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  // nothing to score for an empty phrase; outputs stay at zero
  if (!phrase.GetSize()) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  UTIL_THROW_IF( (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)), util::Exception, "BackwardLanguageModel does not currently support rules that include <s>" );

  float before_boundary = 0.0f;

  int lastWord = phrase.GetSize() - 1;
  int ngramBoundary = m_ngram->Order() - 1;
  // phrases shorter than the model order use position 0 as the boundary, so
  // before_boundary ends up equal to the full score and ngramScore is built
  // from a zero difference
  int boundary = ( lastWord < ngramBoundary ) ? 0 : ngramBoundary;

  // feed the words to the scorer from the last word back to the first
  int position;
  for (position = lastWord; position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF( (word.IsNonTerminal()), util::Exception, "BackwardLanguageModel does not currently support rules that include non-terminals " );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
    // index 0 is counted as out-of-vocabulary
    if (!index) ++oovCount;

    // remember the score accumulated up to and including the boundary word
    if (position==boundary) {
      before_boundary = scorer.Finish();
    }
  }

  fullScore = scorer.Finish();

  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
// score ngrams around the overlap of two previously scored phrases void BleuScoreFeature::GetNgramMatchCounts_overlap(Phrase& phrase, const NGrams& ref_ngram_counts, std::vector< size_t >& ret_counts, std::vector< size_t >& ret_matches, size_t overlap_index) const { NGrams::const_iterator ref_ngram_counts_iter; size_t ngram_start_idx, ngram_end_idx; // Chiang et al (2008) use unclipped counts of ngram matches for (size_t end_idx = overlap_index; end_idx < phrase.GetSize(); end_idx++) { if (end_idx >= (overlap_index+BleuScoreState::bleu_order-1)) break; for (size_t order = 0; order < BleuScoreState::bleu_order; order++) { if (order > end_idx) break; ngram_end_idx = end_idx; ngram_start_idx = end_idx - order; if (ngram_start_idx >= overlap_index) continue; // only score ngrams that span the overlap point Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0); ret_counts[order]++; ref_ngram_counts_iter = ref_ngram_counts.find(ngram); if (ref_ngram_counts_iter != ref_ngram_counts.end()) ret_matches[order]++; } } }
/*** * print surface factor only for the given phrase */ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors) const { UTIL_THROW_IF2(outputFactorOrder.size() == 0, "Cannot be empty phrase"); if (reportAllFactors == true) { out << phrase; } else { size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); out << *factor; UTIL_THROW_IF2(factor == NULL, "Empty factor 0 at position " << pos); for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); UTIL_THROW_IF2(factor == NULL, "Empty factor " << i << " at position " << pos); out << "|" << *factor; } out << " "; } } }
void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const { // get length of source and target phrase size_t targetLength = targetPhrase.GetSize(); size_t sourceLength = source.GetSize(); // create feature names stringstream nameSource; nameSource << "s" << sourceLength; stringstream nameTarget; nameTarget << "t" << targetLength; stringstream nameBoth; nameBoth << sourceLength << "," << targetLength; // increase feature counts scoreBreakdown.PlusEquals(this,nameSource.str(),1); scoreBreakdown.PlusEquals(this,nameTarget.str(),1); scoreBreakdown.PlusEquals(this,nameBoth.str(),1); //cerr << nameSource.str() << " " << nameTarget.str() << " " << nameBoth.str() << endl; }
void outputHypo(ostream& out, const Hypothesis* hypo, bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo, bool reportAllFactors = false) { if (hypo->GetPrevHypo() != NULL) { outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, alignInfo, reportAllFactors); Phrase p = hypo->GetCurrTargetPhrase(); if(reportAllFactors) { out << p << " "; } else { for (size_t pos = 0 ; pos < p.GetSize() ; pos++) { const Factor *factor = p.GetFactor(pos, 0); out << *factor << " "; } } if (addAlignmentInfo) { /** * Add the alignment info to the array. This is in target order and consists of * (tgt-start, src-start, src-end) triples. **/ map<string, xmlrpc_c::value> phraseAlignInfo; phraseAlignInfo["tgt-start"] = xmlrpc_c::value_int(hypo->GetCurrTargetWordsRange().GetStartPos()); phraseAlignInfo["src-start"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos()); phraseAlignInfo["src-end"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos()); alignInfo.push_back(xmlrpc_c::value_struct(phraseAlignInfo)); } } }
/* * Given a phrase (current translation) calculate its ngram counts and * its ngram matches against the ngrams in the reference translation */ void BleuScoreFeature::GetNgramMatchCounts(Phrase& phrase, const NGrams& ref_ngram_counts, std::vector< size_t >& ret_counts, std::vector< size_t >& ret_matches, size_t skip_first) const { NGrams::const_iterator ref_ngram_counts_iter; size_t ngram_start_idx, ngram_end_idx; // Chiang et al (2008) use unclipped counts of ngram matches for (size_t end_idx = skip_first; end_idx < phrase.GetSize(); end_idx++) { for (size_t order = 0; order < BleuScoreState::bleu_order; order++) { if (order > end_idx) break; ngram_end_idx = end_idx; ngram_start_idx = end_idx - order; Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0); ret_counts[order]++; ref_ngram_counts_iter = ref_ngram_counts.find(ngram); if (ref_ngram_counts_iter != ref_ngram_counts.end()) ret_matches[order]++; } } }
/** TODO: this method isn't used anywhere. Remove? */ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const { const TargetPhrase &tp = GetCurrTargetPhrase(); size_t targetSize = tp.GetSize(); for (size_t i = 0; i < targetSize; ++i) { size_t pos; if (leftRightMost == 1) { pos = i; } else if (leftRightMost == 2) { pos = targetSize - i - 1; } else { abort(); } const Word &word = tp.GetWord(pos); if (word.IsNonTerminal()) { // non-term. fill out with prev hypo size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos]; const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd]; prevHypo->GetOutputPhrase(outPhrase); } else { outPhrase.AddWord(word); } if (outPhrase.GetSize() >= numWords) { return; } } }
/**
 * Append every word of endPhrase to the end of this phrase.
 *
 * The number of words to copy is fixed before the loop starts. If the size
 * were re-read each iteration, appending a phrase to itself would never
 * terminate, because each AddWord would grow the bound.
 */
void Phrase::Append(const Phrase &endPhrase)
{
  const size_t numWords = endPhrase.GetSize();
  for (size_t i = 0; i < numWords; i++) {
    AddWord(endPhrase.GetWord(i));
  }
}
void SourceWordDeletionFeature::ComputeFeatures(const Phrase &source, const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator, const AlignmentInfo &alignmentInfo) const { // handle special case: unknown words (they have no word alignment) size_t targetLength = targetPhrase.GetSize(); size_t sourceLength = source.GetSize(); if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return; // flag aligned words bool aligned[16]; CHECK(sourceLength < 16); for(size_t i=0; i<sourceLength; i++) aligned[i] = false; for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++) aligned[ alignmentPoint->first ] = true; // process unaligned source words for(size_t i=0; i<sourceLength; i++) { if (!aligned[i]) { const Word &w = source.GetWord(i); if (!w.IsNonTerminal()) { const StringPiece word = w.GetFactor(m_factorType)->GetString(); if (word != "<s>" && word != "</s>") { if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) { accumulator->PlusEquals(this, StringPiece("OTHER"),1); } else { accumulator->PlusEquals(this,word,1); } } } } } }
void RuleScope::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const { // adjacent non-term count as 1 ammbiguity, rather than 2 as in rule scope // source can't be empty, right? float score = 0; int count = 0; for (size_t i = 0; i < source.GetSize() - 0; ++i) { const Word &word = source.GetWord(i); bool ambiguous = IsAmbiguous(word, m_sourceSyntax); if (ambiguous) { ++count; } else { if (count > 0) { score += count; } count = -1; } } // 1st & last always adjacent to ambiguity ++count; if (count > 0) { score += count; } scoreBreakdown.PlusEquals(this, score); }
void Phrase::MergeFactors(const Phrase ©, const std::vector<FactorType>& factorVec) { CHECK(GetSize() == copy.GetSize()); for (size_t currPos = 0 ; currPos < GetSize() ; currPos++) for (std::vector<FactorType>::const_iterator i = factorVec.begin(); i != factorVec.end(); ++i) { SetFactor(currPos, *i, copy.GetFactor(currPos, *i)); } }
void Phrase::MergeFactors(const Phrase ©, const std::vector<FactorType>& factorVec) { UTIL_THROW_IF2(GetSize() != copy.GetSize(), "Both phrases need to be the same size to merge"); for (size_t currPos = 0 ; currPos < GetSize() ; currPos++) for (std::vector<FactorType>::const_iterator i = factorVec.begin(); i != factorVec.end(); ++i) { SetFactor(currPos, *i, copy.GetFactor(currPos, *i)); } }
/**
 * Build a phrase holding only the output factors of this path's target
 * phrase. Every requested factor must be present (enforced with CHECK).
 */
Phrase TrellisPath::GetSurfacePhrase() const
{
  const std::vector<FactorType> &outputFactor = StaticData::Instance().GetOutputFactorOrder();
  Phrase targetPhrase = GetTargetPhrase();
  const size_t numWords = targetPhrase.GetSize();
  Phrase ret(numWords);

  for (size_t pos = 0 ; pos < numWords ; ++pos) {
    Word &newWord = ret.AddWord();
    for (std::vector<FactorType>::const_iterator ft = outputFactor.begin();
         ft != outputFactor.end(); ++ft) {
      const Factor *factor = targetPhrase.GetFactor(pos, *ft);
      CHECK(factor);
      newWord[*ft] = factor;
    }
  }

  return ret;
}
void OpSequenceModel:: Evaluate(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const { osmHypothesis obj; obj.setState(OSM->NullContextState()); WordsBitmap myBitmap(source.GetSize()); vector <string> mySourcePhrase; vector <string> myTargetPhrase; vector<float> scores(5); vector <int> alignments; int startIndex = 0; int endIndex = source.GetSize(); const AlignmentInfo &align = targetPhrase.GetAlignTerm(); AlignmentInfo::const_iterator iter; for (iter = align.begin(); iter != align.end(); ++iter) { alignments.push_back(iter->first); alignments.push_back(iter->second); } for (int i = 0; i < targetPhrase.GetSize(); i++) { if (targetPhrase.GetWord(i).IsOOV()) myTargetPhrase.push_back("_TRANS_SLF_"); else myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string()); } for (int i = 0; i < source.GetSize(); i++) { mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string()); } obj.setPhrases(mySourcePhrase , myTargetPhrase); obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize()); obj.computeOSMFeature(startIndex,myBitmap); obj.calculateOSMProb(*OSM); obj.populateScores(scores); estimatedFutureScore.PlusEquals(this, scores); }
/**
 * Build a phrase holding only the configured output factors of this path's
 * target phrase. Throws (via UTIL_THROW_IF2) when a requested factor is
 * missing at some position.
 */
Phrase TrellisPath::GetSurfacePhrase() const
{
  std::vector<FactorType> const& oFactor = manager().options()->output.factor_order;
  Phrase targetPhrase = GetTargetPhrase();
  const size_t numWords = targetPhrase.GetSize();
  Phrase ret(numWords);

  for (size_t pos = 0 ; pos < numWords ; ++pos) {
    Word &newWord = ret.AddWord();
    for (std::vector<FactorType>::const_iterator ft = oFactor.begin();
         ft != oFactor.end(); ++ft) {
      FactorType factorType = *ft;
      const Factor *factor = targetPhrase.GetFactor(pos, factorType);
      UTIL_THROW_IF2(factor == NULL,
                     "No factor " << factorType << " at position " << pos);
      newWord[factorType] = factor;
    }
  }

  return ret;
}
/**
 * Check whether this phrase and inputPhrase have the same length and the
 * same factor of type `factorType` at every position.
 */
bool Phrase::IsCompatible(const Phrase &inputPhrase, FactorType factorType) const
{
  const size_t size = GetSize();
  if (inputPhrase.GetSize() != size) {
    return false;
  }

  // advance while the factors agree; reaching the end means full agreement
  size_t pos = 0;
  while (pos < size
         && GetFactor(pos, factorType) == inputPhrase.GetFactor(pos, factorType)) {
    ++pos;
  }
  return pos == size;
}
/**
 * Merge additional factors from `phrase` into the target phrase and add
 * `score` to the score breakdown.
 *
 * With no factors to add only the score is updated. A single factor is
 * merged through the single-factor overload of MergeFactors, several through
 * the vector overload. The phrase must be as long as the target phrase
 * (checked with assert).
 */
void TranslationOption::MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection& score, const std::vector<FactorType>& featuresToAdd)
{
  assert(phrase.GetSize() == m_targetPhrase.GetSize());

  switch (featuresToAdd.size()) {
  case 0:
    /* features already there, just update score */
    break;
  case 1:
    m_targetPhrase.MergeFactors(phrase, featuresToAdd[0]);
    break;
  default:
    m_targetPhrase.MergeFactors(phrase, featuresToAdd);
    break;
  }

  m_scoreBreakdown.PlusEquals(score);
}
/**
 * Retrieve the target phrase collection for sourcePhrase from the phrase
 * table.
 *
 * \return an empty TargetPhraseVectorPtr when the source phrase is longer
 *         than the phrase decoder's maximum source phrase length, otherwise
 *         whatever the phrase decoder creates for it
 */
TargetPhraseVectorPtr
PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const
{
  // Only phrases no longer than the longest source phrase observed during
  // compilation can be in the table.
  if (sourcePhrase.GetSize() <= m_phraseDecoder->GetMaxSourcePhraseLength()) {
    // Retrieve target phrase collection from phrase table
    return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
  }

  // There is no such source phrase
  return TargetPhraseVectorPtr();
}