std::string LexicalReorderingTableMemory::MakeKey(const Phrase& f,
    const Phrase& e,
    const Phrase& c) const
{
  return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)),
                 auxClearString(e.GetStringRep(m_FactorsE)),
                 auxClearString(c.GetStringRep(m_FactorsC)));
}
void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec)
{
  UTIL_THROW_IF2(GetSize() != copy.GetSize(),
                 "Both phrases need to be the same size to merge");
  for (size_t currPos = 0; currPos < GetSize(); currPos++) {
    for (std::vector<FactorType>::const_iterator i = factorVec.begin(); i != factorVec.end(); ++i) {
      SetFactor(currPos, *i, copy.GetFactor(currPos, *i));
    }
  }
}
Phrase::Phrase(const Phrase &copy)
  : m_words(copy.GetSize())
{
  for (size_t pos = 0; pos < copy.GetSize(); ++pos) {
    const Word &oldWord = copy.GetWord(pos);
    Word *newWord = new Word(oldWord);
    m_words[pos] = newWord;
  }
}
void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec)
{
  CHECK(GetSize() == copy.GetSize());
  for (size_t currPos = 0; currPos < GetSize(); currPos++) {
    for (std::vector<FactorType>::const_iterator i = factorVec.begin(); i != factorVec.end(); ++i) {
      SetFactor(currPos, *i, copy.GetFactor(currPos, *i));
    }
  }
}
//--------------------------------------------------------------
void testApp::makePhrase(string _p, int _v)
{
  Phrase temp;
  pos.set(ofRandom(10, ofGetWidth() - font.stringWidth(_p)),
          ofRandom(font.getLineHeight(), ofGetHeight() - font.getLineHeight()));
  ofColor c = color.getColor(ofRandom(color.width), 50);
  temp.setup(_p, _v, &font, pos, &bg, c);
  phrases.push_back(temp);
  phrase = "";
}
std::string LexicalReorderingTableCompact::MakeKey(const Phrase& f,
    const Phrase& e,
    const Phrase& c) const
{
  return MakeKey(Trim(f.GetStringRep(m_FactorsF)),
                 Trim(e.GetStringRep(m_FactorsE)),
                 Trim(c.GetStringRep(m_FactorsC)));
}
bool Phrase::IsCompatible(const Phrase &inputPhrase, FactorType factorType) const
{
  if (inputPhrase.GetSize() != GetSize()) {
    return false;
  }
  for (size_t currPos = 0; currPos < GetSize(); currPos++) {
    if (GetFactor(currPos, factorType) != inputPhrase.GetFactor(currPos, factorType))
      return false;
  }
  return true;
}
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
  std::vector<string> toks;
  Moses::Tokenize(toks, line);

  vec.resize(toks.size());
  for (size_t i = 0; i < vec.size(); ++i) {
    const string &tok = toks[i];
    Word *word = new Word(i, tok);
    vec[i] = word;
  }
}
void Manager::CreateInputPaths()
{
  for (size_t pos = 0; pos < m_sentence.GetSize(); ++pos) {
    Phrase *phrase = new Phrase(1);
    phrase->Set(0, m_sentence.GetWord(pos));

    InputPath *path = new InputPath(NULL, phrase, pos);
    m_inputPathQueue.push_back(path);

    CreateInputPaths(*path, pos + 1);
  }
}
template <class Model>
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
                             float &ngramScore, std::size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  size_t position;
  if (m_bos == phrase[0][m_factorType]) {
    scorer.BeginSentence();
    position = 1;
  } else {
    position = 0;
  }

  size_t ngramBoundary = m_ngram->Order() - 1;
  size_t end_loop = std::min(ngramBoundary, phrase.GetSize());

  for (; position < end_loop; ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
  }

  float before_boundary = fullScore + scorer.Finish();

  for (; position < phrase.GetSize(); ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
  }

  fullScore += scorer.Finish();

  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
void Phrase::MergeFactors(const Phrase &copy)
{
  assert(GetSize() == copy.GetSize());
  size_t size = GetSize();
  const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
  for (size_t currPos = 0; currPos < size; currPos++) {
    for (unsigned int currFactor = 0; currFactor < maxNumFactors; currFactor++) {
      FactorType factorType = static_cast<FactorType>(currFactor);
      const Factor *factor = copy.GetFactor(currPos, factorType);
      if (factor != NULL)
        SetFactor(currPos, factorType, factor);
    }
  }
}
void Phrase::MergeFactors(const Phrase &copy)
{
  UTIL_THROW_IF2(GetSize() != copy.GetSize(),
                 "Both phrases need to be the same size to merge");
  size_t size = GetSize();
  const size_t maxNumFactors = MAX_NUM_FACTORS;
  for (size_t currPos = 0; currPos < size; currPos++) {
    for (unsigned int currFactor = 0; currFactor < maxNumFactors; currFactor++) {
      FactorType factorType = static_cast<FactorType>(currFactor);
      const Factor *factor = copy.GetFactor(currPos, factorType);
      if (factor != NULL)
        SetFactor(currPos, factorType, factor);
    }
  }
}
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const pugi::xml_node &parentNode,
                                     const Parameter &params)
{
  int childNum = 0;
  for (pugi::xml_node childNode = parentNode.first_child(); childNode;
       childNode = childNode.next_sibling()) {
    string nodeName = childNode.name();

    // span label
    string label;
    int startPos = output.size();

    if (!nodeName.empty()) {
      pugi::xml_attribute attribute = childNode.attribute("label");
      label = attribute.as_string();

      // recursively call this function. For proper recursive trees
      XMLParse(output, tree, childNode, params);
    }

    // fill phrase vector
    string text = childNode.value();
    Escape(text);
    //cerr << childNum << " " << label << "=" << text << endl;

    std::vector<string> toks;
    Moses::Tokenize(toks, text);

    for (size_t i = 0; i < toks.size(); ++i) {
      const string &tok = toks[i];
      Word *word = new Word(output.size(), tok);
      output.push_back(word);
    }

    // is it a labelled span?
    int endPos = output.size() - 1;

    // fill syntax labels
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }

    ++childNum;
  }
}
void Manager::CreateInputPaths(const InputPath &prevPath, size_t pos)
{
  if (pos >= m_sentence.GetSize()) {
    return;
  }

  Phrase *phrase = new Phrase(prevPath.GetPhrase(), 1);
  phrase->SetLastWord(m_sentence.GetWord(pos));

  InputPath *path = new InputPath(&prevPath, phrase, pos);
  m_inputPathQueue.push_back(path);

  CreateInputPaths(*path, pos + 1);
}
void Phrase::MergeFactors(const Phrase &copy)
{
  CHECK(GetSize() == copy.GetSize());
  size_t size = GetSize();
  const size_t maxNumFactors = MAX_NUM_FACTORS;
  for (size_t currPos = 0; currPos < size; currPos++) {
    for (unsigned int currFactor = 0; currFactor < maxNumFactors; currFactor++) {
      FactorType factorType = static_cast<FactorType>(currFactor);
      const Factor *factor = copy.GetFactor(currPos, factorType);
      if (factor != NULL)
        SetFactor(currPos, factorType, factor);
    }
  }
}
std::string LexicalReorderingTableTree::MakeCacheKey(const Phrase& f, const Phrase& e) const
{
  std::string key;
  if (!m_FactorsF.empty()) {
    key += auxClearString(f.GetStringRep(m_FactorsF));
  }
  if (!m_FactorsE.empty()) {
    if (!key.empty()) {
      key += "|||";
    }
    key += auxClearString(e.GetStringRep(m_FactorsE));
  }
  return key;
}
bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const
{
  if (inputPhrase.GetSize() != GetSize()) {
    return false;
  }
  for (size_t currPos = 0; currPos < GetSize(); currPos++) {
    for (std::vector<FactorType>::const_iterator i = factorVec.begin(); i != factorVec.end(); ++i) {
      if (GetFactor(currPos, *i) != inputPhrase.GetFactor(currPos, *i))
        return false;
    }
  }
  return true;
}
bool LanguageModelMultiFactor::Useable(const Phrase &phrase) const
{
  if (phrase.GetSize() == 0)
    return false;

  // whether phrase contains all factors in this LM
  const Word &word = phrase.GetWord(0);
  for (size_t currFactor = 0; currFactor < MAX_NUM_FACTORS; ++currFactor) {
    if (m_factorTypes[currFactor] && word[currFactor] == NULL)
      return false;
  }
  return true;
}
/**
 * Pre-calculate the n-gram probabilities for the words in the specified phrase.
 *
 * Note that when this method is called, we do not have access to the context
 * in which this phrase will eventually be applied.
 *
 * In other words, we know what words are in this phrase,
 * but we do not know what words will come before or after this phrase.
 *
 * The parameters fullScore, ngramScore, and oovCount are all output parameters.
 *
 * The value stored in oovCount is the number of words in the phrase
 * that are not in the language model's vocabulary.
 *
 * The sum of the ngram scores for all words in this phrase is stored in fullScore.
 *
 * The value stored in ngramScore is similar, but only full-order ngram scores are included.
 *
 * This is best shown by example:
 *
 * Assume a trigram backward language model and a phrase "a b c d e f g"
 *
 * fullScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 *
 * ngramScore would represent the sum of the logprob scores for the full-order
 * (here, trigram) values only:
 *
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 */
template <class Model>
void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore,
    float &ngramScore, size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  UTIL_THROW_IF(
    (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)),
    util::Exception,
    "BackwardLanguageModel does not currently support rules that include <s>"
  );

  float before_boundary = 0.0f;

  int lastWord = phrase.GetSize() - 1;
  int ngramBoundary = m_ngram->Order() - 1;
  int boundary = (lastWord < ngramBoundary) ? 0 : ngramBoundary;

  int position;
  for (position = lastWord; position >= 0; position -= 1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
    if (!index) ++oovCount;

    if (position == boundary) {
      before_boundary = scorer.Finish();
    }
  }

  fullScore = scorer.Finish();

  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
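For readers unfamiliar with the two output parameters, the following standalone sketch illustrates the arithmetic described in the comment above. It is an illustration only, not part of Moses, and the per-word log probabilities are made up: fullScore sums the score of every word, while ngramScore keeps only the words scored with a full (order-1)-word context inside the phrase.

#include <cstddef>
#include <cstdio>

int main()
{
  const std::size_t order = 3;  // trigram model
  // hypothetical log10 probabilities for "a b c d e f g", scored back to front:
  // p(g), p(f | g), p(e | g f), p(d | f e), p(c | e d), p(b | d c), p(a | c b)
  const double logprob[] = { -2.1, -1.4, -0.9, -1.1, -0.7, -1.3, -0.8 };
  const std::size_t n = sizeof(logprob) / sizeof(logprob[0]);

  double fullScore = 0.0, beforeBoundary = 0.0;
  for (std::size_t i = 0; i < n; ++i) {
    fullScore += logprob[i];
    // the first (order - 1) terms lack a full-order context: p(g) and p(f | g)
    if (i < order - 1) beforeBoundary += logprob[i];
  }
  const double ngramScore = fullScore - beforeBoundary;
  std::printf("fullScore=%.2f ngramScore=%.2f\n", fullScore, ngramScore);
  return 0;
}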
void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f)
{
  if (m_FactorsE.empty()) {
    //f is all of key...
    Candidates cands;
    m_Table->GetCandidates(MakeTableKey(f, Phrase(ARRAY_SIZE_INCR)), &cands);
    m_Cache[MakeCacheKey(f, Phrase(ARRAY_SIZE_INCR))] = cands;
  } else {
    ObjectPool<PPimp> pool;
    PPimp* pPos = m_Table->GetRoot();
    //1) goto subtree for f
    for (size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i) {
      /* old code
      pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
      */
      pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
    }
    if (0 != pPos && pPos->isValid()) {
      pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
    }
    if (0 == pPos || !pPos->isValid()) {
      return;
    }
    //2) explore whole subtree depth first & cache
    std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";

    std::vector<State> stack;
    stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx), 0, 0)), ""));
    Candidates cands;
    while (!stack.empty()) {
      if (stack.back().pos->isValid()) {
        LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
        std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w, TargetVocId);
        //cache this
        m_Table->GetCandidates(*stack.back().pos, &cands);
        if (!cands.empty()) {
          m_Cache[cache_key + auxClearString(next_path)] = cands;
        }
        cands.clear();
        PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx), 0, 0));
        ++stack.back().pos->idx;
        stack.push_back(State(next_pos, next_path));
      } else {
        stack.pop_back();
      }
    }
  }
}
// Search up to MAX_RANGE words to the right of pos for one of the sought tokens;
// give up (return "not found") if a punctuation token is reached first.
size_t Found(const Phrase &source, int pos, int factor, const std::string &str)
{
  const size_t MAX_RANGE = 10;

  vector<string> soughts = Moses::Tokenize(str, " ");
  vector<string> puncts = Moses::Tokenize(". : , ;", " ");

  size_t maxEnd = std::min(source.size(), (size_t) pos + MAX_RANGE);
  for (size_t i = pos + 1; i < maxEnd; ++i) {
    const Word &word = source[i];
    bool found;

    found = Found(word, factor, puncts);
    if (found) {
      return std::numeric_limits<size_t>::max();
    }

    found = Found(word, factor, soughts);
    if (found) {
      return i;
    }
  }

  return std::numeric_limits<size_t>::max();
}
void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedFutureScore) const
{
  // get length of source and target phrase
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = source.GetSize();

  // create feature names
  stringstream nameSource;
  nameSource << "s" << sourceLength;
  stringstream nameTarget;
  nameTarget << "t" << targetLength;
  stringstream nameBoth;
  nameBoth << sourceLength << "," << targetLength;

  // increase feature counts
  scoreBreakdown.PlusEquals(this, nameSource.str(), 1);
  scoreBreakdown.PlusEquals(this, nameTarget.str(), 1);
  scoreBreakdown.PlusEquals(this, nameBoth.str(), 1);

  //cerr << nameSource.str() << " " << nameTarget.str() << " " << nameBoth.str() << endl;
}
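As a quick illustration of the naming scheme above (a standalone sketch that does not use the Moses API, with hypothetical phrase lengths): for a 3-word source phrase and a 2-word target phrase, the feature fires the sparse components "s3", "t2" and "3,2", each with count 1.

#include <cstddef>
#include <iostream>
#include <sstream>

int main()
{
  const std::size_t sourceLength = 3;  // hypothetical source phrase length
  const std::size_t targetLength = 2;  // hypothetical target phrase length

  std::stringstream nameSource, nameTarget, nameBoth;
  nameSource << "s" << sourceLength;                // "s3"
  nameTarget << "t" << targetLength;                // "t2"
  nameBoth << sourceLength << "," << targetLength;  // "3,2"

  // each of these names would receive a count of 1 in the score breakdown
  std::cout << nameSource.str() << " " << nameTarget.str() << " " << nameBoth.str() << std::endl;
  return 0;
}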
void PinyinPhraseLib::optimize_phrase_frequencies(uint32 max_freq)
{
  uint32 freq = m_phrase_lib.get_max_phrase_frequency();

  if (freq < max_freq || !max_freq) return;

  double ratio = ((double) max_freq) / freq;
  Phrase phrase;
  for (int i = 0; i < (int) m_phrase_lib.number_of_phrases(); ++i) {
    phrase = m_phrase_lib.get_phrase_by_index(i);
    phrase.set_frequency((uint32)(phrase.frequency() * ratio));
  }
}
// score ngrams of words that have been added before the previous word span
void BleuScoreFeature::GetNgramMatchCounts_prefix(Phrase& phrase,
    const NGrams& ref_ngram_counts,
    std::vector< size_t >& ret_counts,
    std::vector< size_t >& ret_matches,
    size_t new_start_indices,
    size_t last_end_index) const
{
  NGrams::const_iterator ref_ngram_counts_iter;
  size_t ngram_start_idx, ngram_end_idx;

  // Chiang et al (2008) use unclipped counts of ngram matches
  for (size_t start_idx = 0; start_idx < new_start_indices; start_idx++) {
    for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
      ngram_start_idx = start_idx;
      ngram_end_idx = start_idx + order;
      if (order > ngram_end_idx) break;
      if (ngram_end_idx > last_end_index) break;

      Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0);
      ret_counts[order]++;

      ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
      if (ref_ngram_counts_iter != ref_ngram_counts.end())
        ret_matches[order]++;
    }
  }
}
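The unclipped counting referenced above (Chiang et al., 2008) can be illustrated with a standalone sketch that uses plain strings rather than the Moses types; the hypothesis and reference below are made up. Every hypothesis n-gram occurrence that also appears in the reference counts as a match, without clipping to how many times the reference contains it.

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <vector>

typedef std::vector<std::string> NGram;

int main()
{
  const std::size_t bleu_order = 4;
  const std::vector<std::string> hyp = {"the", "cat", "the", "cat"};
  const std::vector<std::string> ref = {"the", "cat", "sat"};

  // collect the reference n-grams (presence is enough for unclipped matching)
  std::map<NGram, std::size_t> refNgrams;
  for (std::size_t i = 0; i < ref.size(); ++i)
    for (std::size_t n = 1; n <= bleu_order && i + n <= ref.size(); ++n)
      ++refNgrams[NGram(ref.begin() + i, ref.begin() + i + n)];

  std::vector<std::size_t> counts(bleu_order, 0), matches(bleu_order, 0);
  for (std::size_t i = 0; i < hyp.size(); ++i) {
    for (std::size_t n = 1; n <= bleu_order && i + n <= hyp.size(); ++n) {
      ++counts[n - 1];
      if (refNgrams.count(NGram(hyp.begin() + i, hyp.begin() + i + n)))
        ++matches[n - 1];  // unclipped: the bigram "the cat" matches twice here
    }
  }
  for (std::size_t n = 0; n < bleu_order; ++n)
    std::cout << (n + 1) << "-gram: " << matches[n] << "/" << counts[n] << "\n";
  return 0;
}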
void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedFutureScore) const
{
  const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0];
  if (!m_glueRules && (targetPhraseLHS == m_glueTargetLHS)) {
    return;
  }
  if (!m_nonGlueRules && (targetPhraseLHS != m_glueTargetLHS)) {
    return;
  }

  for (size_t posS = 0; posS < source.GetSize(); ++posS) {
    const Word &wordS = source.GetWord(posS);
    if (!wordS.IsNonTerminal()) {
      return;
    }
  }

  ostringstream namestr;

  for (size_t posT = 0; posT < targetPhrase.GetSize(); ++posT) {
    const Word &wordT = targetPhrase.GetWord(posT);
    const Factor* factorT = wordT[0];
    if (wordT.IsNonTerminal()) {
      namestr << "[";
    }
    namestr << factorT->GetString();
    if (wordT.IsNonTerminal()) {
      namestr << "]";
    }
    namestr << "|";
  }

  namestr << targetPhraseLHS->GetString() << "|";

  for (AlignmentInfo::const_iterator it = targetPhrase.GetAlignNonTerm().begin();
       it != targetPhrase.GetAlignNonTerm().end(); ++it) {
    namestr << "|" << it->first << "-" << it->second;
  }

  scoreBreakdown.PlusEquals(this, namestr.str(), 1);
  if (targetPhraseLHS != m_glueTargetLHS) {
    scoreBreakdown.PlusEquals(this, 1);
  }
}
void OnDiskQuery::Tokenize(Phrase &phrase,
                           const std::string &token,
                           bool addSourceNonTerm,
                           bool addTargetNonTerm)
{
  // a token is a non-terminal if it is enclosed in square brackets, e.g. [X] or [X][Y]
  bool nonTerm = false;
  size_t tokSize = token.size();
  int comStr = token.compare(0, 1, "[");

  if (comStr == 0) {
    comStr = token.compare(tokSize - 1, 1, "]");
    nonTerm = comStr == 0;
  }

  if (nonTerm) {
    // non-term
    size_t splitPos = token.find_first_of("[", 2);
    std::string wordStr = token.substr(0, splitPos);

    if (splitPos == std::string::npos) {
      // lhs - only 1 word
      WordPtr word(new Word());
      word->CreateFromString(wordStr, m_wrapper.GetVocab());
      phrase.AddWord(word);
    } else {
      // source & target non-terms, e.g. [X][Y]
      if (addSourceNonTerm) {
        WordPtr word(new Word());
        word->CreateFromString(wordStr, m_wrapper.GetVocab());
        phrase.AddWord(word);
      }

      wordStr = token.substr(splitPos, tokSize - splitPos);

      if (addTargetNonTerm) {
        WordPtr word(new Word());
        word->CreateFromString(wordStr, m_wrapper.GetVocab());
        phrase.AddWord(word);
      }
    }
  } else {
    // term
    WordPtr word(new Word());
    word->CreateFromString(token, m_wrapper.GetVocab());
    phrase.AddWord(word);
  }
}
//! set walls based on "-monotone-at-punctuation" flag
void ReorderingConstraint::SetMonotoneAtPunctuation(const Phrase &sentence)
{
  for (size_t i = 0; i < sentence.GetSize(); i++) {
    const Word& word = sentence.GetWord(i);
    if (word[0]->GetString() == "," ||
        word[0]->GetString() == "." ||
        word[0]->GetString() == "!" ||
        word[0]->GetString() == "?" ||
        word[0]->GetString() == ":" ||
        word[0]->GetString() == ";" ||
        word[0]->GetString() == "\"") {
      // set wall before and after punc, but not at sentence start, end
      if (i > 0 && i < m_size - 1) SetWall(i, true);
      if (i > 1) SetWall(i - 1, true);
    }
  }
}
void PartDisplay::calculate()
{
  _useColour = false;
  _r = 255;
  _g = 255;
  _b = 255;

  Phrase *phr = p->phrase();
  DisplayParams *d = p->displayParams();
  DisplayParams *phrd = phr ? phr->displayParams() : 0;

  if (d->style() != DisplayParams::None) {
    if (d->style() == DisplayParams::Default && phr) {
      // Get values from Phrase
      if (phrd->style() == DisplayParams::Colour) {
        phrd->colour(_r, _g, _b);
        _useColour = true;
      } else if (phrd->style() == DisplayParams::PresetColour && preset) {
        preset->colour(phrd->presetColour(), _r, _g, _b);
        _useColour = true;
      }
    } else if (d->style() != DisplayParams::Default) {
      // Values from Part: must be using colour
      _useColour = true;
      if (d->style() == DisplayParams::Colour) {
        d->colour(_r, _g, _b);
        _useColour = true;
      } else if (preset) {
        preset->colour(d->presetColour(), _r, _g, _b);
        _useColour = true;
      }
    }
  }

  _calculated = true;
}
void PinyinPhraseLib::create_pinyin_index()
{
  if (!m_pinyin_table || !m_pinyin_table->size()) return;

  clear_phrase_index();

  uint32 pinyin_offset = 0;

  WideString content;
  Phrase phrase;

  for (uint32 i = 0; i < m_phrase_lib.number_of_phrases(); i++) {
    phrase = m_phrase_lib.get_phrase_by_index(i);
    content = phrase.get_content();
    std::vector<PinyinKeyVector> key_vv;
    m_pinyin_table->find_key_strings(key_vv, content);

    for (uint32 j = 0; j < key_vv.size(); j++) {
      for (uint32 k = 0; k < key_vv[j].size(); k++)
        m_pinyin_lib.push_back(key_vv[j][k]);

      insert_pinyin_phrase_into_index(phrase.get_phrase_offset(), pinyin_offset);
      pinyin_offset = m_pinyin_lib.size();
    }

#if 0
    if (key_vv.size() > 1 && content.length() > 1) {
      for (uint32 x = 0; x < key_vv.size(); x++) {
        std::cerr << phrase.frequency() << "\t| " << utf8_wcstombs(content) << " =";
        for (uint32 y = 0; y < key_vv[x].size(); y++)
          std::cerr << " " << key_vv[x][y];
        std::cerr << "\n";
      }
    }
#endif
    std::cout << "." << std::flush;
  }

  sort_phrase_tables();

  std::cout << "Phrase Number = " << count_phrase_number() << "\n";
}
void Hypothesis::GetOutputPhrase(Phrase &out) const
{
  if (m_prevHypo != NULL)
    m_prevHypo->GetOutputPhrase(out);
  out.Append(GetCurrTargetPhrase());
}