// Pre-computes and caches the candidate score lists for every table entry
// reachable from source phrase f, so later lookups hit m_Cache instead of
// the on-disk prefix tree.
//   - If no target-side factors are configured (m_FactorsE empty), the source
//     phrase alone is the full key: fetch and cache its candidates directly.
//   - Otherwise, descend the prefix tree along f's source words and then
//     enumerate the whole target-side subtree depth-first, caching the
//     candidates of every non-empty node under "src|||tgt" keys.
void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f) {
  if(m_FactorsE.empty()) {
    //f is all of key...
    Candidates cands;
    // Phrase(ARRAY_SIZE_INCR) serves as an empty target-side placeholder here.
    m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands);
    m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands;
  } else {
    // Pool owns all PPimp tree-position objects created during the traversal,
    // so they are released together when the pool goes out of scope.
    ObjectPool<PPimp> pool;
    PPimp* pPos = m_Table->GetRoot();
    //1) goto subtree for f
    for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i) {
      /* old code
      pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
      */
      pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
    }
    // MagicWord separates the source-word path from the target-side subtree
    // in the prefix tree encoding.
    if(0 != pPos && pPos->isValid()) {
      pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
    }
    // Source phrase not present in the table: nothing to cache.
    if(0 == pPos || !pPos->isValid()) {
      return;
    }
    //2) explore whole subtree depth first & cache
    std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";
    // Explicit DFS stack; each State holds a tree position plus the
    // target-word path (as text) accumulated on the way down.
    std::vector<State> stack;
    stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
    Candidates cands;
    while(!stack.empty()) {
      if(stack.back().pos->isValid()) {
        // Current child at this node: extend the path with its surface form.
        LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
        std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
        //cache this
        m_Table->GetCandidates(*stack.back().pos,&cands);
        if(!cands.empty()) {
          m_Cache[cache_key + auxClearString(next_path)] = cands;
        }
        cands.clear();
        // Descend into the child; create its position BEFORE advancing the
        // parent's idx, which moves the parent on to its next sibling for
        // when we pop back up.
        PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
        ++stack.back().pos->idx;
        stack.push_back(State(next_pos,next_path));
      } else {
        // All children of this node visited: backtrack.
        stack.pop_back();
      }
    }
  }
}
void RuleTableLoaderCompact::LoadPhraseSection( LineReader &reader, const std::vector<Word> &vocab, std::vector<Phrase> &rhsPhrases, std::vector<size_t> &lhsIds) { // Read phrase count. reader.ReadLine(); const size_t phraseCount = std::atoi(reader.m_line.c_str()); // Reads lines, storing Phrase object for each RHS and vocab ID for each LHS. rhsPhrases.resize(phraseCount, Phrase(0)); lhsIds.resize(phraseCount); std::vector<size_t> tokenPositions; for (size_t i = 0; i < phraseCount; ++i) { reader.ReadLine(); tokenPositions.clear(); FindTokens(tokenPositions, reader.m_line); const char *charLine = reader.m_line.c_str(); lhsIds[i] = std::atoi(charLine+tokenPositions[0]); for (size_t j = 1; j < tokenPositions.size(); ++j) { rhsPhrases[i].AddWord(vocab[std::atoi(charLine+tokenPositions[j])]); } } }
// Adds a phrase (with its pinyin key sequence) to the library.
// Returns the existing entry when the phrase is already present and enabled;
// otherwise appends it to the underlying library and registers it in the
// pinyin index. An invalid Phrase is returned on empty input, an
// uninitialised library, or a failed append.
Phrase PinyinPhraseLib::append (const WideString &phrase, const PinyinKeyVector &keys)
{
  if (phrase.length () == 0 || !valid ())
    return Phrase ();

  // Reuse an existing, enabled entry as-is.
  Phrase existing = m_phrase_lib.find (phrase);
  if (existing.valid () && existing.is_enable ())
    return existing;

  // Create the entry, then index it by its pinyin keys.
  Phrase created = m_phrase_lib.append (phrase);
  if (!created.valid ())
    return Phrase ();
  insert_phrase_into_index (created, keys);
  return created;
}
bool QphHandler::endElement( const QString& /* namespaceURI */, const QString& /* localName */, const QString& qName ) { if ( qName == QString("source") ) source = accum; else if ( qName == QString("target") ) target = accum; else if ( qName == QString("definition") ) definition = accum; else if ( qName == QString("phrase") ) pb->append( Phrase(source, target, definition) ); return TRUE; }
void PhraseBookBox::save() { pb.clear(); QListViewItem *item = lv->firstChild(); while ( item != 0 ) { if ( !item->text(PhraseLVI::SourceTextShown).isEmpty() && item->text(PhraseLVI::SourceTextShown) != NewPhrase ) pb.append( Phrase(((PhraseLVI *) item)->phrase()) ); item = item->nextSibling(); } if ( !pb.save( fn ) ) QMessageBox::warning( this, tr("Qt Linguist"), tr("Cannot save phrase book '%1'.").arg(fn) ); }
int main() { std::unordered_map< std::string, int > dictionary; //std::ifstream input("../data/corpus/europarl.lowercased.en", std::ios::in); int phrases[][3] = { { 0, 1, 2 }, { 0, 2, 3 }, { 2, 3, 1 }, }; /*for (std::string line; std::getline(input, line); ) { std::istringstream iss(line); Phrase phrase; for (std::string word; iss >> word; ) phrase.push_back(getId(dictionary, word)); phrases.push_back(phrase); if (phrases.size() == 1000) break; }*/ std::vector< Phrase > phs; for (int i = 0; i < 3; ++i) phs.push_back(Phrase(phrases[i], phrases[i+1])); LanguageModel model(learn_ngram_language_model(phs, 4, 2, 0.0001)); for (int i = 0; i < 4; ++i) { for (int j= 0; j < 4; ++j) { Phrase phrase; phrase.push_back(i); phrase.push_back(j); printf("%d %d %lf\n", i, j, model.get_probability(phrase)); } Phrase phrase; phrase.push_back(i); printf("%d %lf\n", i, model.get_probability(phrase)); } Phrase phrase; phrase.push_back(2); phrase.push_back(3); phrase.push_back(1); printf("2 3 1 %lf\n", model.get_probability(phrase)); model.save("lmodel.bin"); if (!(model == load_ngram_language_model("lmodel.bin"))) throw 42; }
// Returns the reordering scores for source phrase f paired with target
// phrase e. The third argument is presumably a placeholder context phrase
// (constructed with ARRAY_SIZE_INCR) — confirm against GetScore's contract.
Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
{
  const Phrase placeholder(ARRAY_SIZE_INCR);
  return m_table->GetScore(f, e, placeholder);
}
// Returns the reordering scores for source phrase f paired with target
// phrase e, using a Phrase(Output)-constructed placeholder as context.
// NOTE(review): this file also contains another GetProb definition with the
// same signature that passes Phrase(ARRAY_SIZE_INCR) instead — the two look
// like copies from different revisions; confirm which belongs here.
Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
{
  const Phrase placeholder(Output);
  return m_table->GetScore(f, e, placeholder);
}
// Reconstructs the Phrase value object from this list-view item's
// original-text columns (source, target, definition).
Phrase PhraseLVI::phrase() const
{
    const QString src = text( SourceTextOriginal );
    const QString tgt = text( TargetTextOriginal );
    const QString def = text( DefinitionText );
    return Phrase( src, tgt, def );
}