std::string LexicalReorderingTableMemory::MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const { return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)), auxClearString(e.GetStringRep(m_FactorsE)), auxClearString(c.GetStringRep(m_FactorsC))); }
/**
 * Build the cache key for a (source, target) phrase pair.
 *
 * The key is the cleared string representation of f, then of e, joined by
 * "|||" — matching the field separator used when the table file is read.
 * A side is omitted entirely when its factor set is empty, and the "|||"
 * separator is only inserted when both sides contribute.
 *
 * Fix: removed the stray semicolon after the function body (an empty
 * declaration at namespace scope, ill-formed before C++11).
 */
std::string LexicalReorderingTableTree::MakeCacheKey(const Phrase& f,
                                                     const Phrase& e) const
{
  std::string key;
  if(!m_FactorsF.empty()) {
    key += auxClearString(f.GetStringRep(m_FactorsF));
  }
  if(!m_FactorsE.empty()) {
    if(!key.empty()) {
      key += "|||";
    }
    key += auxClearString(e.GetStringRep(m_FactorsE));
  }
  return key;
}
void LexicalReorderingTableMemory:: LoadFromFile(const std::string& filePath) { std::string fileName = filePath; if(!FileExists(fileName) && FileExists(fileName+".gz")) fileName += ".gz"; InputFileStream file(fileName); std::string line(""), key(""); int numScores = -1; std::cerr << "Loading table into memory..."; while(!getline(file, line).eof()) { std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||"); int t = 0 ; std::string f(""),e(""),c(""); if(!m_FactorsF.empty()) { //there should be something for f f = auxClearString(tokens.at(t)); ++t; } if(!m_FactorsE.empty()) { //there should be something for e e = auxClearString(tokens.at(t)); ++t; } if(!m_FactorsC.empty()) { //there should be something for c c = auxClearString(tokens.at(t)); ++t; } //last token are the probs std::vector<float> p = Scan<float>(Tokenize(tokens.at(t))); //sanity check: all lines must have equall number of probs if(-1 == numScores) { numScores = (int)p.size(); //set in first line } if((int)p.size() != numScores) { TRACE_ERR( "found inconsistent number of probabilities... found " << p.size() << " expected " << numScores << std::endl); exit(0); } std::transform(p.begin(),p.end(),p.begin(),TransformScore); std::transform(p.begin(),p.end(),p.begin(),FloorScore); //save it all into our map m_Table[MakeKey(f,e,c)] = p; } std::cerr << "done.\n"; }
// Pre-populate m_Cache with every candidate list reachable for source
// phrase f.  With no target factors, one lookup suffices; otherwise the
// target-side subtree under f is explored depth-first and each node's
// candidates are cached under "f-string|||target-path".
void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f)
{
  if(m_FactorsE.empty()) {
    // f is all of the key: a single table lookup caches everything for f
    Candidates cands;
    m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands);
    m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands;
  } else {
    ObjectPool<PPimp> pool;  // owns the tree-position objects created below
    PPimp* pPos = m_Table->GetRoot();
    // 1) descend to the subtree for f, one source word per step;
    //    stop early if a word is not in the tree (pPos null/invalid)
    for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i) {
      /* old code
      pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
      */
      pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
    }
    // cross the source/target boundary marker
    if(0 != pPos && pPos->isValid()) {
      pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
    }
    if(0 == pPos || !pPos->isValid()) {
      return;  // f not present in the table: nothing to cache
    }
    // 2) explore the whole target-side subtree depth-first and cache
    //    the candidate list at every node that has one
    std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";
    std::vector<State> stack;  // explicit DFS stack of (position, path-so-far)
    stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
    Candidates cands;
    while(!stack.empty()) {
      if(stack.back().pos->isValid()) {
        // extend the target path by the word at the current child index
        LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
        std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
        // cache this node's candidates (if any), keyed by f + target path
        m_Table->GetCandidates(*stack.back().pos,&cands);
        if(!cands.empty()) {
          m_Cache[cache_key + auxClearString(next_path)] = cands;
        }
        cands.clear();
        // push the child BEFORE advancing the parent's index, so siblings
        // are visited after the child's subtree is exhausted
        PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
        ++stack.back().pos->idx;
        stack.push_back(State(next_pos,next_path));
      } else {
        stack.pop_back();  // this subtree is exhausted; backtrack
      }
    }
  }
}
std::string LexicalReorderingTableMemory::MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const { /* std::string key; if(!m_FactorsF.empty()){ key += f.GetStringRep(m_FactorsF); } if(!m_FactorsE.empty()){ if(!key.empty()){ key += " ||| "; } key += e.GetStringRep(m_FactorsE); } */ return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)), auxClearString(e.GetStringRep(m_FactorsE)), auxClearString(c.GetStringRep(m_FactorsC))); }