// Builds the lookup key for the phrase triple (f, e, c).
// Each phrase is rendered with its own factor list (m_FactorsF, m_FactorsE,
// m_FactorsC), passed through Trim(), and the three results are handed to
// the MakeKey overload that assembles the final key.
std::string LexicalReorderingTableCompact::MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const
{
  const std::string sourceRep = f.GetStringRep(m_FactorsF);
  const std::string targetRep = e.GetStringRep(m_FactorsE);
  const std::string contextRep = c.GetStringRep(m_FactorsC);
  return MakeKey(Trim(sourceRep), Trim(targetRep), Trim(contextRep));
}
// Builds the lookup key for the phrase triple (f, e, c).
// Each phrase is rendered with its own factor list (m_FactorsF, m_FactorsE,
// m_FactorsC), cleaned with auxClearString(), and the three results are
// forwarded to the MakeKey overload that assembles the final key.
std::string LexicalReorderingTableMemory::MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const
{
  const std::string sourceRep = f.GetStringRep(m_FactorsF);
  const std::string targetRep = e.GetStringRep(m_FactorsE);
  const std::string contextRep = c.GetStringRep(m_FactorsC);
  return MakeKey(auxClearString(sourceRep),
                 auxClearString(targetRep),
                 auxClearString(contextRep));
}
// Builds the key under which entries for (f, e) are stored in the cache.
// The source part is included only when m_FactorsF is non-empty and the
// target part only when m_FactorsE is non-empty. When both parts are
// present (and the source part is not the empty string) they are joined
// with "|||". Both parts are cleaned with auxClearString().
std::string LexicalReorderingTableTree::MakeCacheKey(const Phrase& f, const Phrase& e) const
{
  std::string cacheKey;
  if (!m_FactorsF.empty()) {
    cacheKey += auxClearString(f.GetStringRep(m_FactorsF));
  }
  if (m_FactorsE.empty()) {
    // no target factors configured: the source part is the whole key
    return cacheKey;
  }
  if (!cacheKey.empty()) {
    cacheKey.append("|||");
  }
  cacheKey += auxClearString(e.GetStringRep(m_FactorsE));
  return cacheKey;
}
// Writes the single best translation for the current input to `collector`.
// Does nothing when `collector` is null. The output line optionally starts
// with the hypothesis score (when StaticData's GetOutputHypoScore() is set),
// followed by the target yield with its first and last word removed, and
// always ends with a newline. When no best hyperedge exists, the score
// (if requested) is written as "0 " and the translation is left empty.
void Manager::OutputBest(OutputCollector *collector) const
{
  if (collector == NULL) {
    return;
  }

  std::ostringstream out;
  FixPrecision(out);

  const SHyperedge *best = GetBestSHyperedge();
  if (best != NULL) {
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << best->label.score << " ";
    }
    Phrase translation = GetOneBestTargetYield(*best);
    // strip the first and the last word of the yield
    UTIL_THROW_IF2(translation.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    translation.RemoveWord(0);
    translation.RemoveWord(translation.GetSize()-1);
    out << translation.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
  } else {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << "0 ";
    }
  }
  // both outcomes terminate the line
  out << '\n';

  collector->Write(m_source.GetTranslationId(), out.str());
}
std::string LexicalReorderingTableMemory::MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const { /* std::string key; if(!m_FactorsF.empty()){ key += f.GetStringRep(m_FactorsF); } if(!m_FactorsE.empty()){ if(!key.empty()){ key += " ||| "; } key += e.GetStringRep(m_FactorsE); } */ return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)), auxClearString(e.GetStringRep(m_FactorsE)), auxClearString(c.GetStringRep(m_FactorsC))); }
// Produces the decoded target phrases for `sourcePhrase`.
//
// With PREnc coding the decoding cache is consulted first: a cached result
// is returned directly when the call is not top-level or when no bits are
// left to decode; a cached but incomplete result is copied and decoding
// resumes `bitsLeft` bits from the end of the encoded stream.
//
// Returns an empty TargetPhraseVectorPtr when the source phrase is not
// present in the dictionary's hash (the lookup yields m_hash.GetSize()).
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
{
  // Not using TargetPhraseCollection avoiding "new" operator
  // which can introduce heavy locking with multiple threads
  TargetPhraseVectorPtr tpv(new TargetPhraseVector());
  size_t bitsLeft = 0;

  if (m_coding == PREnc) {
    std::pair<TargetPhraseVectorPtr, size_t> cached
      = m_decodingCache.Retrieve(sourcePhrase);

    if (cached.first != NULL) {
      // cached and complete, or completion is not required
      if (!topLevel || cached.second == 0) {
        return cached.first;
      }
      // cached but incomplete: start from a copy of what is already decoded
      bitsLeft = cached.second;
      tpv->resize(cached.first->size());
      std::copy(cached.first->begin(), cached.first->end(), tpv->begin());
    }
  }

  // Look up the identifier of the source phrase
  std::string sourceString = sourcePhrase.GetStringRep(*m_input);
  size_t sourceId = m_phraseDictionary.m_hash[MakeSourceKey(sourceString)];

  if (sourceId == m_phraseDictionary.m_hash.GetSize()) {
    // unknown source phrase
    return TargetPhraseVectorPtr();
  }

  // Fetch the compressed, encoded target phrase collection
  std::string encoded;
  if (m_phraseDictionary.m_inMemory) {
    encoded = m_phraseDictionary.m_targetPhrasesMemory[sourceId];
  } else {
    encoded = m_phraseDictionary.m_targetPhrasesMapped[sourceId];
  }

  BitWrapper<> bitStream(encoded);
  if (m_coding == PREnc && bitsLeft) {
    bitStream.SeekFromEnd(bitsLeft);
  }

  // Decompress and decode into tpv
  return DecodeCollection(tpv, bitStream, sourcePhrase, topLevel);
}
void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f) { if(m_FactorsE.empty()) { //f is all of key... Candidates cands; m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands); m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands; } else { ObjectPool<PPimp> pool; PPimp* pPos = m_Table->GetRoot(); //1) goto subtree for f for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i) { /* old code pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId); */ pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId); } if(0 != pPos && pPos->isValid()) { pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord); } if(0 == pPos || !pPos->isValid()) { return; } //2) explore whole subtree depth first & cache std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||"; std::vector<State> stack; stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),"")); Candidates cands; while(!stack.empty()) { if(stack.back().pos->isValid()) { LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx); std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId); //cache this m_Table->GetCandidates(*stack.back().pos,&cands); if(!cands.empty()) { m_Cache[cache_key + auxClearString(next_path)] = cands; } cands.clear(); PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0)); ++stack.back().pos->idx; stack.push_back(State(next_pos,next_path)); } else { stack.pop_back(); } } } }
//Find top n translations of source, and send them to output static void outputTopN(const StringPiece& sourcePhraseString, PhraseDictionary* phraseTable, const std::vector<FactorType> &input, ostream& out) { //get list of target phrases Phrase sourcePhrase; sourcePhrase.CreateFromString(Input,input,sourcePhraseString,NULL); InputPath inputPath(sourcePhrase, NonTerminalSet(), WordsRange(0,sourcePhrase.GetSize()-1),NULL,NULL); InputPathList inputPaths; inputPaths.push_back(&inputPath); phraseTable->GetTargetPhraseCollectionBatch(inputPaths); const TargetPhraseCollection* targetPhrases = inputPath.GetTargetPhrases(*phraseTable); //print phrases const std::vector<FactorType>& output = StaticData::Instance().GetOutputFactorOrder(); if (targetPhrases) { //if (targetPhrases->GetSize() > 10) cerr << "src " << sourcePhrase << " tgt count " << targetPhrases->GetSize() << endl; for (TargetPhraseCollection::const_iterator i = targetPhrases->begin(); i != targetPhrases->end(); ++i) { const TargetPhrase* targetPhrase = *i; out << sourcePhrase.GetStringRep(input); out << " ||| "; out << targetPhrase->GetStringRep(output); out << " ||| "; const ScoreComponentCollection scores = targetPhrase->GetScoreBreakdown(); vector<float> phraseScores = scores.GetScoresForProducer(phraseTable); for (size_t j = 0; j < phraseScores.size(); ++j) { out << exp(phraseScores[j]) << " "; } out << "||| "; const AlignmentInfo& align = targetPhrase->GetAlignTerm(); for (AlignmentInfo::const_iterator j = align.begin(); j != align.end(); ++j) { out << j->first << "-" << j->second << " "; } out << endl; } } }