/**
 * Best-effort removal of the scratch input file and output directory used by
 * a single transliteration run. Return codes are intentionally ignored: a
 * failed cleanup must not mask the result (or the error) of the run itself.
 *
 * @param inFile  path of the temporary file holding the source phrase
 * @param outDir  path of the temporary directory written by the script
 */
static void RemoveTransliterationScratch(const std::string &inFile, const std::string &outDir)
{
  remove(inFile.c_str());
  const std::string rmCmd = "rm -rf " + outDir;
  system(rmCmd.c_str());
}

/**
 * Look up (or compute) the target phrases for the phrase of `inputPath` and
 * attach them to the path via InputPath::SetTargetPhrases.
 *
 * Results are cached per hash of the source phrase in the collection returned
 * by GetCache(), together with the clock() value at insertion time. On a cache
 * miss the source phrase is written to a temporary file, the external
 * prepare-transliteration-phrase-table.pl script is run on it, and the target
 * phrases are built from the script's output directory.
 *
 * @param inputPath  path whose phrase is transliterated; receives the result
 * @throws util::Exception (via UTIL_THROW_IF2) if a temporary name cannot be
 *         generated, the temporary input file cannot be opened, or the
 *         script exits with a non-zero status. Temporary files are removed
 *         before the script/creation errors propagate.
 */
void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const
{
  const Phrase &sourcePhrase = inputPath.GetPhrase();
  size_t hash = hash_value(sourcePhrase);

  CacheColl &cache = GetCache();

  std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator cacheIter;
  cacheIter = cache.find(hash);

  if (cacheIter != cache.end()) {
    // already in cache
    const TargetPhraseCollection *cachedColl = cacheIter->second.first;
    inputPath.SetTargetPhrases(*this, cachedColl, NULL);
    return;
  }

  // TRANSLITERATE
  // NOTE(review): tmpnam() is inherently racy (the name can be claimed by
  // another process before it is used). mkstemp()/mkdtemp() would be safer,
  // but need headers that are not visible from this block.
  // tmpnam(NULL) returns a pointer to a static buffer, so each name is copied
  // into a std::string before the next call. It may also return NULL, which
  // must not reach the std::string constructor.
  char *ptr = tmpnam(NULL);
  UTIL_THROW_IF2(ptr == NULL, "Cannot generate temporary file name for transliteration input");
  string inFile(ptr);

  ptr = tmpnam(NULL);
  UTIL_THROW_IF2(ptr == NULL, "Cannot generate temporary directory name for transliteration output");
  string outDir(ptr);

  ofstream inStream(inFile.c_str());
  UTIL_THROW_IF2(!inStream.is_open(), "Cannot open temporary transliteration input file");
  inStream << sourcePhrase.ToString() << endl;
  inStream.close();

  string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
               " --transliteration-model-dir " + m_filePath +
               " --moses-src-dir " + m_mosesDir +
               " --external-bin-dir " + m_externalDir +
               " --input-extension " + m_inputLang +
               " --output-extension " + m_outputLang +
               " --oov-file " + inFile +
               " --out-dir " + outDir;

  int ret = system(cmd.c_str());
  if (ret != 0) {
    // Do not leave scratch files behind when the script fails.
    RemoveTransliterationScratch(inFile, outDir);
  }
  UTIL_THROW_IF2(ret != 0, "Transliteration script error");

  // Build the phrases before allocating the collection so that nothing is
  // leaked (memory or scratch files) if reading the script output throws.
  vector<TargetPhrase*> targetPhrases;
  try {
    targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
  } catch (...) {
    RemoveTransliterationScratch(inFile, outDir);
    throw;
  }

  TargetPhraseCollection *tpColl = new TargetPhraseCollection();
  vector<TargetPhrase*>::const_iterator tpIter;
  for (tpIter = targetPhrases.begin(); tpIter != targetPhrases.end(); ++tpIter) {
    TargetPhrase *tp = *tpIter;
    tpColl->Add(tp);
  }

  // The cache entry keeps the collection pointer and the insertion time.
  std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
  cache[hash] = value;

  inputPath.SetTargetPhrases(*this, tpColl, NULL);

  // clean up temporary files
  RemoveTransliterationScratch(inFile, outDir);
}
/**
 * Extend the on-disk phrase-table lookup by one word for `inputPath`.
 *
 * The lookup continues from the phrase-table node recorded on the previous
 * path (or from the root source node when there is no previous path) and
 * follows the child for the last word of the path's phrase. The outcome is
 * stored on the path with InputPath::SetTargetPhrases: the target phrase
 * collection and node when a child exists, or NULL/NULL when the word is
 * unknown to this table or no child exists.
 *
 * Nothing is set on the path when SatisfyBackoff() rejects it or when the
 * previous path has no node for this table.
 *
 * @param inputPath  path to look up; receives the target phrases and node
 */
void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath) const
{
  OnDiskPt::OnDiskWrapper &diskWrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
  const Phrase &srcPhrase = inputPath.GetPhrase();

  // Determine the node from which the lookup continues.
  const InputPath *previousPath = inputPath.GetPrevPath();
  const OnDiskPt::PhraseNode *parentNode = NULL;
  if (!previousPath) {
    // Starting subphrase.
    assert(srcPhrase.GetSize() == 1);
    parentNode = &diskWrapper.GetRootSourceNode();
  } else {
    parentNode = static_cast<const OnDiskPt::PhraseNode*>(previousPath->GetPtNode(*this));
  }

  // backoff
  if (!SatisfyBackoff(inputPath)) {
    return;
  }

  // No node to extend from: leave the path untouched.
  if (!parentNode) {
    return;
  }

  // Work on a copy of the final word, restricted to this table's input factors.
  Word finalWord = srcPhrase.GetWord(srcPhrase.GetSize() - 1);
  finalWord.OnlyTheseFactors(m_inputFactors);

  OnDiskPt::Word *diskWord = diskWrapper.ConvertFromMoses(m_input, finalWord);
  if (diskWord == NULL) {
    // OOV according to this phrase table. Not possible to extend
    inputPath.SetTargetPhrases(*this, NULL, NULL);
    return;
  }

  const OnDiskPt::PhraseNode *childNode = parentNode->GetChild(*diskWord, diskWrapper);
  if (!childNode) {
    inputPath.SetTargetPhrases(*this, NULL, NULL);
  } else {
    const TargetPhraseCollection *tpColl = GetTargetPhraseCollection(childNode);
    inputPath.SetTargetPhrases(*this, tpColl, childNode);
  }

  // The converted word is owned here and released after the lookup.
  delete diskWord;
}