PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSystem* system) { const StaticData& staticData = StaticData::Instance(); if (m_implementation == Memory) { // memory phrase table VERBOSE(2,"using standard phrase tables" << std::endl); if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) { m_filePath += ".gz"; VERBOSE(2,"Using gzipped file" << std::endl); } if (staticData.GetInputType() != SentenceInput) { UserMessage::Add("Must use binary phrase table for this input type"); CHECK(false); } PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent,this); bool ret = pdm->Load(GetInput(), GetOutput() , m_filePath , m_weight , m_tableLimit , system->GetLanguageModels() , system->GetWeightWordPenalty()); CHECK(ret); return pdm; } else if (m_implementation == Binary) { PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this); bool ret = pdta->Load( GetInput() , GetOutput() , m_filePath , m_weight , m_tableLimit , system->GetLanguageModels() , system->GetWeightWordPenalty()); CHECK(ret); return pdta; } else if (m_implementation == SCFG || m_implementation == Hiero) { // memory phrase table if (m_implementation == Hiero) { VERBOSE(2,"using Hiero format phrase tables" << std::endl); } else { VERBOSE(2,"using Moses-formatted SCFG phrase tables" << std::endl); } if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) { m_filePath += ".gz"; VERBOSE(2,"Using gzipped file" << std::endl); } RuleTableTrie *dict; if (staticData.GetParsingAlgorithm() == ParseScope3) { dict = new RuleTableUTrie(m_numScoreComponent, this); } else { dict = new PhraseDictionarySCFG(m_numScoreComponent, this); } bool ret = dict->Load(GetInput() , GetOutput() , m_filePath , m_weight , m_tableLimit , system->GetLanguageModels() , system->GetWordPenaltyProducer()); assert(ret); return dict; } else if (m_implementation == ALSuffixArray) { // memory phrase table VERBOSE(2,"using Hiero format 
phrase tables" << std::endl); if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) { m_filePath += ".gz"; VERBOSE(2,"Using gzipped file" << std::endl); } PhraseDictionaryALSuffixArray* pdm = new PhraseDictionaryALSuffixArray(m_numScoreComponent,this); bool ret = pdm->Load(GetInput() , GetOutput() , m_filePath , m_weight , m_tableLimit , system->GetLanguageModels() , system->GetWordPenaltyProducer()); CHECK(ret); return pdm; } else if (m_implementation == OnDisk) { PhraseDictionaryOnDisk* pdta = new PhraseDictionaryOnDisk(m_numScoreComponent, this); bool ret = pdta->Load(GetInput() , GetOutput() , m_filePath , m_weight , m_tableLimit , system->GetLanguageModels() , system->GetWordPenaltyProducer()); CHECK(ret); return pdta; } else if (m_implementation == SuffixArray) { #ifndef WIN32 PhraseDictionaryDynSuffixArray *pd = new PhraseDictionaryDynSuffixArray(m_numScoreComponent, this); if(!(pd->Load( GetInput() ,GetOutput() ,m_filePath ,m_targetFile , m_alignmentsFile , m_weight, m_tableLimit , system->GetLanguageModels() , system->GetWeightWordPenalty()))) { std::cerr << "FAILED TO LOAD\n" << endl; delete pd; pd = NULL; } std::cerr << "Suffix array phrase table loaded" << std::endl; return pd; #else CHECK(false); #endif } else if (m_implementation == FuzzyMatch) { PhraseDictionaryFuzzyMatch *dict = new PhraseDictionaryFuzzyMatch(m_numScoreComponent, this); bool ret = dict->Load(GetInput() , GetOutput() , m_filePath , m_weight , m_tableLimit , system->GetLanguageModels() , system->GetWordPenaltyProducer()); assert(ret); return dict; } else if (m_implementation == Compact) { #ifndef WIN32 VERBOSE(2,"Using compact phrase table" << std::endl); PhraseDictionaryCompact* pd = new PhraseDictionaryCompact(m_numScoreComponent, m_implementation, this); bool ret = pd->Load(GetInput(), GetOutput() , m_filePath , m_weight , m_tableLimit , system->GetLanguageModels() , system->GetWeightWordPenalty()); assert(ret); return pd; #else CHECK(false); #endif } else { std::cerr << 
"Unknown phrase table type " << m_implementation << endl; CHECK(false); } }
/**
 * Create and load the phrase dictionary selected by m_implementation.
 *
 * The matching dictionary class is allocated with new and loaded from
 * m_filePath using this feature's input/output factors, m_weight and
 * m_tableLimit. For the Memory and SCFG tables the path is switched to
 * m_filePath + ".gz" when the plain file is missing but the gzipped one
 * exists; note that this modifies m_filePath.
 *
 * Validation uses CHECK instead of assert: assert is compiled out under
 * NDEBUG, which would let an invalid input type or a failed Load() pass
 * silently.
 *
 * \param system supplies the language models and the word penalty weight or
 *               producer that are forwarded to the dictionary's Load().
 * \return the newly allocated dictionary. NULL is returned when the dynamic
 *         suffix array fails to load; any other load failure, an unknown
 *         implementation, or the suffix array on WIN32 trips a CHECK.
 *         NOTE(review): the caller presumably takes ownership of the returned
 *         object -- confirm against the call site.
 */
PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSystem* system)
{
  const StaticData& staticData = StaticData::Instance();

  if (m_implementation == Memory) {
    // Plain-text phrase table held in memory.
    VERBOSE(2,"using standard phrase tables" << std::endl);
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << std::endl);
    }
    // The in-memory table is only allowed for sentence input.
    if (staticData.GetInputType() != SentenceInput) {
      UserMessage::Add("Must use binary phrase table for this input type");
      CHECK(false);
    }

    PhraseDictionaryMemory* memoryTable = new PhraseDictionaryMemory(m_numScoreComponent, this);
    const bool loaded = memoryTable->Load(GetInput(), GetOutput()
                                          , m_filePath
                                          , m_weight
                                          , m_tableLimit
                                          , system->GetLanguageModels()
                                          , system->GetWeightWordPenalty());
    CHECK(loaded);
    return memoryTable;
  } else if (m_implementation == Binary) {
    PhraseDictionaryTreeAdaptor* treeTable =
      new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores, this);
    const bool loaded = treeTable->Load(GetInput()
                                        , GetOutput()
                                        , m_filePath
                                        , m_weight
                                        , m_tableLimit
                                        , system->GetLanguageModels()
                                        , system->GetWeightWordPenalty());
    CHECK(loaded);
    return treeTable;
  } else if (m_implementation == SCFG) {
    // Rule table held in memory.
    VERBOSE(2,"using New Format phrase tables" << std::endl);
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << std::endl);
    }

    PhraseDictionarySCFG* scfgTable = new PhraseDictionarySCFG(m_numScoreComponent, this);
    const bool loaded = scfgTable->Load(GetInput()
                                        , GetOutput()
                                        , m_filePath
                                        , m_weight
                                        , m_tableLimit
                                        , system->GetLanguageModels()
                                        , system->GetWordPenaltyProducer());
    CHECK(loaded);
    return scfgTable;
  } else if (m_implementation == OnDisk) {
    PhraseDictionaryOnDisk* diskTable = new PhraseDictionaryOnDisk(m_numScoreComponent, this);
    const bool loaded = diskTable->Load(GetInput()
                                        , GetOutput()
                                        , m_filePath
                                        , m_weight
                                        , m_tableLimit
                                        , system->GetLanguageModels()
                                        , system->GetWordPenaltyProducer());
    CHECK(loaded);
    return diskTable;
  } else if (m_implementation == SuffixArray) {
#ifndef WIN32
    PhraseDictionaryDynSuffixArray *suffixTable = new PhraseDictionaryDynSuffixArray(m_numScoreComponent, this);
    const bool loaded = suffixTable->Load(GetInput()
                                          , GetOutput()
                                          , m_filePath
                                          , m_targetFile
                                          , m_alignmentsFile
                                          , m_weight
                                          , m_tableLimit
                                          , system->GetLanguageModels()
                                          , system->GetWeightWordPenalty());
    if (!loaded) {
      // Unlike the other branches, a failed load is reported to the caller
      // as NULL instead of tripping a CHECK.
      std::cerr << "FAILED TO LOAD\n" << std::endl;
      delete suffixTable;
      return NULL;
    }
    // Only announce success when the load actually succeeded.
    std::cerr << "Suffix array phrase table loaded" << std::endl;
    return suffixTable;
#else
    CHECK(false);
#endif
  } else {
    std::cerr << "Unknown phrase table type " << m_implementation << std::endl;
    CHECK(false);
  }

  // Reached only if a CHECK(false) above returns control; guarantees that
  // no path flows off the end of this non-void function.
  return NULL;
}