Exemplo n.º 1
0
/**
 * Creates the phrase dictionary that corresponds to m_implementation and
 * loads it from m_filePath.
 *
 * For the Memory, SCFG/Hiero and ALSuffixArray implementations, ".gz" is
 * appended to m_filePath when the plain file does not exist but a gzipped
 * sibling does (note that this modifies the member).
 *
 * \param system translation system that supplies the language models and the
 *               word-penalty weight / producer handed to the dictionary's Load()
 * \return the newly allocated dictionary (created with new; ownership
 *         presumably passes to the caller -- confirm against call sites), or
 *         NULL when the dynamic suffix array table fails to load.
 *
 * Every other failure (wrong input type for the in-memory table, failed
 * Load(), unknown or unsupported implementation) goes through CHECK, which is
 * assumed to terminate the process -- NOTE(review): confirm CHECK's behaviour
 * in release builds.
 */
PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSystem* system)
{
  const StaticData& staticData = StaticData::Instance();
  if (m_implementation == Memory) {
    // memory phrase table
    VERBOSE(2,"using standard phrase tables" << std::endl);
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << std::endl);
    }
    // The in-memory table is only accepted for plain sentence input.
    if (staticData.GetInputType() != SentenceInput) {
      UserMessage::Add("Must use binary phrase table for this input type");
      CHECK(false);
    }

    PhraseDictionaryMemory* pdm  = new PhraseDictionaryMemory(m_numScoreComponent,this);
    bool ret = pdm->Load(GetInput(), GetOutput()
                         , m_filePath
                         , m_weight
                         , m_tableLimit
                         , system->GetLanguageModels()
                         , system->GetWeightWordPenalty());
    CHECK(ret);
    return pdm;
  } else if (m_implementation == Binary) {
    PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
    bool ret = pdta->Load(GetInput()
                          , GetOutput()
                          , m_filePath
                          , m_weight
                          , m_tableLimit
                          , system->GetLanguageModels()
                          , system->GetWeightWordPenalty());
    CHECK(ret);
    return pdta;
  } else if (m_implementation == SCFG || m_implementation == Hiero) {
    // memory phrase table
    if (m_implementation == Hiero) {
      VERBOSE(2,"using Hiero format phrase tables" << std::endl);
    } else {
      VERBOSE(2,"using Moses-formatted SCFG phrase tables" << std::endl);
    }
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << std::endl);
    }

    // The rule table container depends on the configured parsing algorithm.
    RuleTableTrie *dict;
    if (staticData.GetParsingAlgorithm() == ParseScope3) {
      dict = new RuleTableUTrie(m_numScoreComponent, this);
    } else {
      dict = new PhraseDictionarySCFG(m_numScoreComponent, this);
    }
    bool ret = dict->Load(GetInput()
                          , GetOutput()
                          , m_filePath
                          , m_weight
                          , m_tableLimit
                          , system->GetLanguageModels()
                          , system->GetWordPenaltyProducer());
    // CHECK rather than assert: assert vanishes under NDEBUG, which would let
    // a dictionary that failed to load be returned.
    CHECK(ret);
    return dict;
  } else if (m_implementation == ALSuffixArray) {
    // memory phrase table
    VERBOSE(2,"using Hiero format phrase tables" << std::endl);
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << std::endl);
    }

    PhraseDictionaryALSuffixArray* pdm  = new PhraseDictionaryALSuffixArray(m_numScoreComponent,this);
    bool ret = pdm->Load(GetInput()
                         , GetOutput()
                         , m_filePath
                         , m_weight
                         , m_tableLimit
                         , system->GetLanguageModels()
                         , system->GetWordPenaltyProducer());
    CHECK(ret);
    return pdm;
  } else if (m_implementation == OnDisk) {
    PhraseDictionaryOnDisk* pdta = new PhraseDictionaryOnDisk(m_numScoreComponent, this);
    bool ret = pdta->Load(GetInput()
                          , GetOutput()
                          , m_filePath
                          , m_weight
                          , m_tableLimit
                          , system->GetLanguageModels()
                          , system->GetWordPenaltyProducer());
    CHECK(ret);
    return pdta;
  } else if (m_implementation == SuffixArray) {
#ifndef WIN32
    PhraseDictionaryDynSuffixArray *pd = new PhraseDictionaryDynSuffixArray(m_numScoreComponent, this);
    if(!(pd->Load(
           GetInput()
           ,GetOutput()
           ,m_filePath
           ,m_targetFile
           , m_alignmentsFile
           , m_weight, m_tableLimit
           , system->GetLanguageModels()
           , system->GetWeightWordPenalty()))) {
      std::cerr << "FAILED TO LOAD\n" << endl;
      delete pd;
      // Return right away so the success message below is not printed for a
      // table that failed to load.
      return NULL;
    }
    std::cerr << "Suffix array phrase table loaded" << std::endl;
    return pd;
#else
    // Dynamic suffix array tables are not built on WIN32.
    CHECK(false);
    return NULL;
#endif
  } else if (m_implementation == FuzzyMatch) {
    PhraseDictionaryFuzzyMatch *dict = new PhraseDictionaryFuzzyMatch(m_numScoreComponent, this);

    bool ret = dict->Load(GetInput()
                          , GetOutput()
                          , m_filePath
                          , m_weight
                          , m_tableLimit
                          , system->GetLanguageModels()
                          , system->GetWordPenaltyProducer());
    CHECK(ret);

    return dict;
  } else if (m_implementation == Compact) {
#ifndef WIN32
    VERBOSE(2,"Using compact phrase table" << std::endl);

    PhraseDictionaryCompact* pd  = new PhraseDictionaryCompact(m_numScoreComponent, m_implementation, this);
    bool ret = pd->Load(GetInput(), GetOutput()
                        , m_filePath
                        , m_weight
                        , m_tableLimit
                        , system->GetLanguageModels()
                        , system->GetWeightWordPenalty());
    CHECK(ret);
    return pd;
#else
    // Compact phrase tables are not built on WIN32.
    CHECK(false);
    return NULL;
#endif
  } else {
    std::cerr << "Unknown phrase table type " << m_implementation << endl;
    CHECK(false);
    // Not expected to be reached; keeps every path of this non-void function
    // returning a value.
    return NULL;
  }
}
/**
 * Creates the phrase dictionary that corresponds to m_implementation and
 * loads it from m_filePath.
 *
 * For the Memory and SCFG implementations, ".gz" is appended to m_filePath
 * when the plain file does not exist but a gzipped sibling does (note that
 * this modifies the member).
 *
 * \param system translation system that supplies the language models and the
 *               word-penalty weight / producer handed to the dictionary's Load()
 * \return the newly allocated dictionary (created with new; ownership
 *         presumably passes to the caller -- confirm against call sites), or
 *         NULL on failure.
 *
 * Failures still trip an assert in debug builds. Because assert compiles to
 * nothing under NDEBUG, each failure path also frees the dictionary and
 * returns NULL explicitly, so a release build never hands back a table that
 * failed to load and never runs off the end of the function.
 */
PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSystem* system) {
	const StaticData& staticData = StaticData::Instance();
	if (m_implementation == Memory)
	{   // memory phrase table
		VERBOSE(2,"using standard phrase tables" << std::endl);
		if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
			m_filePath += ".gz";
			VERBOSE(2,"Using gzipped file" << std::endl);
		}
		// The in-memory table is only accepted for plain sentence input.
		if (staticData.GetInputType() != SentenceInput)
		{
			UserMessage::Add("Must use binary phrase table for this input type");
			assert(false);
			return NULL; // release builds: do not go on to load the table
		}

		PhraseDictionaryMemory* pdm  = new PhraseDictionaryMemory(m_numScoreComponent,this);
		bool ret = pdm->Load(GetInput(), GetOutput()
							, m_filePath
							, m_weight
							, m_tableLimit
							, system->GetLanguageModels()
							, system->GetWeightWordPenalty());
		assert(ret);
		if (!ret) {
			delete pdm;
			return NULL;
		}
		return pdm;
	}
	else if (m_implementation == Binary)
	{
		PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
		bool ret = pdta->Load(GetInput()
							, GetOutput()
							, m_filePath
							, m_weight
							, m_tableLimit
							, system->GetLanguageModels()
							, system->GetWeightWordPenalty());
		assert(ret);
		if (!ret) {
			delete pdta;
			return NULL;
		}
		return pdta;
	}
	else if (m_implementation == SCFG)
	{   // memory phrase table
		VERBOSE(2,"using New Format phrase tables" << std::endl);
		if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
			m_filePath += ".gz";
			VERBOSE(2,"Using gzipped file" << std::endl);
		}

		PhraseDictionarySCFG* pdm  = new PhraseDictionarySCFG(m_numScoreComponent,this);
		bool ret = pdm->Load(GetInput()
							, GetOutput()
							, m_filePath
							, m_weight
							, m_tableLimit
							, system->GetLanguageModels()
							, system->GetWordPenaltyProducer());
		assert(ret);
		if (!ret) {
			delete pdm;
			return NULL;
		}
		return pdm;
	}
	else if (m_implementation == OnDisk)
	{
		PhraseDictionaryOnDisk* pdta = new PhraseDictionaryOnDisk(m_numScoreComponent, this);
		bool ret = pdta->Load(GetInput()
							, GetOutput()
							, m_filePath
							, m_weight
							, m_tableLimit
							, system->GetLanguageModels()
							, system->GetWordPenaltyProducer());
		assert(ret);
		if (!ret) {
			delete pdta;
			return NULL;
		}
		return pdta;
	}
	else if (m_implementation == SuffixArray)
	{
#ifndef WIN32
		PhraseDictionaryDynSuffixArray *pd = new PhraseDictionaryDynSuffixArray(m_numScoreComponent, this);
		if(!(pd->Load(
							GetInput()
							,GetOutput()
							,m_filePath
							,m_targetFile
							, m_alignmentsFile
							, m_weight, m_tableLimit
							, system->GetLanguageModels()
							, system->GetWeightWordPenalty())))
		{
			std::cerr << "FAILED TO LOAD\n" << endl;
			delete pd;
			// Return right away so the success message below is not printed
			// for a table that failed to load.
			return NULL;
		}
		std::cerr << "Suffix array phrase table loaded" << std::endl;
		return pd;
#else
		// Dynamic suffix array tables are not built on WIN32.
		assert(false);
		return NULL;
#endif
	} else {
		std::cerr << "Unknown phrase table type " << m_implementation << endl;
		assert(false);
		// With NDEBUG the assert above is a no-op; return explicitly instead of
		// flowing off the end of a non-void function.
		return NULL;
	}
}