コード例 #1
0
ファイル: IRST.cpp プロジェクト: EktaGupta28/mosesdecoder
/**
 * Loads the IRSTLM model stored at m_filePath and prepares it for use.
 *
 * After loading, both m_lmtb_size and m_nGramOrder are overwritten with the
 * maximum level reported by the loaded table.
 *
 * @param opts  not referenced by this implementation.
 */
void LanguageModelIRST::Load(AllOptions::ptr const& opts)
{
  FactorCollection &factors = FactorCollection::Instance();

  // NOTE(review): CreateLanguageModel is invoked through m_lmtb before m_lmtb
  // is assigned here -- presumably a static factory; confirm.
  m_lmtb = m_lmtb->CreateLanguageModel(m_filePath);
  if (m_lmtb_size > 0) {
    // A positive configured size caps the level that gets loaded.
    m_lmtb->setMaxLoadedLevel(m_lmtb_size);
  }
  m_lmtb->load(m_filePath);

  d = m_lmtb->getDict();
  d->incflag(1);

  // The table, not the configuration, decides the effective order.
  m_lmtb_size = m_lmtb->maxlevel();
  m_nGramOrder = m_lmtb_size;

  // LM can be ok, just outputs warnings.
  // Note retained from the original author (Mauro): an earlier version had
  // the following instructions in the wrong order.
  m_unknownId = d->oovcode(); // at the level of micro tags
  m_empty = -1;               // code for an empty position

  CreateFactors(factors);

  VERBOSE(1, GetScoreProducerDescription() << "  LanguageModelIRST::Load() m_unknownId=" << m_unknownId << std::endl);

  // Install caches to save time (only if PS_CACHE_ENABLE is defined through
  // compilation flags); the argument is never smaller than 2.
  if (m_lmtb_size > 2) {
    m_lmtb->init_caches(m_lmtb_size - 1);
  } else {
    m_lmtb->init_caches(2);
  }

  if (m_lmtb_dub > 0) {
    m_lmtb->setlogOOVpenalty(m_lmtb_dub);
  }
}
コード例 #2
0
ファイル: Rand.cpp プロジェクト: Kitton/mosesdecoder
void LanguageModelRandLM::Load()
{
    cerr << "Loading LanguageModelRandLM..." << endl;
    FactorCollection &factorCollection = FactorCollection::Instance();
    int cache_MB = 50; // increase cache size
    m_lm = randlm::RandLM::initRandLM(m_filePath, m_nGramOrder, cache_MB);
    CHECK(m_lm != NULL);
    // get special word ids
    m_oov_id = m_lm->getWordID(m_lm->getOOV());
    CreateFactors(factorCollection);
    m_lm->initThreadSpecificData();
}
コード例 #3
0
ファイル: SRI.cpp プロジェクト: chesio/mosesdecoder
void LanguageModelSRI::Load()
{
  m_srilmVocab  = new ::Vocab();
  m_srilmModel	= new Ngram(*m_srilmVocab, m_nGramOrder);

  m_srilmModel->skipOOVs() = false;

  File file( m_filePath.c_str(), "r" );
  m_srilmModel->read(file);

  // LM can be ok, just outputs warnings
  CreateFactors();
  m_unknownId = m_srilmVocab->unkIndex();
}
コード例 #4
0
bool LanguageModelSRI::Load(const std::string &filePath
                            , FactorType factorType
                            , size_t nGramOrder)
{
  m_srilmVocab  = new ::Vocab();
  m_srilmModel	= new Ngram(*m_srilmVocab, nGramOrder);
  m_factorType 	= factorType;
  m_nGramOrder	= nGramOrder;
  m_filePath		= filePath;

  m_srilmModel->skipOOVs() = false;

  File file( filePath.c_str(), "r" );
  m_srilmModel->read(file);

  // LM can be ok, just outputs warnings
  CreateFactors();
  m_unknownId = m_srilmVocab->unkIndex();

  return true;
}
/**
 * Loads a factored (parallel backoff) SRILM language model.
 *
 * Progress messages are written to stderr throughout. If reading the factored
 * counts or the LM itself fails, an error is printed and the process is
 * terminated with exit(1); no failure is reported through the return value.
 *
 * @param filePath     path of the factored-LM specification file; stored in
 *                     m_filePath.
 * @param factorTypes  factor types used by the model; stored both as a mask
 *                     (m_factorTypes) and in the given order
 *                     (m_factorTypesOrdered).
 * @param nGramOrder   stored in m_nGramOrder; not otherwise used here.
 * @return always true.
 */
bool LanguageModelParallelBackoff::Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder)
{

  cerr << "Loading Language Model Parallel Backoff!!!\n";
  widMatrix = new ::WidMatrix();
  m_factorTypes	= FactorMask(factorTypes);
  m_srilmVocab = new ::FactoredVocab();
  //assert(m_srilmVocab != 0);

  // Build the FNgram specification from the file; it is bound to the vocabulary.
  fnSpecs = 0;
  File f(filePath.c_str(),"r");
  fnSpecs = new ::FNgramSpecs<FNgramCount>(f,*m_srilmVocab, 0/*debug*/);

  cerr << "Loaded fnSpecs!\n";

  // Vocabulary switches are set after the specs were read and before the
  // statistics and LM objects are created.
  m_srilmVocab->unkIsWord() = true;
  m_srilmVocab->nullIsWord() = true;
  m_srilmVocab->toLower() = false;

  // NOTE(review): factoredStats is never deleted in this function -- confirm
  // whether something else owns it or whether it is leaked.
  FNgramStats *factoredStats = new FNgramStats(*m_srilmVocab, *fnSpecs);

  // Presumably sets the debug level of the statistics object -- confirm.
  factoredStats->debugme(2);

  cerr << "Factored stats\n";

  FNgram* fngramLM = new FNgram(*m_srilmVocab,*fnSpecs);
  // NOTE(review): a plain new-expression reports failure by throwing, so this
  // assert is not expected to fire.
  assert(fngramLM != 0);

  cerr << "FNgram object created\n";

  fngramLM->skipOOVs = false;

  // A failed read of the counts is fatal for the whole process.
  if (!factoredStats->read()) {
    cerr << "error reading in counts in factor file\n";
    exit(1);
  }

  cerr << "Factored stats read!\n";

  // Post-processing of the counts, performed before the LM itself is read.
  factoredStats->estimateDiscounts();
  factoredStats->computeCardinalityFunctions();
  factoredStats->sumCounts();

  cerr << "Another three operations made!\n";

  // A failed read of the LM is likewise fatal for the whole process.
  if (!fngramLM->read()) {
    cerr << "format error in lm file\n";
    exit(1);
  }

  cerr << "fngramLM reads!\n";

  // Record the configuration this model was loaded with.
  m_filePath = filePath;
  m_nGramOrder= nGramOrder;

  m_factorTypesOrdered= factorTypes;

  m_unknownId = m_srilmVocab->unkIndex();

  cerr << "m_unknowdId = " << m_unknownId << endl;

  // Publish the model only after it has been read successfully.
  m_srilmModel = fngramLM;

  cerr << "Create factors...\n";

  CreateFactors();

  cerr << "Factors created! \n";
  //FactorCollection &factorCollection = FactorCollection::Instance();

  // Disabled legacy code, kept for reference:
  /*for (size_t index = 0 ; index < m_factorTypesOrdered.size() ; ++index)
  {
  	FactorType factorType = m_factorTypesOrdered[index];
  	m_sentenceStartArray[factorType] 	= factorCollection.AddFactor(Output, factorType, BOS_);


  	m_sentenceEndArray[factorType] 		= factorCollection.AddFactor(Output, factorType, EOS_);

    //factorIdStart = m_sentenceStartArray[factorType]->GetId();
    //factorIdEnd = m_sentenceEndArray[factorType]->GetId();

    for (size_t i = 0; i < 10; i++)
    {
      lmIdMap[factorIdStart * 10 + i] = GetLmID(BOS_);
  		lmIdMap[factorIdEnd * 10 + i] = GetLmID(EOS_);
    }

  	//(*lmIdMap)[factorIdStart * 10 + index] = GetLmID(BOS_);
  	//(*lmIdMap)[factorIdEnd * 10 + index] = GetLmID(EOS_);

  }*/
  return true;
}
コード例 #6
0
bool LanguageModelIRST::Load(const std::string &filePath, 
			     FactorType factorType, 
			     float weight,
			     size_t nGramOrder)
{
  const char *SepString = " \t\n";
  cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n";

  FactorCollection &factorCollection = FactorCollection::Instance();

  m_factorType 	 = factorType;
  m_weight			 = weight;
  m_nGramOrder	 = nGramOrder;

  // get name of LM file and, if any, of the micro-macro map file
  char *filenamesOrig = strdup(filePath.c_str());
  char *filenames = filenamesOrig;
  m_filePath = strsep(&filenames, SepString);

  // Open the input file (possibly gzipped)
  InputFileStream inp(m_filePath);

  if (filenames) {
    // case LMfile + MAPfile: create an object of lmmacro class and load both LM file and map
    cerr << "Loading LM file + MAP\n";
    m_mapFilePath = strsep(&filenames, SepString);
    if (!FileExists(m_mapFilePath)) {
      cerr << "ERROR: Map file <" << m_mapFilePath << "> does not exist\n";
			free(filenamesOrig);
      return false;
    }
    InputFileStream inpMap(m_mapFilePath);
    m_lmtb = new lmmacro(m_filePath, inp, inpMap);


  } else {
    // case (standard) LMfile only: create an object of lmtable
    cerr << "Loading LM file (no MAP)\n";
    m_lmtb  = (lmtable *)new lmtable;

  // Load the (possibly binary) model
#ifdef WIN32
    m_lmtb->load(inp); //don't use memory map
#else
    if (m_filePath.compare(m_filePath.size()-3,3,".mm")==0)
      m_lmtb->load(inp,m_filePath.c_str(),NULL,1);
    else 
      m_lmtb->load(inp,m_filePath.c_str(),NULL,0);
#endif  

  }

  m_lmtb_ng=new ngram(m_lmtb->getDict()); // ngram of words/micro tags
  m_lmtb_size=m_lmtb->maxlevel();

  // LM can be ok, just outputs warnings

  // Mauro: in the original, the following two instructions are wrongly switched:
  m_unknownId = m_lmtb->getDict()->oovcode(); // at the level of micro tags
  CreateFactors(factorCollection);

  VERBOSE(1, "IRST: m_unknownId=" << m_unknownId << std::endl);

  //install caches
  m_lmtb->init_probcache();
  m_lmtb->init_statecache();
  m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2?m_lmtb->maxlevel()-1:2);

  if (m_lmtb_dub >0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);

	free(filenamesOrig);
  return true;
}
コード例 #7
0
ファイル: DSLPNLConverter.cpp プロジェクト: JacobCWard/PyPNL
/**
 * Builds a PNL Bayesian network from a DSL network.
 *
 * Side effects: theIds is cleared and refilled with the id of every node
 * visited, and every noisy-MAX node of dslNet is converted to a CPT node in
 * place.
 *
 * @param dslNet  network to convert; may be modified as described above.
 * @return the new network, or NULL when a node is neither CPT nor noisy-MAX,
 *         when a noisy-MAX conversion fails, when the number of collected ids
 *         differs from the network's node count, or when the graph or the
 *         network cannot be created.
 */
CBNet* DSLPNLConverter::CreateBNet(DSL_network &dslNet)
{
    // Start from an empty nodeId <-> number mapping.
    theIds.CleanUp();

    // Walk over all nodes: only CPT definitions are supported, noisy-MAX
    // nodes are converted to CPT on the fly, anything else aborts the
    // conversion. The id of each accepted node is recorded.
    for (int handle = dslNet.GetFirstNode();
         handle >= 0;
         handle = dslNet.GetNextNode(handle))
    {
        const int definitionType = dslNet.GetNode(handle)->Definition()->GetType();
        if (definitionType != DSL_CPT)
        {
            if (definitionType != DSL_NOISY_MAX)
                return NULL;

            const int status = dslNet.GetNode(handle)->ChangeType(DSL_CPT);
            if (status != DSL_OKAY)
                return NULL;
        }
        theIds.Add(dslNet.GetNode(handle)->Info().Header().GetId());
    }

    // Sanity check: every node of the net must have been collected.
    const int numberOfNodes = dslNet.GetNumberOfNodes();
    if (numberOfNodes != theIds.NumItems())
    {
        std::cout << "something went wrong!" << std::endl;
        return NULL;
    }

    // Optional dump of the collected ids.
#ifdef DSLPNL_DEBUG
    std::cerr << "Number of nodes  : " << numberOfNodes << std::endl;
    std::cerr << "DSL_ids: " << std::endl;
    for (int i = 0; i < numberOfNodes; i++)
        std::cerr << i << " : " << theIds[i] << std::endl;
#endif

    // Create the graph.
    CGraph* pnlGraph = CreateCGraph(dslNet);
    if (pnlGraph == NULL)
    {
        std::cout << "PNL graph not created!" << std::endl;
        return NULL;
    }

    // Create the network on top of the graph.
    CBNet* pnlBNet = CreateCBNet(dslNet, pnlGraph);
    if (pnlBNet == NULL)
    {
        std::cout << "PNL Bnet not created!" << std::endl;
        return NULL;
    }

    // Allocate the factors of the new network.
    CreateFactors(dslNet, pnlBNet);

    return pnlBNet;
}