void LanguageModelIRST::Load(AllOptions::ptr const& opts)
{
  FactorCollection &factorCollection = FactorCollection::Instance();

  m_lmtb = m_lmtb->CreateLanguageModel(m_filePath);
  if (m_lmtb_size > 0) m_lmtb->setMaxLoadedLevel(m_lmtb_size);
  m_lmtb->load(m_filePath);
  d = m_lmtb->getDict();
  d->incflag(1);

  m_nGramOrder = m_lmtb_size = m_lmtb->maxlevel(); // LM can be ok, just outputs warnings

  // Mauro: in the original, the following two instructions are wrongly switched:
  m_unknownId = d->oovcode(); // at the level of micro tags
  m_empty = -1;               // code for an empty position

  CreateFactors(factorCollection);

  VERBOSE(1, GetScoreProducerDescription() << " LanguageModelIRST::Load() m_unknownId=" << m_unknownId << std::endl);

  // install caches to save time (only if PS_CACHE_ENABLE is defined through compilation flags)
  m_lmtb->init_caches(m_lmtb_size > 2 ? m_lmtb_size - 1 : 2);

  if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
}
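// A minimal standalone sketch of the IRSTLM loading sequence used above, outside
// Moses. It uses only the IRSTLM calls already visible in Load(); the function
// name, file name, and local variables are illustrative assumptions, and the exact
// header/namespace depends on the IRSTLM version linked against.
#include <iostream>
#include <string>
#include "lmContainer.h" // IRSTLM

void sketch_load_irstlm(const std::string &path)
{
  // CreateLanguageModel() inspects the file and returns the appropriate
  // lmContainer subclass (plain table, quantized, memory-mapped, ...).
  lmContainer *lm = lmContainer::CreateLanguageModel(path);
  lm->load(path);

  dictionary *dict = lm->getDict();
  dict->incflag(1);                // allow the dictionary to grow while mapping factors
  int unknownId = dict->oovcode(); // OOV code, cached as m_unknownId above
  dict->incflag(0);                // freeze the dictionary again

  std::cerr << "order=" << lm->maxlevel() << " oov=" << unknownId << std::endl;
  delete lm;
}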
void LanguageModelRandLM::Load()
{
  cerr << "Loading LanguageModelRandLM..." << endl;
  FactorCollection &factorCollection = FactorCollection::Instance();
  int cache_MB = 50; // increase cache size
  m_lm = randlm::RandLM::initRandLM(m_filePath, m_nGramOrder, cache_MB);
  CHECK(m_lm != NULL);
  // get special word ids
  m_oov_id = m_lm->getWordID(m_lm->getOOV());
  CreateFactors(factorCollection);
  m_lm->initThreadSpecificData();
}
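// A minimal standalone sketch of the same RandLM initialisation, using only the
// calls that appear in Load() above; the header name, function name, and cache
// size are assumptions.
#include <cassert>
#include <string>
#include "RandLM.h"

void sketch_load_randlm(const std::string &path, int order)
{
  int cache_MB = 50; // per-model cache size in MB, tunable
  randlm::RandLM *lm = randlm::RandLM::initRandLM(path, order, cache_MB);
  assert(lm != NULL);

  // Resolve the OOV word id once up front, as Load() does with m_oov_id.
  lm->getWordID(lm->getOOV());

  // Must be called before querying the model from worker threads.
  lm->initThreadSpecificData();
  delete lm;
}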
void LanguageModelSRI::Load()
{
  m_srilmVocab = new ::Vocab();
  m_srilmModel = new Ngram(*m_srilmVocab, m_nGramOrder);

  m_srilmModel->skipOOVs() = false;

  File file(m_filePath.c_str(), "r");
  m_srilmModel->read(file); // LM can be ok, just outputs warnings

  CreateFactors();
  m_unknownId = m_srilmVocab->unkIndex();
}
bool LanguageModelSRI::Load(const std::string &filePath
                            , FactorType factorType
                            , size_t nGramOrder)
{
  m_srilmVocab = new ::Vocab();
  m_srilmModel = new Ngram(*m_srilmVocab, nGramOrder);
  m_factorType = factorType;
  m_nGramOrder = nGramOrder;
  m_filePath   = filePath;

  m_srilmModel->skipOOVs() = false;

  File file(filePath.c_str(), "r");
  m_srilmModel->read(file); // LM can be ok, just outputs warnings

  CreateFactors();
  m_unknownId = m_srilmVocab->unkIndex();
  return true;
}
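// Both SRI overloads above only construct and read the model. A minimal sketch of
// querying it afterwards with the standard SRILM toolkit API (Vocab.h, Ngram.h);
// the function name, words, and variable names are illustrative.
#include "Vocab.h"
#include "Ngram.h"

LogP sketch_sri_trigram_prob(Ngram &model, Vocab &vocab)
{
  // SRILM expects the history in *reverse* order, terminated by Vocab_None:
  // P(three | one two) is queried with context = {two, one, Vocab_None}.
  VocabIndex context[3];
  context[0] = vocab.getIndex("two");
  context[1] = vocab.getIndex("one");
  context[2] = Vocab_None;
  return model.wordProb(vocab.getIndex("three"), context); // base-10 log prob
}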
bool LanguageModelParallelBackoff::Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder)
{
  cerr << "Loading Language Model Parallel Backoff!!!\n";
  widMatrix = new ::WidMatrix();
  m_factorTypes = FactorMask(factorTypes);
  m_srilmVocab = new ::FactoredVocab();
  //assert(m_srilmVocab != 0);

  fnSpecs = 0;
  File f(filePath.c_str(), "r");
  fnSpecs = new ::FNgramSpecs<FNgramCount>(f, *m_srilmVocab, 0/*debug*/);
  cerr << "Loaded fnSpecs!\n";

  m_srilmVocab->unkIsWord() = true;
  m_srilmVocab->nullIsWord() = true;
  m_srilmVocab->toLower() = false;

  FNgramStats *factoredStats = new FNgramStats(*m_srilmVocab, *fnSpecs);
  factoredStats->debugme(2);
  cerr << "Factored stats created\n";

  FNgram* fngramLM = new FNgram(*m_srilmVocab, *fnSpecs);
  assert(fngramLM != 0);
  cerr << "FNgram object created\n";

  fngramLM->skipOOVs = false;

  if (!factoredStats->read()) {
    cerr << "error reading counts in factor file\n";
    exit(1);
  }
  cerr << "Factored stats read!\n";

  factoredStats->estimateDiscounts();
  factoredStats->computeCardinalityFunctions();
  factoredStats->sumCounts();
  cerr << "Discounts estimated, cardinality functions computed, counts summed\n";

  if (!fngramLM->read()) {
    cerr << "format error in lm file\n";
    exit(1);
  }
  cerr << "fngramLM read!\n";

  m_filePath = filePath;
  m_nGramOrder = nGramOrder;
  m_factorTypesOrdered = factorTypes;

  m_unknownId = m_srilmVocab->unkIndex();
  cerr << "m_unknownId = " << m_unknownId << endl;

  m_srilmModel = fngramLM;

  cerr << "Create factors...\n";
  CreateFactors();
  cerr << "Factors created!\n";

  //FactorCollection &factorCollection = FactorCollection::Instance();
  /*for (size_t index = 0 ; index < m_factorTypesOrdered.size() ; ++index) {
    FactorType factorType = m_factorTypesOrdered[index];

    m_sentenceStartArray[factorType] = factorCollection.AddFactor(Output, factorType, BOS_);
    m_sentenceEndArray[factorType]   = factorCollection.AddFactor(Output, factorType, EOS_);

    //factorIdStart = m_sentenceStartArray[factorType]->GetId();
    //factorIdEnd = m_sentenceEndArray[factorType]->GetId();

    for (size_t i = 0; i < 10; i++) {
      lmIdMap[factorIdStart * 10 + i] = GetLmID(BOS_);
      lmIdMap[factorIdEnd * 10 + i]   = GetLmID(EOS_);
    }
    //(*lmIdMap)[factorIdStart * 10 + index] = GetLmID(BOS_);
    //(*lmIdMap)[factorIdEnd * 10 + index] = GetLmID(EOS_);
  }*/
  return true;
}
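// Note on the load order above (a summary of the code as written, not additional
// API): the factored-LM pipeline is strictly sequential. The FNgramSpecs parsed
// from the spec file drive both the count object and the model object, so:
//   1. FNgramSpecs        - parse the factor/spec file;
//   2. FNgramStats::read() - read the factored counts;
//   3. estimateDiscounts() / computeCardinalityFunctions() / sumCounts()
//                          - prepare the smoothing statistics;
//   4. FNgram::read()      - read the LM parameters themselves.
// Only after step 4 is m_srilmModel safe to query.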
bool LanguageModelIRST::Load(const std::string &filePath, FactorType factorType, float weight, size_t nGramOrder)
{
  const char *SepString = " \t\n";

  cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n";

  FactorCollection &factorCollection = FactorCollection::Instance();

  m_factorType = factorType;
  m_weight     = weight;
  m_nGramOrder = nGramOrder;

  // get name of LM file and, if any, of the micro-macro map file
  char *filenamesOrig = strdup(filePath.c_str());
  char *filenames = filenamesOrig;
  m_filePath = strsep(&filenames, SepString);

  // Open the input file (possibly gzipped)
  InputFileStream inp(m_filePath);

  if (filenames) {
    // case LMfile + MAPfile: create an object of lmmacro class and load both LM file and map
    cerr << "Loading LM file + MAP\n";
    m_mapFilePath = strsep(&filenames, SepString);
    if (!FileExists(m_mapFilePath)) {
      cerr << "ERROR: Map file <" << m_mapFilePath << "> does not exist\n";
      free(filenamesOrig);
      return false;
    }
    InputFileStream inpMap(m_mapFilePath);
    m_lmtb = new lmmacro(m_filePath, inp, inpMap);
  } else {
    // case (standard) LM file only: create an object of lmtable
    cerr << "Loading LM file (no MAP)\n";
    m_lmtb = new lmtable;

    // Load the (possibly binary) model
#ifdef WIN32
    m_lmtb->load(inp); // don't use memory map
#else
    if (m_filePath.compare(m_filePath.size()-3, 3, ".mm") == 0)
      m_lmtb->load(inp, m_filePath.c_str(), NULL, 1);
    else
      m_lmtb->load(inp, m_filePath.c_str(), NULL, 0);
#endif
  }

  m_lmtb_ng = new ngram(m_lmtb->getDict()); // ngram of words/micro tags
  m_lmtb_size = m_lmtb->maxlevel(); // LM can be ok, just outputs warnings

  // Mauro: in the original, the following two instructions are wrongly switched:
  m_unknownId = m_lmtb->getDict()->oovcode(); // at the level of micro tags

  CreateFactors(factorCollection);

  VERBOSE(1, "IRST: m_unknownId=" << m_unknownId << std::endl);

  // install caches
  m_lmtb->init_probcache();
  m_lmtb->init_statecache();
  m_lmtb->init_lmtcaches(m_lmtb->maxlevel() > 2 ? m_lmtb->maxlevel() - 1 : 2);

  if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);

  free(filenamesOrig);
  return true;
}
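// The overload above accepts two whitespace-separated forms in filePath; this is
// what the strsep() split implements (file names below are illustrative):
//   "lm.blm"          - plain (possibly binary/gzipped) IRSTLM table -> lmtable
//   "lm.blm map.txt"  - LM plus a micro-to-macro tag map             -> lmmacro
// On non-Windows builds, a file whose name ends in ".mm" is loaded through
// IRSTLM's memory-mapped path (last argument of load() set to 1).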
CBNet* DSLPNLConverter::CreateBNet(DSL_network &dslNet)
{
  CBNet* pnlBNet = NULL;

  // Create mapping nodeId <-> number
  theIds.CleanUp();

  // Traverse all the nodes and check that we have only CPTs;
  // if a noisy-MAX node is found, convert it to a CPT.
  // Build the list of DSL_ids along the way.
  int handle = dslNet.GetFirstNode();
  while (handle >= 0) {
    if (dslNet.GetNode(handle)->Definition()->GetType() != DSL_CPT) {
      if (dslNet.GetNode(handle)->Definition()->GetType() == DSL_NOISY_MAX) {
        int res = dslNet.GetNode(handle)->ChangeType(DSL_CPT);
        if (res != DSL_OKAY) return NULL;
      } else {
        return NULL;
      }
    }
    theIds.Add(dslNet.GetNode(handle)->Info().Header().GetId());
    handle = dslNet.GetNextNode(handle);
  }

  // Read the number of nodes in the net, just for the sake of safety
  int numberOfNodes = dslNet.GetNumberOfNodes();
  if (numberOfNodes != theIds.NumItems()) {
    std::cout << "something went wrong!" << std::endl;
    return NULL;
  }

  // some debug stuff
#ifdef DSLPNL_DEBUG
  int i;
  std::cerr << "Number of nodes : " << numberOfNodes << std::endl;
  std::cerr << "DSL_ids: " << std::endl;
  for (i = 0; i < numberOfNodes; i++)
    std::cerr << i << " : " << theIds[i] << std::endl;
#endif

  // Create CGraph
  CGraph* pnlGraph = CreateCGraph(dslNet);
  if (pnlGraph == NULL) {
    std::cout << "PNL graph not created!" << std::endl;
    return NULL;
  }

  // Create BNet
  pnlBNet = CreateCBNet(dslNet, pnlGraph);
  if (pnlBNet == NULL) {
    std::cout << "PNL BNet not created!" << std::endl;
    return NULL;
  }

  // Allocate factors
  CreateFactors(dslNet, pnlBNet);

  return pnlBNet;
}
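// The GetFirstNode()/GetNextNode() loop above is SMILE's standard node-iteration
// idiom: handles are non-negative, and GetNextNode() returns a negative value past
// the last node. A minimal sketch of the same traversal in isolation, counting CPT
// nodes; the header name and function name are assumptions.
#include "smile.h"

int sketch_count_cpt_nodes(DSL_network &net)
{
  int count = 0;
  for (int handle = net.GetFirstNode(); handle >= 0; handle = net.GetNextNode(handle)) {
    if (net.GetNode(handle)->Definition()->GetType() == DSL_CPT)
      ++count; // same type test as in CreateBNet() above
  }
  return count;
}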