// Scores the last word of contextFactor given the words before it.
//
// contextFactor: the n-gram, oldest word first; the final entry is the word
//                being scored.
// finalState:    optional out-parameter. Set to NULL for an empty n-gram,
//                otherwise to the value returned by m_srilmModel->contextID()
//                for the full n-gram.
// Returns the LMResult produced by GetValue(VocabIndex, const VocabIndex*),
// or {score 0, unknown false} for an empty n-gram.
LMResult LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
  LMResult result;
  FactorType factorType = GetFactorType();
  const size_t numWords = contextFactor.size();

  // Nothing to score.
  if (numWords == 0) {
    if (finalState) {
      *finalState = NULL;
    }
    result.score = 0.0;
    result.unknown = false;
    return result;
  }

  // Layout of the buffer:
  //   [0]               reserved for the scored word (only filled for finalState)
  //   [1 .. numWords-1] history, most recent word first
  //   [numWords]        Vocab_None terminator
  VocabIndex ngram[numWords + 1];
  for (size_t slot = 1; slot < numWords; ++slot) {
    ngram[slot] = GetLmID((*contextFactor[numWords - 1 - slot])[factorType]);
  }
  ngram[numWords] = Vocab_None;

  CHECK((*contextFactor[numWords - 1])[factorType] != NULL);

  // Score the last word against the reversed history.
  VocabIndex scoredId = GetLmID((*contextFactor[numWords - 1])[factorType]);
  result = GetValue(scoredId, &ngram[1]);

  if (finalState) {
    // Prepend the scored word so the context id covers the whole n-gram.
    ngram[0] = scoredId;
    unsigned int dummy;
    *finalState = m_srilmModel->contextID(ngram, dummy);
  }
  return result;
}
float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const { unsigned int dummy; if (!len) { len = &dummy; } FactorType factorType = GetFactorType(); // set up context size_t count = contextFactor.size(); m_lmtb_ng->size=0; if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd); if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart); for (size_t i = 0 ; i < count ; i++) { //int lmId = GetLmID((*contextFactor[i])[factorType]); #ifdef DEBUG cout << "i=" << i << " -> " << (*contextFactor[i])[factorType]->GetString() << "\n"; #endif int lmId = GetLmID((*contextFactor[i])[factorType]->GetString()); // cerr << (*contextFactor[i])[factorType]->GetString() << " = " << lmId; m_lmtb_ng->pushc(lmId); } if (finalState){ *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng); // back off stats not currently available *len = 0; } float prob = m_lmtb->clprob(*m_lmtb_ng); return TransformLMScore(prob); }
// Scores the last string of contextFactor given the strings before it.
//
// contextFactor: the n-gram as surface strings, oldest first; the final entry
//                is the word being scored. (Taken by value to match the
//                existing declaration.)
// ISfinale:      currently has no effect on the result. The previous body only
//                used it to guard a store that was never read.
// Returns the LMResult produced by GetValue(VocabIndex, const VocabIndex*),
// or {score 0, unknown false} for an empty n-gram.
LMResult LanguageModelSRI::GetValueBF(const vector< std::string> contextFactor, bool ISfinale) const
{
  (void)ISfinale; // kept for interface compatibility; see header comment

  const size_t count = contextFactor.size();
  if (count == 0) {
    LMResult empty;
    empty.score = 0.0;
    empty.unknown = false;
    return empty;
  }

  // History in reverse order (most recent word first), terminated by
  // Vocab_None. count >= 1 here, so the array always has room for the
  // terminator.
  VocabIndex context[count];
  for (size_t i = 0; i + 1 < count; i++) {
    context[i] = GetLmID(contextFactor[count - 2 - i]);
  }
  context[count - 1] = Vocab_None;

  VocabIndex lmId = GetLmID(contextFactor[count - 1]);
  return GetValue(lmId, context);
}
float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState) const { FactorType factorType = GetFactorType(); // set up context size_t count = contextFactor.size(); if (count < 0) { cerr << "ERROR count < 0\n"; exit(100); }; // set up context int codes[MAX_NGRAM_SIZE]; size_t idx=0; //fill the farthest positions with at most ONE sentenceEnd symbol and at most ONE sentenceEnd symbol, if "empty" positions are available //so that the vector looks like = "</s> <s> context_word context_word" for a two-word context and a LM of order 5 if (count < (size_t) (m_lmtb_size-1)) codes[idx++] = m_lmtb_sentenceEnd; if (count < (size_t) m_lmtb_size) codes[idx++] = m_lmtb_sentenceStart; for (size_t i = 0 ; i < count ; i++) codes[idx++] = GetLmID((*contextFactor[i])[factorType]); float prob; char* msp = NULL; unsigned int ilen; prob = m_lmtb->clprob(codes,idx,NULL,NULL,&msp,&ilen); if (finalState) *finalState=(State *) msp; return TransformLMScore(prob); }
// Scores the n-gram in contextFactor with the RandLM model.
//
// contextFactor: words of the n-gram; each is mapped to a RandLM word id via
//                GetLmID() and passed to m_lm->getProb() in the given order.
// finalState:    forwarded unchanged to m_lm->getProb().
// Returns an LMResult whose score is FloorScore(TransformLMScore(prob)) and
// whose 'unknown' flag is set when the n-gram is non-empty and its last word
// maps to m_oov_id.
LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
  FactorType factorType = GetFactorType();

  // Translate every word into its RandLM id.
  randlm::WordID wordIds[MAX_NGRAM_SIZE];
  int numWords = contextFactor.size();
  int pos = 0;
  while (pos < numWords) {
    wordIds[pos] = GetLmID((*contextFactor[pos])[factorType]);
    ++pos;
  }

  int found = 0;
  LMResult result;
  result.score = FloorScore(
                   TransformLMScore(
                     m_lm->getProb(&wordIds[0], numWords, &found, finalState)));

  // Only a non-empty n-gram can end in the OOV word.
  if (numWords != 0) {
    result.unknown = (wordIds[numWords - 1] == m_oov_id);
  } else {
    result.unknown = false;
  }
  return result;
}
// Scores the last word of contextFactor by asking the remote LM server over
// 'sock', caching every answer in the m_cache trie.
//
// contextFactor: the n-gram, oldest word first; the final entry is the word
//                being scored. A null factor is treated as <s> in the history
//                and as </s> in the scored position.
// finalState:    optional out-parameter; NULL for an empty n-gram, otherwise
//                the boState stored in the cache node for this n-gram.
// Returns an LMResult with unknown == false and score taken from the cache or
// from the server reply (after TransformLMScore and FloorScore).
//
// Exits the process if reading the reply fails more than five times.
LMResult LanguageModelRemote::GetValue(const std::vector<const Word*> &contextFactor, State* finalState) const
{
  LMResult ret;
  ret.unknown = false;
  size_t count = contextFactor.size();
  if (count == 0) {
    if (finalState) *finalState = NULL;
    ret.score = 0.0;
    return ret;
  }

  // The query carries at most m_nGramOrder words.
  size_t max = m_nGramOrder;
  const FactorType factor = GetFactorType();
  if (max > count) max = count;

  // Walk (creating nodes as needed) to the cache node for this n-gram.
  Cache* cur = &m_cache;
  int pc = static_cast<int>(count) - 1;
  for (int i = 0; i < pc; ++i) {
    const Factor* f = contextFactor[i]->GetFactor(factor);
    cur = &cur->tree[f ? f : BOS];
  }
  const Factor* event_word = contextFactor[pc]->GetFactor(factor);
  cur = &cur->tree[event_word ? event_word : EOS];

  // A non-zero prob marks a cached answer (a cached value of exactly 0 is
  // therefore re-queried).
  if (cur->prob) {
    if (finalState) *finalState = cur->boState;
    ret.score = cur->prob;
    return ret;
  }

  // Allocate a fresh state id for this n-gram.
  cur->boState = *reinterpret_cast<const State*>(&m_curId);
  ++m_curId;

  // Build the request: "prob <word> <prev> <prev-1> ...\n".
  std::ostringstream os;
  os << "prob ";
  if (event_word == NULL) {
    os << "</s>";
  } else {
    os << event_word->GetString();
  }
  for (size_t i = 1; i < max; i++) {
    const Factor* f = contextFactor[count-1-i]->GetFactor(factor);
    if (f == NULL) {
      os << " <s>";
    } else {
      os << ' ' << f->GetString();
    }
  }
  os << '\n';
  std::string out = os.str();
  write(sock, out.c_str(), out.size());

  // Read the reply: up to 6 bytes, stopping early on end-of-stream or when a
  // freshly read chunk starts with '\n'. Every iteration issues its own read()
  // and checks its own result, so a failed read is actually retried and the
  // write offset can never run past the end of 'res'.
  char res[6] = {0};
  int cnt = 0;
  int errors = 0;
  while (cnt < 6) {
    const ssize_t r = read(sock, &res[cnt], 6 - cnt);
    if (r < 0) {
      errors++;
      sleep(1);
      if (errors > 5) exit(1);
      continue;
    }
    if (r == 0 || res[cnt] == '\n') {
      break;
    }
    cnt += static_cast<int>(r);
  }

  // The leading bytes of the reply are interpreted as a raw float.
  cur->prob = FloorScore(TransformLMScore(*reinterpret_cast<float*>(res)));
  if (finalState) {
    *finalState = cur->boState;
  }
  ret.score = cur->prob;
  return ret;
}