Beispiel #1
0
/**
 * Scores the last word of contextFactor given the words that precede it.
 *
 * @param contextFactor words of the n-gram, oldest first; the final entry is
 *                      the word being scored.
 * @param finalState    optional out-parameter; when non-NULL it receives the
 *                      context id reported by the SRILM model for the full
 *                      n-gram (or NULL when contextFactor is empty).
 * @return the result of the (wordId, context) GetValue overload, or a zero
 *         score with unknown == false for an empty contextFactor.
 */
LMResult LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
  LMResult result;
  const FactorType factorType = GetFactorType();
  const size_t count = contextFactor.size();

  // Nothing to score: report a neutral result and an empty state.
  if (count == 0) {
    result.score = 0.0;
    result.unknown = false;
    if (finalState != NULL) {
      *finalState = NULL;
    }
    return result;
  }

  // Layout of history:
  //   [0]            reserved for the scored word (only filled for finalState)
  //   [1 .. count-1] the preceding words, most recent first
  //   [count]        Vocab_None terminator
  VocabIndex history[count + 1];
  size_t slot = 1;
  for (size_t src = count - 1; src > 0; --src) {
    history[slot++] = GetLmID((*contextFactor[src - 1])[factorType]);
  }
  history[count] = Vocab_None;

  CHECK((*contextFactor[count-1])[factorType] != NULL);

  // Score the final word against the reversed history.
  const VocabIndex scoredWord = GetLmID((*contextFactor[count-1])[factorType]);
  result = GetValue(scoredWord, history + 1);

  if (finalState == NULL) {
    return result;
  }

  // The state is looked up on the whole n-gram, scored word included.
  history[0] = scoredWord;
  unsigned int ignoredLength;
  *finalState = m_srilmModel->contextID(history, ignoredLength);
  return result;
}
Beispiel #2
0
/**
 * Scores contextFactor with the IRSTLM table through the shared n-gram
 * buffer m_lmtb_ng.
 *
 * The buffer is reset and, when the context is shorter than m_lmtb_size,
 * padded in front with the sentence-end and/or sentence-start codes before
 * the context words are appended in order.
 *
 * @param contextFactor words of the n-gram, in the order they are pushed.
 * @param finalState    optional out-parameter; when non-NULL it receives the
 *                      pointer returned by cmaxsuffptr for the n-gram.
 * @param len           optional out-parameter; always set to 0 when supplied,
 *                      because back-off statistics are not currently
 *                      available.
 * @return TransformLMScore applied to the probability returned by clprob.
 */
float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const
{
	// back off stats not currently available: report 0 whenever the caller
	// asked for a length, independently of whether a state was requested,
	// so the out-parameter is never left unset.
	if (len) { *len = 0; }

	FactorType factorType = GetFactorType();

	// set up context
	size_t count = contextFactor.size();

	m_lmtb_ng->size=0;
	if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
	if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);

	for (size_t i = 0 ; i < count ; i++)
	{
#ifdef DEBUG
	  cout << "i=" << i << " -> " << (*contextFactor[i])[factorType]->GetString() << "\n";
#endif
	  // the LM id is looked up from the factor's string form
	  int lmId = GetLmID((*contextFactor[i])[factorType]->GetString());
	  m_lmtb_ng->pushc(lmId);
	}

	if (finalState){
		*finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);
	}

	float prob = m_lmtb->clprob(*m_lmtb_ng);

	return TransformLMScore(prob);
}
/**
 * Scores the last string of contextFactor given the strings that precede it.
 *
 * @param contextFactor surface strings of the n-gram, oldest first; the final
 *                      entry is the word being scored.
 * @param ISfinale      accepted for interface compatibility; it has no effect
 *                      on the returned result.
 * @return the result of the (wordId, context) GetValue overload, or a zero
 *         score with unknown == false for an empty contextFactor.
 */
LMResult LanguageModelSRI::GetValueBF(const vector< std::string> contextFactor, bool ISfinale) const
{
  // The flag never influenced the result; keep the parameter for callers.
  (void)ISfinale;

  LMResult ret;
  size_t count = contextFactor.size();
  if (count == 0) {
    ret.score = 0.0;
    ret.unknown = false;
    return ret;
  }

  // context[0 .. count-2] holds the preceding words, most recent first,
  // followed by the Vocab_None terminator at context[count-1].
  VocabIndex context[count];
  for (size_t i = 0 ; i < count - 1 ; i++) {
    context[i] = GetLmID(contextFactor[count-2-i]);
  }
  context[count - 1] = Vocab_None;

  VocabIndex lmId = GetLmID(contextFactor[count-1]);
  ret = GetValue(lmId, context);
  return ret;
}
float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
	FactorType factorType = GetFactorType();

	// set up context
	size_t count = contextFactor.size();
        if (count < 0) { cerr << "ERROR count < 0\n"; exit(100); };

        // set up context
        int codes[MAX_NGRAM_SIZE];

	size_t idx=0;
        //fill the farthest positions with at most ONE sentenceEnd symbol and at most ONE sentenceEnd symbol, if "empty" positions are available
	//so that the vector looks like = "</s> <s> context_word context_word" for a two-word context and a LM of order 5
	if (count < (size_t) (m_lmtb_size-1)) codes[idx++] = m_lmtb_sentenceEnd;  
	if (count < (size_t) m_lmtb_size) codes[idx++] = m_lmtb_sentenceStart;  

        for (size_t i = 0 ; i < count ; i++)
                codes[idx++] =  GetLmID((*contextFactor[i])[factorType]);

        float prob;
        char* msp = NULL;
        unsigned int ilen;
        prob = m_lmtb->clprob(codes,idx,NULL,NULL,&msp,&ilen);

	if (finalState) *finalState=(State *) msp;

	return TransformLMScore(prob);
}
Beispiel #5
0
/**
 * Scores contextFactor with the RandLM model.
 *
 * @param contextFactor words of the n-gram, converted to RandLM word ids in
 *                      the order given.
 * @param finalState    forwarded unchanged to the model's getProb call.
 * @return score   = FloorScore(TransformLMScore(getProb(...)));
 *         unknown = true when contextFactor is non-empty and its last word
 *                   maps to m_oov_id.
 *
 * NOTE(review): the id buffer holds MAX_NGRAM_SIZE entries and one entry is
 * written per context word without a bounds check, so callers must not pass
 * more than MAX_NGRAM_SIZE words.
 */
LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
                                       State* finalState) const
{
    const FactorType lmFactor = GetFactorType();

    // Translate every context word into its RandLM id.
    randlm::WordID ids[MAX_NGRAM_SIZE];
    const int numWords = contextFactor.size();
    int pos = 0;
    while (pos < numWords) {
        ids[pos] = GetLmID((*contextFactor[pos])[lmFactor]);
        ++pos;
    }

    // getProb reports through 'found'; the value is not used here.
    int found = 0;
    LMResult result;
    result.score = FloorScore(TransformLMScore(m_lm->getProb(ids, numWords, &found, finalState)));

    // Only a non-empty context can end in an out-of-vocabulary word.
    if (numWords == 0) {
        result.unknown = false;
    } else {
        result.unknown = (ids[numWords - 1] == m_oov_id);
    }
    return result;
}
Beispiel #6
0
/**
 * Scores the last word of contextFactor by querying the remote LM server
 * over the socket `sock`, caching results in the m_cache tree.
 *
 * @param contextFactor words of the n-gram, oldest first; the final entry is
 *                      the word being scored. A NULL factor is treated as
 *                      <s> in the history and as </s> for the scored word.
 * @param finalState    optional out-parameter; when non-NULL it receives the
 *                      cache node's boState (or NULL for an empty context).
 * @return score from the cache or from the server reply, passed through
 *         TransformLMScore and FloorScore; unknown is always false.
 *
 * Terminates the process with exit(1) after more than five failed reads.
 */
LMResult LanguageModelRemote::GetValue(const std::vector<const Word*> &contextFactor, State* finalState) const
{
  LMResult ret;
  ret.unknown = false;
  size_t count = contextFactor.size();
  if (count == 0) {
    if (finalState) *finalState = NULL;
    ret.score = 0.0;
    return ret;
  }

  // At most m_nGramOrder words (scored word included) are sent to the server.
  size_t max = m_nGramOrder;
  const FactorType factor = GetFactorType();
  if (max > count) max = count;

  // Descend the cache tree along the history, then along the scored word.
  Cache* cur = &m_cache;
  int pc = static_cast<int>(count) - 1;
  for (int i = 0; i < pc; ++i) {
    const Factor* f = contextFactor[i]->GetFactor(factor);
    cur = &cur->tree[f ? f : BOS];
  }
  const Factor* event_word = contextFactor[pc]->GetFactor(factor);
  cur = &cur->tree[event_word ? event_word : EOS];

  // A non-zero prob marks a node that has already been filled in.
  if (cur->prob) {
    if (finalState) *finalState = cur->boState;
    ret.score = cur->prob;
    return ret;
  }

  // New node: derive its state from the running id counter.
  // NOTE(review): this reads sizeof(State) bytes starting at m_curId —
  // confirm m_curId is at least as large as State.
  cur->boState = *reinterpret_cast<const State*>(&m_curId);
  ++m_curId;

  // Request line: "prob <word> <history, most recent first>\n".
  std::ostringstream os;
  os << "prob ";
  if (event_word == NULL) {
    os << "</s>";
  } else {
    os << event_word->GetString();
  }
  for (size_t i=1; i<max; i++) {
    const Factor* f = contextFactor[count-1-i]->GetFactor(factor);
    if (f == NULL) {
      os << " <s>";
    } else {
      os << ' ' << f->GetString();
    }
  }
  os << '\n';
  std::string out = os.str();
  // NOTE(review): the result of write() is not checked, so a short or
  // failed write goes unnoticed here.
  write(sock, out.c_str(), out.size());

  // Read the 6-byte reply. read() may deliver it in several pieces, so keep
  // reading at the current offset until the buffer is full, the peer closes
  // the connection, or a chunk starts with a newline.
  char res[6] = {0};
  int errors = 0;
  int cnt = 0;
  while (cnt < 6) {
    const int r = read(sock, &res[cnt], 6 - cnt);
    if (r < 0) {
      errors++;
      sleep(1);
      //std::cerr << "Error: read()\n";
      if (errors > 5) exit(1);
      continue;  // retry the read after the pause
    }
    if (r == 0 || res[cnt] == '\n') {
      break;
    }
    cnt += r;
  }

  // The leading bytes of the reply carry the raw float; copy them out
  // byte-wise rather than dereferencing a char buffer as a float.
  float raw = 0.0f;
  char* rawBytes = reinterpret_cast<char*>(&raw);
  for (size_t i = 0; i < sizeof(raw); ++i) {
    rawBytes[i] = res[i];
  }

  cur->prob = FloorScore(TransformLMScore(raw));
  if (finalState) {
    *finalState = cur->boState;
  }
  ret.score = cur->prob;
  return ret;
}