示例#1
0
// Reads one n-gram entry line from `inp` and splits it into its fields.
//
// The line must contain either Order+1 or Order+2 whitespace-separated
// tokens (as returned by parseWords): token 0 is parsed as a float into
// `prob`, tokens 1..Order are the words, and the optional token Order+1 is
// parsed as a float into `bow`.
//
//   inp    stream the line is read from (one getline call per invocation)
//   Order  number of word tokens expected on the line
//   ng     reset to size 0, then filled via pushw() with the words in order;
//          the token "<unk>" is replaced by ng.dict->OOV()
//   prob   receives the value of token 0
//   bow    receives the value of token Order+1 when present, otherwise 0.0
//
// Returns 1. A line that fills the whole buffer, a wrong number of tokens, or
// a numeric field that cannot be parsed prints a message on cerr and
// terminates the program with exit(1).
//
// The numeric fields used to be parsed inside assert(); with NDEBUG defined
// the sscanf calls disappeared and prob/bow were left unset. The checks are
// now unconditional.
int parseline(istream& inp, int Order,ngram& ng,float& prob,float& bow)
{

  // NOTE(review): parseWords is handed Order + 3 as its limit while `words`
  // has LMTMAXLEV + 3 slots; presumably callers guarantee Order <= LMTMAXLEV
  // -- confirm.
  const char* words[1+ LMTMAXLEV + 1 + 1];
  char line[MAX_LINE];

  inp.getline(line,MAX_LINE);
  if (strlen(line)==MAX_LINE-1) {
    cerr << "parseline: input line exceed MAXLINE ("
         << MAX_LINE << ") chars " << line << "\n";
    exit(1);
  }

  const int howmany = parseWords(line, words, Order + 3);

  // Validate the token count before touching words[1..Order]: entries past
  // `howmany` are uninitialised pointers.
  if (howmany != (Order + 1) && howmany != (Order + 2)) {
    cerr << "parseline: expected " << (Order + 1) << " or " << (Order + 2)
         << " fields but found " << howmany << "\n";
    exit(1);
  }

  //read words
  ng.size=0;
  for (int i=1; i<=Order; i++)
    ng.pushw(strcmp(words[i],"<unk>")?words[i]:ng.dict->OOV());

  //read logprob/code and logbow/code
  // sscanf must report exactly one converted field; 0 and EOF are failures.
  if (sscanf(words[0],"%f",&prob) != 1) {
    cerr << "parseline: cannot parse probability field \""
         << words[0] << "\"\n";
    exit(1);
  }

  if (howmany==(Order+2)) {
    if (sscanf(words[Order+1],"%f",&bow) != 1) {
      cerr << "parseline: cannot parse back-off field \""
           << words[Order+1] << "\"\n";
      exit(1);
    }
  } else
    bow=0.0; //this is log10prob=0 for implicit backoff

  return 1;
}
示例#2
0
// Looks up a cached value for `ng` and stores it in `value`.
//
//   size == 2 : the code at ng.wordp(2) indexes cache[0]; codes not below
//               cachesize[0] yield 0.
//   size == 3 : ngt->get(ng,size,size-1) is queried; on success the `hit`
//               counter is bumped and cache[1][ng.freq] is returned, on
//               failure the `miss` counter is bumped and 0 is returned.
//   otherwise : returns 0 and leaves `value` untouched.
//
// The return value equals what was written to `value` (or 0 for other sizes).
double normcache::get(ngram ng,int size,double& value)
{
  switch (size) {

  case 2:
    if (*ng.wordp(2) < cachesize[0])
      value = cache[0][*ng.wordp(2)];
    else
      value = 0;
    return value;

  case 3:
    if (!ngt->get(ng,size,size-1)) {
      miss++;
      value = 0;
      return value;
    }
    hit++;
    value = cache[1][ng.freq];
    return value;

  default:
    return 0;
  }
}
示例#3
0
// Returns the probability for `ng` at order `size`, also exposing the
// intermediate quantities produced by bodiscount().
//
//   ng      n-gram to score (passed by value)
//   size    order at which to score it
//   fstar   out: value set by mdiadaptlm::bodiscount
//   lambda  out: value set by mdiadaptlm::bodiscount
//   bo      out: value set by mdiadaptlm::bodiscount
//
// When `backoff` is set, fstar is used if positive, otherwise the lower-order
// probability is used, scaled by lambda/bo when lambda < 1. When `backoff` is
// not set, the result is fstar + lambda * lower-order probability. At
// size <= 1 the result is always fstar.
//
// The program terminates with exit(1) if fstar or lambda exceeds 1.0000001.
// With MDIADAPTLM_CACHE_ENABLE defined, results are looked up in and added to
// probcache[size].
double mdiadaptlm::prob(ngram ng,int size,double& fstar,double& lambda, double& bo)
{
  double pr;

#ifdef MDIADAPTLM_CACHE_ENABLE
  //probcache hit
  if (size<=max_caching_level && probcache[size] && ng.size>=size && probcache[size]->get(ng.wordp(size),pr))
    return pr;
#endif

  //probcache miss
  mdiadaptlm::bodiscount(ng,size,fstar,lambda,bo);

  const bool fstarTooLarge = fstar >1.0000001;
  if (fstarTooLarge || lambda >1.0000001) {
    cerr << "wrong probability: " << ng
         << " , size " << size
         << " , fstar " << fstar
         << " , lambda " << lambda << "\n";
    exit(1);
  }

  if (size<=1) {
    // lowest order: same result in both backoff and interpolation mode
    pr = fstar;
  } else if (!backoff) {
    // interpolation
    pr = fstar  + lambda * prob(ng,size-1);
  } else if (fstar>0) {
    // backoff, n-gram has its own mass
    pr = fstar;
  } else if (lambda<1) {
    // backoff to the lower order, rescaled
    pr = lambda/bo * prob(ng,size-1);
  } else {
    // backoff with lambda (numerically) equal to 1
    assert(lambda < 1.00000001);
    pr = prob(ng,size-1);
  }

#ifdef MDIADAPTLM_CACHE_ENABLE
  //probcache insert
  if (size<=max_caching_level && probcache[size] && ng.size>=size)
    probcache[size]->add(ng.wordp(size),pr);
#endif

  return pr;
}
示例#4
0
// Stores `value` in the cache for `ng` and returns the stored value.
//
//   size == 2 : cache[0] is indexed by the code at ng.wordp(2); expand(0) is
//               called first when that code is not below maxcache[0], and
//               cachesize[0] is incremented on every call.
//   size == 3 : if ngt->get(ng,size,size-1) succeeds, cache[1][ng.freq] is
//               overwritten. Otherwise a 2-word entry built from words 2 and
//               3 of `ng` is given the next free index (cachesize[1]), put
//               into ngt, and cache[1] at that index receives the value;
//               expand(1) is called when cachesize[1] reaches maxcache[1].
//   otherwise : nothing is stored and 0 is returned.
double normcache::put(ngram ng,int size,double value)
{
  if (size!=2 && size!=3)
    return 0;

  if (size==2) {
    if (*ng.wordp(2)>= maxcache[0])
      expand(0);
    cache[0][*ng.wordp(2)]=value;
    cachesize[0]++;
    return value;
  }

  // size == 3 from here on
  if (!ngt->get(ng,size,size-1)) {
    // not seen yet: register the entry under a fresh cache index
    ngram entry(dict,2);
    *entry.wordp(1)=*ng.wordp(2);
    *entry.wordp(2)=*ng.wordp(3);
    entry.freq=cachesize[1]++;
    if (cachesize[1]==maxcache[1])
      expand(1);
    ngt->put(entry);
    return cache[1][entry.freq]=value;
  }

  // already registered: ng.freq holds its cache index
  return cache[1][ng.freq]=value;
}
示例#5
0
// Looks up `ng` in the n-gram table, building the table on demand from the
// sub-LM tables.
//
// With `usefulltable` set, the call is forwarded directly to
// ngramtable::get. Otherwise the current table is reset, and for every
// sub-LM that contains the word at ng.wordp(ng.size), all entries returned
// by scanning that sub-LM from this word up to maxlevel() are translated to
// the local dictionary and inserted with put(). The lookup is then performed
// on the freshly built table.
//
// Returns whatever ngramtable::get(ng,n,lev) returns.
int mixture::get(ngram& ng,int n,int lev)
{
  if (usefulltable)
    return ngramtable::get(ng,n,lev);

  //drop the tree built by the previous call
  resetngramtable();

  //single word taken from position ng.size of ng
  ngram head(dict,1);
  *head.wordp(1)=*ng.wordp(ng.size);

  //scratch n-gram used to upload entries into the local table
  ngram uploaded(dict,maxlevel());

  //collect subtrees from every sublm
  for (int k=0; k<numslm; ++k) {

    ngram subHead(sublm[k]->dict,1);
    subHead.trans(head);

    //this sublm does not know the word: nothing to copy
    if (!sublm[k]->get(subHead,1,1))
      continue;

    ngram subEntry(sublm[k]->dict,maxlevel());
    *subEntry.wordp(maxlevel())=*subHead.wordp(1);

    sublm[k]->scan(subHead.link,subHead.info,1,subEntry,INIT,maxlevel());
    while (sublm[k]->scan(subHead.link,subHead.info,1,subEntry,CONT,maxlevel())) {
      uploaded.trans(subEntry);
      put(uploaded);
    }
  }

  return ngramtable::get(ng,n,lev);
}
示例#6
0
// Unigram estimate for the word at ng.wordp(1):
//
//   (freq(word) + epsilon) / (totfreq + dictionary_size * epsilon)
//
// where freq, totfreq and size are read from `dict`.
double interplm::unigrWB(ngram ng)
{
  const double smoothedCount =
    static_cast<double>(dict->freq(*ng.wordp(1))+epsilon);

  const double smoothedTotal =
    static_cast<double>(dict->totfreq()) + static_cast<double>(dict->size()) * epsilon;

  return smoothedCount/smoothedTotal;
}
示例#7
-1
// Recursively computes a probability for `ng` at order `size`.
//
// For size > 1 the result is fstar + lambda * txclprob(ng,size-1), with
// fstar and lambda obtained from mdiadaptlm::discount.
//
// For size <= 1 the result is count / norm, where count starts at 1 and has
// ng.freq added when the word at ng.wordp(1) is not the dictionary's OOV code
// and get(ng,1,1) succeeds, and norm = totfreq() + dict->dub() - dict->size().
double mdiadaptlm::txclprob(ngram ng,int size)
{
  //base case: unigram level
  if (size<=1) {
    double count=1;

    const bool knownWord = (*ng.wordp(1)!=dict->oovcode());
    if (knownWord && get(ng,1,1))
      count+=ng.freq;

    const double norm=totfreq()+dict->dub()-dict->size();
    return count/norm;
  }

  //recursive case: combine this level with the lower-order estimate
  double fstar,lambda;
  mdiadaptlm::discount(ng,size,fstar,lambda);

  return fstar  + lambda * txclprob(ng,size-1);
}