예제 #1
0
// Rebuilds a TableCategorical model from the data supplied by byte_reader.
//
// Values are consumed in this order:
//   1. one byte    - number of predictors
//   2. one byte    - cell width selector (16 selects 16-bit cells, any other
//                    value selects single-byte cells)
//   3. one 16-bit value per predictor
//   4. one 16-bit value - target range
//   5. for every entry of the model's dynamic_list_, (target_range - 1) cells,
//      each converted with GetProb(value, 16) or GetProb(value, 8)
//
// Returns the newly allocated model (created with `new`).
// NOTE(review): target_range - 1 is computed in size_t, so a stored target
// range of 0 wraps around - confirm that the input can never contain 0.
Model* TableCategorical::ReadModel(ByteReader* byte_reader, const Schema& schema, size_t index) {
    const size_t num_predictors = byte_reader->ReadByte();
    const size_t cell_bits = byte_reader->ReadByte();

    std::vector<size_t> predictors;
    for (size_t k = 0; k < num_predictors; ++k) {
        const size_t predictor = byte_reader->Read16Bit();
        predictors.push_back(predictor);
    }

    // The last constructor argument (err) is only used in training, so 0 is passed.
    TableCategorical* result = new TableCategorical(schema, predictors, index, 0);

    const size_t target_range = byte_reader->Read16Bit();
    result->target_range_ = target_range;

    // Read the model parameters, one probability vector per table entry.
    const bool wide_cells = (cell_bits == 16);
    const size_t cells_per_entry = target_range - 1;
    const size_t entry_count = result->dynamic_list_.size();
    for (size_t entry = 0; entry < entry_count; ++entry) {
        std::vector<Prob>& cells = result->dynamic_list_[entry].prob;
        cells.resize(cells_per_entry);
        for (size_t c = 0; c < cells.size(); ++c) {
            if (wide_cells) {
                cells[c] = GetProb(byte_reader->Read16Bit(), 16);
            } else {
                cells[c] = GetProb(byte_reader->ReadByte(), 8);
            }
        }
    }

    return result;
}
예제 #2
0
/* GetProb: return nSize-gram probability for ngram in wlab */
static double GetProb(LabId *wlab, int nSize)
{
   /*
      Looks up the n-gram wlab[0..nSize-1] and returns its probability.

      One model (nLModel==1): if every label maps to a non-NULL id in
      l2nId[0], the value of GetNGramProb is returned unchanged.

      Several models: every model whose l2nId table maps all labels to
      non-NULL ids contributes exp(GetNGramProb(...)). Contributions are
      summed with the model's weight, or, when INTERPOLATE_MAX is defined,
      the largest unweighted contribution is kept. The log of the
      combined value is returned.

      When no model can supply the n-gram, the first label is dropped and
      the (nSize-1)-gram is tried instead; once nSize is 1, HError is
      called and LZERO is returned.
   */
   NameId ids[LM_NSIZE];
   LMInfo *model;
   Boolean known, anyKnown;
   double lp, total;
   int m, k;

   if (nLModel == 1) {
      known = TRUE;
      for (k = 0; k < nSize; k++) {
         ids[k] = l2nId[0][(int) (wlab[k]->aux)];
         if (ids[k] == NULL)
            known = FALSE;
      }
      if (known)
         return GetNGramProb(lmInfo[0].lm, ids, nSize);
   }
   else {
      total = 0.0;
      anyKnown = FALSE;
      for (m = 0; m < nLModel; m++) {
         model = lmInfo + m;
         known = TRUE;
         for (k = 0; k < nSize; k++) {
            ids[k] = l2nId[m][(int) (wlab[k]->aux)];
            if (ids[k] == NULL)
               known = FALSE;
         }
         if (known) {
            lp = GetNGramProb(model->lm, ids, nSize);
#ifdef INTERPOLATE_MAX
            lp = exp(lp);
            if (lp > total)
               total = lp;
#else
            total += model->weight * exp(lp);
#endif
            anyKnown = TRUE;
         }
      }
      if (anyKnown)
         return log(total);
   }

   /* no model covers every label: back off to the shorter n-gram */
   if (nSize > 1)
      return GetProb(wlab + 1, nSize - 1);

   HError(-16690,"GetProb: assigning zero probability");
   return LZERO;
}
	// Writes one tab-separated record to `o`: the sequence id, the position,
	// then GetProb(0) through GetProb(3). No trailing newline is written.
	// Returns `o` so that calls can be chained.
	ostream& Print( ostream& o ) const {
		o << GetSeqId() << "\t" << GetPos();
		for ( int k = 0; k < 4; ++k ) {
			o << "\t" << GetProb(k);
		}
		return o;
	}
예제 #4
0
// Rebuilds a StringModel from the data supplied by byte_reader.
//
// The count tables are emptied, then 255 character probabilities are read as
// 16-bit values and 63 length probabilities as single bytes, each converted
// with GetProb(value, 16) / GetProb(value, 8) respectively.
//
// Returns the newly allocated model (created with `new`).
Model* StringModel::ReadModel(ByteReader* byte_reader, size_t index) {
    const int kCharSlots = 255;
    const int kLengthSlots = 63;

    StringModel* result = new StringModel(index);
    result->char_count_.clear();
    result->length_count_.clear();
    result->char_prob_.resize(kCharSlots);
    result->length_prob_.resize(kLengthSlots);

    for (int slot = 0; slot < kCharSlots; ++slot) {
        result->char_prob_[slot] = GetProb(byte_reader->Read16Bit(), 16);
    }
    for (int slot = 0; slot < kLengthSlots; ++slot) {
        result->length_prob_[slot] = GetProb(byte_reader->ReadByte(), 8);
    }

    return result;
}
예제 #5
0
/* CalcPerplexity: compute perplexity and other statistics */
/*
   Slides an nSize-label window over pLab[0..numPLabs-1]. For each position
   pos >= nSize-1 the label pLab[pos] is predicted from the nSize-1 labels
   before it, and the value returned by GetProb is accumulated into
   sent->logpp (sum) and sent->logpp2 (sum of squares); sent->nWrd counts the
   predictions made.

   Positions are skipped when the predicted label is unkId, and - if skipOOV
   is set - when any label of the context is unkId.

   Optional output: exp(prob) per prediction to outStream when outStreamFN is
   non-NULL, a per-prediction trace line when T_PROB is set in trace, and
   PrintInfo(sent,FALSE) at the end when T_SENT is set.
*/
static void CalcPerplexity(PStats *sent, LabId *pLab, int numPLabs, int nSize)
{
   int pos, back;
   LabId *ngram;
   float lp;

   for (pos = nSize-1; pos < numPLabs; pos++) {
      if (pLab[pos] == unkId)
         continue;              /* cannot predict OOVs */

      if (skipOOV) {
         /* stop at the first context label that is an OOV, if any */
         back = 1;
         while (back < nSize && pLab[pos-back] != unkId)
            back++;
         if (back < nSize)
            continue;           /* context contains an OOV: next label */
      }

      /* first label of the n-gram that ends at pos */
      ngram = pLab + (pos - (nSize-1));
      lp = GetProb(ngram, nSize);

      sent->nWrd++;
      sent->logpp += lp;
      sent->logpp2 += lp*lp;

      if (outStreamFN != NULL)
         fprintf(outStream,"%e\n",exp(lp));

      if (trace&T_PROB) {
         printf("logP(%s |", pLab[pos]->name);
         for (back = 1; back < nSize; back++)
            printf(" %s%s", (back==1)?"":",", pLab[pos-back]->name);
         printf(") = %.4f\n", lp);
         /* if (trace&T_INST_INFO) PrintInstStats(nSize); */
         fflush(stdout);
      }
   }

   if (trace&T_SENT)
      PrintInfo(sent,FALSE);
}