예제 #1
// Deserializes a TableCategorical model from `byte_reader`.
//
// Values are consumed from the reader in this order:
//   1. ReadByte()   - number of predictor entries that follow
//   2. ReadByte()   - cell size; 16 selects 16-bit cells, any other value is
//                     read as 8-bit cells
//   3. Read16Bit()  - one value per predictor entry
//   4. Read16Bit()  - target range, stored in target_range_
//   5. for every entry of dynamic_list_ (as it stands after construction):
//      (target_range - 1) cells, each converted with GetProb()
//
// Parameters:
//   byte_reader - source of the serialized model; must be non-null.
//   schema      - forwarded unchanged to the TableCategorical constructor.
//   index       - forwarded unchanged to the TableCategorical constructor.
//
// Returns a model allocated with `new`.
// NOTE(review): presumably the caller takes ownership of the returned
// pointer - confirm against the call sites.
Model* TableCategorical::ReadModel(ByteReader* byte_reader, const Schema& schema, size_t index) {
    const size_t predictor_size = byte_reader->ReadByte();
    const size_t cell_size = byte_reader->ReadByte();

    // Keep every predictor value that is read; previously each value landed in
    // a throw-away local, so the constructor always saw an empty list.
    std::vector<size_t> predictor_list;
    predictor_list.reserve(predictor_size);
    for (size_t i = 0; i < predictor_size; ++i) {
        predictor_list.push_back(byte_reader->Read16Bit());
    }

    // set err to 0 because err is only used in training
    TableCategorical* model = new TableCategorical(schema, predictor_list, index, 0);
    const size_t target_range = byte_reader->Read16Bit();
    model->target_range_ = target_range;

    // A target range of 0 must not wrap around to SIZE_MAX in the unsigned
    // subtraction below; treat it as "no cells stored".
    const size_t prob_count = (target_range > 0) ? target_range - 1 : 0;

    // Read Model Parameters
    const size_t table_size = model->dynamic_list_.size();
    for (size_t i = 0; i < table_size; ++i) {
        std::vector<Prob>& prob_segs = model->dynamic_list_[i].prob;
        prob_segs.resize(prob_count);
        for (size_t j = 0; j < prob_segs.size(); ++j) {
            if (cell_size == 16) {
                prob_segs[j] = GetProb(byte_reader->Read16Bit(), 16);
            } else {
                prob_segs[j] = GetProb(byte_reader->ReadByte(), 8);
            }
        }
    }
    return model;
}
예제 #2
/*
 * GetProb: return nSize-gram probability for ngram in wlab
 *
 * This routine will return the interpolated nSize-gram probability for
 * the words in wlab. Note that the context may be shortened in the
 * case of multiple LMs and words which do not occur in some of them.
 *
 * wlab  - array of at least nSize labels; each label's aux field is used
 *         as the index into the per-LM l2nId mapping tables.
 * nSize - n-gram order to evaluate.
 *         NOTE(review): nGram holds LM_NSIZE entries, so this assumes
 *         nSize <= LM_NSIZE - confirm at the call sites.
 *
 * Returns the value from GetNGramProb when a single LM is loaded, or
 * log() of the combined exp() values when several are loaded (i.e. the
 * per-LM values are treated as log probabilities). If no LM can map all
 * nSize words, the routine retries with the first label dropped
 * (wlab+1, nSize-1); once nSize reaches 1 it reports through HError and
 * returns LZERO.
 */
static double GetProb(LabId *wlab, int nSize)
{
   int i,j;
   LMInfo *li;
   Boolean inThisLM,inAnyLM;
   double x,prob,psum;
   NameId nGram[LM_NSIZE];

   if (nLModel==1) {
      /* single LM: every word must map to a name in that LM */
      inThisLM = TRUE;
      for (j=0; j<nSize; j++) {
         if ((nGram[j] = l2nId[0][(int) (wlab[j]->aux)])==NULL)
            inThisLM = FALSE;
      }
      if (inThisLM) {
         prob = GetNGramProb(lmInfo[0].lm, nGram, nSize);
      }
      else if (nSize > 1)
         prob = GetProb(wlab+1,nSize-1);
      else {
         prob = LZERO;
         HError(-16690,"GetProb: assigning zero probability");
      }
   } else {
      psum = 0.0;
      inAnyLM = FALSE;
      for (li=lmInfo, i=0; i<nLModel; i++, li++) {
         for (inThisLM=TRUE, j=0; j<nSize; j++) {
            if ((nGram[j] = l2nId[i][(int) (wlab[j]->aux)])==NULL)
               inThisLM = FALSE;
         }
         /* An LM that lacks one of the words cannot score this n-gram
            (nGram would contain NULL), so it contributes nothing. */
         if (!inThisLM)
            continue;
         x = GetNGramProb(li->lm, nGram, nSize);
         /* The two accumulation rules are alternatives; running both would
            apply exp() twice and mix a running maximum with a running sum. */
#ifdef INTERPOLATE_MAX
         if ((x = exp(x)) > psum)
            psum = x;
#else
         psum += li->weight*exp(x);
#endif
         inAnyLM = TRUE;
      }
      if (inAnyLM)
         prob = log(psum);
      else if (nSize > 1)
         prob = GetProb(wlab+1,nSize-1);
      else {
         prob = LZERO;
         HError(-16690,"GetProb: assigning zero probability");
      }
   }
   return prob;
}
	/// Writes this record to `o` as six tab-separated fields, in order:
	/// GetSeqId(), GetPos(), then GetProb(0), GetProb(1), GetProb(2) and
	/// GetProb(3). No trailing newline is written.
	///
	/// @param o  Stream to write to.
	/// @return   The same stream, so calls can be chained.
	ostream& Print( ostream& o ) const {
		return o
			<< GetSeqId() << '\t'
			<< GetPos() << '\t'
			<< GetProb(0) << '\t'
			<< GetProb(1) << '\t'
			<< GetProb(2) << '\t'
			<< GetProb(3);
	}
예제 #4
// Deserializes a StringModel from `byte_reader`.
//
// Values are consumed from the reader in this order:
//   1. 255 x Read16Bit() - converted with GetProb(value, 16) into
//                          char_prob_[0..254]
//   2.  63 x ReadByte()  - converted with GetProb(value, 8) into
//                          length_prob_[0..62]
//
// Parameters:
//   byte_reader - source of the serialized model; must be non-null.
//   index       - forwarded unchanged to the StringModel constructor.
//
// Returns a model allocated with `new`.
// NOTE(review): presumably the caller takes ownership of the returned
// pointer - confirm against the call sites.
Model* StringModel::ReadModel(ByteReader* byte_reader, size_t index) {
    // NOTE(review): these counts must match what the writer emits and must not
    // exceed the sizes of char_prob_ / length_prob_ - confirm against the
    // class definition and the matching write routine.
    const int kCharProbCount = 255;
    const int kLengthProbCount = 63;

    StringModel* model = new StringModel(index);
    for (int i = 0; i < kCharProbCount; ++i) {
        model->char_prob_[i] = GetProb(byte_reader->Read16Bit(), 16);
    }
    for (int i = 0; i < kLengthProbCount; ++i) {
        model->length_prob_[i] = GetProb(byte_reader->ReadByte(), 8);
    }
    return model;
}
예제 #5
/* CalcPerplexity: compute perplexity and other statistics */
static void CalcPerplexity(PStats *sent, LabId *pLab, int numPLabs, int nSize)
   int i,j;
   LabId *p;
   float prob;
   Boolean hasOOV;

   for (p=pLab, i=nSize-1; i<numPLabs; i++, p++)
      if (pLab[i]==unkId)
	 continue;	           /* cannot predict OOVs */
      if (skipOOV)
	 hasOOV = FALSE;
	 for (j=1; j<nSize; j++)
	    if (pLab[i-j]==unkId)
	 if (hasOOV) continue; /* skip to next label since context contains OOV */
      prob = GetProb(p, nSize);
      sent->nWrd++; sent->logpp += prob; sent->logpp2 += prob*prob;

      if (outStreamFN != NULL)

      if (trace&T_PROB)
	 printf("logP(%s |", pLab[i]->name);
	 for (j=1; j<nSize; j++)
	   printf(" %s%s", (j==1)?"":",", pLab[i-j]->name);
	 printf(") = %.4f\n", prob);
	 /* if (trace&T_INST_INFO) PrintInstStats(nSize); */
   if (trace&T_SENT)