/* WriteMatBigram: write out old HVite format bigram */
/*
 * Writes one text line per word i in 1..matbi->numWords: the escaped word
 * name (left-justified in 8 columns) followed by row i of matbi->bigMat,
 * every entry converted with L2F.  Consecutive entries that lie within
 * epsilon of the first value of a run are collapsed into "<value>*<count>".
 * The values 0 and 1 are written as the bare digits, all others with %e.
 *
 *   lm    - language model; only lm->data.matbi is read
 *   fn    - output file name, opened through FOpen with LangModOFilter
 *   flags - not used by this routine
 *
 * When T_TIO tracing is enabled, progress dots are printed to stdout.
 * If the output cannot be opened the failure is reported through HError
 * and nothing is written.
 */
static void WriteMatBigram(LModel *lm,char *fn,int flags)
{
   const float epsilon = 0.000001;
   MatBiLM *matbi;
   FILE *file;
   Boolean isPipe;
   Vector v;
   double x,y;
   int i,j,rep;

   if (trace&T_TIO)
      printf("\nMB "),fflush(stdout);

   matbi = lm->data.matbi;
   file=FOpen(fn,LangModOFilter,&isPipe);
   if (file==NULL) {
      /* NOTE(review): 8111 mirrors the xx11 "cannot create" code that
         WriteDict uses (8011) - confirm against this module's error table */
      HError(8111,"WriteMatBigram: Cannot create language model file %s",fn);
      return;   /* never touch a NULL stream, even if HError returns */
   }

   for (i=1;i<=matbi->numWords;i++) {
      if (trace&T_TIO) {
         if ((i%25)==0)
            printf(". "),fflush(stdout);
         if ((i%800)==0)
            printf("\n   "),fflush(stdout);
      }

      fprintf(file,"%-8s ",ReWriteString(matbi->wdlist[i]->name,
                                         NULL,ESCAPE_CHAR));

      /* x holds the first value of the current run; rep counts how many
         further entries matched it.  x starts at -1.0, presumably outside
         the range L2F returns, so the first entry opens a new run. */
      v=matbi->bigMat[i];rep=0;x=-1.0;
      for (j=1;j<=matbi->numWords;j++){
         y = L2F(v[j]);
         if (fabs(y - x) <= epsilon)
            rep++;
         else {
            /* close the previous run before starting a new one */
            if (rep>0) {
               fprintf(file,"*%d",rep+1);
               rep=0;
            }
            x = y;
            if (x == 0.0)
               fprintf(file," 0");
            else if (x == 1.0)
               fprintf(file," 1");
            else
               fprintf(file," %e",x);
         }
      }
      /* flush a run that extends to the end of the row */
      if (rep>0)
         fprintf(file,"*%d",rep+1);
      fprintf(file,"\n");
   }
   FClose(file,isPipe);
   if (trace&T_TIO)
      printf("\n"),fflush(stdout);
}
break; case 'd': if (NextArg()!=STRINGARG) HError(2319,"HERest: HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'm': minEgs = GetChkedInt(0,1000,s); break; case 'o':
/* EXPORT->WriteDict: Write the given Vocab structure to the file dictFn */
/*
 * Collects every word in voc->wtab (except voc->nullWord and
 * voc->subLatWord) into a temporary array, sorts it with Wd_Cmp and then
 * writes one line per pronunciation:
 *
 *    WORD  [OUTSYM]  PROB  phone phone ...
 *
 * The output-symbol column is emitted only if some pronunciation has an
 * output symbol that is NULL or differs from the word itself; the
 * probability column only if some pronunciation has a non-zero prob field.
 * Stored probs are exponentiated before printing; values outside
 * (LSMALL,0] are treated as 1.0, and 1.0 is written as blank padding.
 *
 *   dictFn - output file name, opened through FOpen with DictOFilter
 *   voc    - vocabulary to write
 *
 * Returns SUCCESS, or FAIL if the file cannot be created or more words are
 * found in the hash table than voc->nwords announces.  If fewer words are
 * found a warning is issued and only the words actually found are written.
 */
ReturnStatus WriteDict(char *dictFn, Vocab *voc)
{
   FILE *df;
   Boolean isPipe,withOut,withProbs;
   Word wid, *wlist;
   Pron thisPron;
   float prob;
   char buf[MAXSTRLEN];
   int i,j,nw;

   nw = voc->nwords;
   if (trace&T_TOP)
      printf("WriteDict: %d words/%d prons to %s\n",
             nw,voc->nprons,dictFn);
   if ( (df = FOpen(dictFn,DictOFilter,&isPipe)) == NULL){
      HRError(8011,"WriteDict: Cannot create dictionary file %s",dictFn);
      return(FAIL);
   }

   /* Create array of words */
   j = 0;
   wlist = (Word *)New(&gstack,sizeof(Word)*(nw+1));
   for (i=0,withOut=withProbs=FALSE; i< VHASHSIZE; i++)
      for ( wid = voc->wtab[i]; wid != NULL; wid = wid->next ) {
         if (wid==voc->nullWord || wid==voc->subLatWord)
            continue;
         if (j>=nw){
            FClose(df, isPipe);
            Dispose(&gstack,wlist);   /* do not leave the array on gstack */
            HRError(8090,"WriteDict: wlist full [%d]",j);
            return(FAIL);
         }
         wlist[j++] = wid;
         /* decide which optional columns the output needs */
         for (thisPron = wid->pron; thisPron != NULL;
              thisPron = thisPron->next) {
            if (thisPron->outSym==NULL ||
                thisPron->outSym != wid->wordName)
               withOut=TRUE;
            if (thisPron->prob!=0.0)
               withProbs=TRUE;
         }
      }
   if (j!=nw){
      HRError(-8090,"WriteDict: only %d of %d words found",j,nw);
      /* only wlist[0..j-1] were filled in; sorting or printing beyond
         that would read uninitialised pointers */
      nw = j;
   }

   /* sort list */
   qsort(wlist,nw,sizeof(Word),Wd_Cmp);

   /* print list of prons */
   for (i=0; i<nw; i++){
      wid = wlist[i];
      for (thisPron = wid->pron; thisPron != NULL;
           thisPron = thisPron->next) {
         ReWriteString(wid->wordName->name,buf,ESCAPE_CHAR);
         fprintf(df,"%s",buf);
         /* length cast to int so that a word longer than the field gives
            a negative width instead of a wrapped-around size_t */
         Pad(df,WORDFIELDWIDTH-(int)strlen(buf),1);
         if (thisPron->outSym==NULL) {
            fprintf(df,"[]");
            Pad(df,WORDFIELDWIDTH-2,0);
         } else if (thisPron->outSym != wid->wordName) {
            ReWriteString(thisPron->outSym->name,buf,ESCAPE_CHAR);
            fprintf(df,"[%s]",buf);
            Pad(df,WORDFIELDWIDTH-(int)strlen(buf)-2,0);
         } else if (withOut)
            Pad(df,WORDFIELDWIDTH,0);   /* keep columns aligned */
         if (withProbs) {
            prob=(thisPron->prob>LSMALL && thisPron->prob<=0.0)
               ?exp(thisPron->prob):1.0;
            if (prob<1.0) fprintf(df," %8.6f",prob);
            /* 1.0 is just skipped */
            else Pad(df,9,0);
         }
         for (j=0; j < thisPron->nphones; j++) {
            fputc(' ',df);
            WriteString(df,thisPron->phones[j]->name,ESCAPE_CHAR);
         }
         fprintf(df,"\n");
      }
   }
   FClose(df,isPipe);
   Dispose(&gstack,wlist);
   return(SUCCESS);
}
/* WriteNGrams: write the n-gram section of order n to file */
/*
 * Prints the "\<n>-grams:" header, then one line for every SEntry of every
 * NEntry in nglm->hashtab whose word[] context has its first n-1 slots
 * non-zero and all remaining slots zero.  Selected entries are ordered
 * with nep_cmp before being written.  Each line holds
 *
 *    <prob*scale> TAB w(n-1) ... w(0) [TAB <backoff*scale>]
 *
 * where w(0) is the SEntry's word and the others come from the context.
 * For n==1 the probability is taken from nglm->unigrams, otherwise from
 * the SEntry.  Scaled values below -99.999 are written as -99.999.  The
 * backoff weight is appended only when n < nglm->nsize and GetNEntry finds
 * an entry for the full n-gram that itself has successors.
 *
 *   file  - open output stream
 *   nglm  - n-gram model to dump
 *   n     - order of the section to write
 *   scale - multiplier applied to probabilities and backoff weights
 *
 * Returns the number of n-gram lines written.
 */
static int WriteNGrams(FILE *file,NGramLM *nglm,int n,float scale)
{
   NEntry **ctxTab;
   NEntry *ctx,*boEntry;
   SEntry *succ;
   LogFloat prob;
   lmId ndx[NSIZE+1];
   double scaled;
   int bucket,pos,slot,idx,wanted,sep;
   int nCtx,nWritten,vocabSize;
   int progress=1;

   if (trace&T_TIO) {
      printf("\nn%1d ",n);
      fflush(stdout);
   }
   fprintf(file,"\n\\%d-grams:\n",n);
   vocabSize=VectorSize(nglm->unigrams);   /* result is not used below */

   ctxTab=(NEntry **) New(&gstack,sizeof(NEntry*)*nglm->counts[0]);

   /* Pass 1: gather the contexts of length n-1 that have successors */
   nCtx=0;
   for (bucket=0;bucket<nglm->hashsize;bucket++) {
      for (ctx=nglm->hashtab[bucket]; ctx!=NULL; ctx=ctx->link) {
         wanted=1;
         /* the leading n-1 slots must all be filled ... */
         for (pos=0; wanted && pos<n-1; pos++)
            if (ctx->word[pos]==0) wanted=0;
         /* ... and every slot after them must be empty */
         for (pos=n-1; wanted && pos<NSIZE-1; pos++)
            if (ctx->word[pos]!=0) wanted=0;
         if (wanted && ctx->nse>0)
            ctxTab[nCtx++]=ctx;
      }
   }
   qsort(ctxTab,nCtx,sizeof(NEntry*),nep_cmp);

   /* Pass 2: write every successor of every gathered context */
   nWritten=0;
   for (slot=n;slot<=NSIZE;slot++)
      ndx[slot]=0;
   for (idx=0;idx<nCtx;idx++) {
      ctx=ctxTab[idx];
      for (slot=1;slot<n;slot++)
         ndx[slot]=ctx->word[slot-1];
      if (ctx==NULL || ctx->nse<=0)
         continue;

      succ=ctx->se;
      for (pos=0;pos<ctx->nse;pos++,succ++) {
         if (trace&T_TIO) {
            if ((progress%25000)==0) {
               printf(". ");
               fflush(stdout);
            }
            if ((progress%800000)==0) {
               printf("\n   ");
               fflush(stdout);
            }
            progress++;
         }

         /* ndx[] now spells the complete n-gram, successor in slot 0 */
         ndx[0]=succ->word;

         /* look for an entry that carries a backoff weight */
         boEntry=NULL;
         if (n<nglm->nsize)
            boEntry=GetNEntry(nglm,ndx,FALSE);
         if (boEntry!=NULL && boEntry->nse==0)
            boEntry=NULL;
         nWritten++;

         prob = (n==1) ? nglm->unigrams[succ->word] : succ->prob;
         scaled=prob*scale;
         if (scaled<-99.999)
            fprintf(file,"%+6.3f",-99.999);
         else
            fprintf(file,"%+6.4f",scaled);

         /* words run from oldest context word down to the successor;
            a tab precedes the first, single spaces the others */
         sep='\t';
         for (slot=n-1;slot>=0;slot--) {
            if (rawMITFormat)
               fprintf(file,"%c%s",sep,nglm->wdlist[ndx[slot]]->name);
            else
               fprintf(file,"%c%s",sep,
                       ReWriteString(nglm->wdlist[ndx[slot]]->name,
                                     NULL,ESCAPE_CHAR));
            sep=' ';
         }

         if (boEntry!=NULL)
            fprintf(file,"\t%+6.4f\n",boEntry->bowt*scale);
         else
            fprintf(file,"\n");
      }
   }

   Dispose(&gstack,ctxTab);
   if (trace&T_TIO) {
      printf("\n");
      fflush(stdout);
   }
   return(nWritten);
}