/* ProcessText: read text files line by line and count ngrams */ void ProcessText(char *fn, bool lastFile) { FILE *f; LabId id; bool isPipe; char word[256]; if (trace&T_TOP) printf("Reading source text file %s\n",(fn==NULL) ? "<stdin>" : fn); if ((fn!=NULL) && (strcmp(fn,"-")!=0)) { if ((f = FOpen(fn,LMTextFilter,&isPipe))==NULL) HError(16410,"ProcessText: unable to open text file %s", fn); } else { f = stdin; } while (fscanf(f,"%255s",word)==1) { if (pruneWords) { if ((id = GetLabId(word,FALSE))==NULL && (id = unkId)==NULL) { stdBuf.used=0; continue; } } else { id = GetLabId(word,TRUE); } if (trace&T_INP) printf("[%s]\n",id->name); PutShiftRegister(id,&stdBuf); } if (fn!=NULL) { FClose(f,isPipe); if (lastFile) CompressBuffer(stdBuf.ngb,TRUE); } else { CompressBuffer(stdBuf.ngb,TRUE); } }
/*
 * EXPORT->ReadDictWord: read one dictionary entry (word, optional
 * output symbol, optional probability, phones) from src.
 *
 *   labels[0]   receives the word
 *   labels[1]   receives the output symbol given as "[sym]", else NULL
 *   labels[2..] receive the phones, followed by a NULL terminator
 *   prob        if non-NULL receives the pronunciation probability
 *               (1.0 when none is given)
 *   num         receives the number of phones, or -1 when no word
 *               could be read from src
 *
 * Returns FAIL after reporting error 8050 on a malformed entry,
 * otherwise SUCCESS.
 */
ReturnStatus ReadDictWord(Source *src,LabId *labels,float *prob, int *num)
{
   char tok[MAXSTRLEN];
   char *endp;
   float val;
   int tokLen;
   int phoneCount = 0;
   int probSeen = 0;

   if (!ReadString(src,tok)) {
      *num = -1;
      return(SUCCESS);
   }
   if (prob != NULL)
      *prob = 1.0;
   labels[0] = GetLabId(tok,TRUE);
   labels[1] = NULL;

   for (SkipWhiteSpace(src); !src->wasNewline; SkipWhiteSpace(src)) {
      if (!ReadString(src,tok)) {
         HRError(8050,"ReadDict: Phone or outsym expected in word %s",
                 labels[0]->name);
         return(FAIL);
      }
      tokLen = strlen(tok);

      if (tok[0] == '[' && tok[tokLen-1] == ']') {
         /* outsym: at most one, and it must precede the phones */
         if (labels[1] != NULL || phoneCount != 0) {
            HRError(8050,"ReadDict: Only single outsym allowed for word %s",
                    labels[0]->name);
            return(FAIL);
         }
         tok[tokLen-1] = '\0';
         labels[1] = GetLabId(tok+1,TRUE);
         continue;
      }

      /* a number is only looked for before the first phone and only
         until one probability has been accepted */
      if (phoneCount == 0 && !probSeen) {
         val = strtod(tok,&endp);
      } else {
         val = 0.0;
         endp = tok;
      }

      if (endp == tok) {
         /* nothing numeric was consumed, so the token is a phone */
         if (phoneCount == MAXPHONES) {
            HRError(8050,"ReadDict: Too many phones in word %s",
                    labels[0]->name);
            return(FAIL);
         }
         labels[2+phoneCount] = GetLabId(tok,TRUE);
         phoneCount++;
         continue;
      }

      /* token started with a number: must be a complete value in (0,1] */
      if (val <= 0.0 || val > 1.0 || *endp != 0) {
         HRError(8050,"ReadDict: Probability malformed %s",tok);
         return(FAIL);
      }
      probSeen = 1;
      if (prob != NULL)
         *prob = val;
   }

   labels[phoneCount+2] = NULL;
   *num = phoneCount;
   return(SUCCESS);
}
ln = lat->lnodes; ln->word = wd; ln->n=0; ln->v=0; ln = lat->lnodes+1; ln->word = wd; ln->n=0; ln->v=0; ln = lat->lnodes+nNode-1; ln->word = wd; ln->n=0; ln->v=0; ln = lat->lnodes+nNode-2; ln->word = wd; ln->n=0; ln->v=0; ln = lat->lnodes+2; for (i = 0; i< VHASHSIZE; i++) for ( wd = voc->wtab[i]; wd != NULL; wd = wd->next ) if ((wd != voc->nullWord) && (wd != voc->subLatWord)) { ln->word = wd; ln++; } la =lat->larcs; la->start = lat->lnodes; la->end = lat->lnodes+1; la->lmlike = 0.0; la = lat->larcs+1; la->start = lat->lnodes+nNode-2; la->end = lat->lnodes+nNode-1; la->lmlike = 0.0; la = lat->larcs+2; la->start = lat->lnodes+nNode-2; la->end = lat->lnodes+1; la->lmlike = 0.0; la = lat->larcs+3; for (i = 0; i < voc->nwords; i++) { la->start = lat->lnodes+1;
/*
 * AddEquiv: record the equivalence pair (cl,eq).
 *
 * A new Equiv record is allocated from permHeap, filled with the LabIds
 * of the two names, and pushed onto the front of eqList.
 */
static void AddEquiv(char * cl, char * eq)
{
   Equiv *node = (Equiv*) New(&permHeap,sizeof(Equiv));

   node->classId = GetLabId(cl,TRUE);
   node->equivId = GetLabId(eq,TRUE);

   /* link in at the head of the list */
   node->next = eqList;
   eqList = node;
}
/*
 * EXPORT->InitVocab: initialise the voc data structure.
 *
 * Creates the word, pronunciation and phone heaps, allocates the word
 * hash table with every slot set to NULL, enters the "!NULL" and
 * "!SUBLATID" words, and finally zeroes the word and pronunciation
 * counts.
 */
void InitVocab(Vocab *voc)
{
   Word *slot;
   LabId nullId, subLatId;

   CreateHeap(&voc->wordHeap,"Word Heap",MHEAP,sizeof(DictEntry),
              0.4,200,2000);
   CreateHeap(&voc->pronHeap,"Pron Heap",MHEAP,sizeof(WordPron),
              0.4,200,2000);
   CreateHeap(&voc->phonesHeap,"Phones Heap",MSTAK,1,0.4,400,4000);

   /* the hash table itself is taken from the phones heap */
   voc->wtab = (Word*) New(&voc->phonesHeap,sizeof(Word)*VHASHSIZE);
   for (slot = voc->wtab; slot < voc->wtab+VHASHSIZE; slot++)
      *slot = NULL;

   nullId = GetLabId("!NULL",TRUE);
   voc->nullWord = GetWord(voc, nullId, TRUE);
   subLatId = GetLabId("!SUBLATID",TRUE);
   voc->subLatWord = GetWord(voc, subLatId, TRUE);

   /* counts are reset after the two special words have been entered */
   voc->nprons = 0;
   voc->nwords = 0;
}
/* CreateMonoList: set nPhones and create list of monophones */ void CreateMonoList(void) { int i,j; Boolean found; LabId list[MAXMONOPHONES], id; char buf[255]; MLink q; nPhones = 0; for (i=0; i<MACHASHSIZE; i++) for (q=hset.mtab[i]; q!=NULL; q=q->next) if (q->type=='l'){ strcpy(buf,q->id->name); TriStrip(buf); id = GetLabId(buf,TRUE); found = FALSE; for (j=0; j<nPhones; j++) if (list[j] == id) { found = TRUE; break; } if (!found){ if (nPhones>=MAXMONOPHONES) HError(2422,"CreateMonoList: Too many monophones"); list[nPhones++] = id; } } monophones = (LabId *)New(&labIdStack, nPhones*sizeof(LabId)); for (i=0; i<nPhones; i++) monophones[i] = list[i]; --monophones; }
/*
 * SaveModel: save the HMMSet containing one model.
 *
 * When outfn is non-NULL the macro's id is first replaced by the LabId
 * of outfn.  The set is then written with SaveHMMSet; failure is fatal
 * (HError 2111).
 */
void SaveModel(char *outfn)
{
   if (outfn != NULL) {
      macroLink->id = GetLabId(outfn,TRUE);
   }
   if (SaveHMMSet(&hset,outDir,NULL,NULL,saveBinary) >= SUCCESS)
      return;
   HError(2111,"SaveModel: SaveHMMSet failed");
}
/*
 * LoadTIMITLabels: load a TIMIT transcription.
 *
 * A single label list is created and attached to t.  Each entry is
 * read as <number> <number> <string>; both numbers are multiplied by
 * 625 to give the start and end times, and the label is added with a
 * score of 0.  Reading stops at the first symbol that is not a number
 * where a start time is expected.
 */
static void LoadTIMITLabels(MemHeap *x, Transcription *t, Source *src)
{
   LabList *labels;
   LabId name;
   HTime segStart, segEnd;
   float noScore = 0.0;

   labels = CreateLabelList(x,0);
   AddLabelList(labels,t);
   InitTrScan();

   for (GetTrSym(src,FALSE); trSym == TRNUM; ) {
      segStart = trNum*625;                 /* sample rate is 16KHz */

      GetTrSym(src,FALSE);
      if (trSym != TRNUM)
         HError(6552,"LoadTIMITLabels: End Time expected in TIMIT Label File");
      segEnd = trNum*625;

      GetTrSym(src,FALSE);
      if (trSym != TRSTR)
         HError(6552,"LoadTIMITLabels: Label Name expected in TIMIT Label File");
      name = GetLabId(trStr,TRUE);
      AddLabel(x,labels,name,segStart,segEnd,noScore);

      /* step over the end-of-line marker, if there is one */
      GetTrSym(src,FALSE);
      if (trSym == TREOL)
         GetTrSym(src,FALSE);
   }
}
/* Initialise: set up global data storage */ void Initialise(char *datafn) { ParmBuf pbuf; int s; Boolean eSep; CreateHeap(&iStack,"inBuf", MSTAK, 1, 0.5, 100000, LONG_MAX); CreateHeap(&dStack,"seqStack", MSTAK, 1, 0.5, 100000, LONG_MAX); CreateHeap(&cStack,"clustStack",MSTAK, 1, 0.5, 100000, LONG_MAX); /* Peek at first data file to get observation format */ if((pbuf = OpenBuffer(&iStack, datafn, 0, UNDEFF, FALSE_dup, FALSE_dup))==NULL) HError(2550,"Initialise: Config parameters invalid"); GetBufferInfo(pbuf, &info); CloseBuffer(pbuf); ResetHeap(&iStack); /* set/validate stream widths */ if(swidth[0] > 0) CheckStreamWidths(info); else ZeroStreamWidths(1,swidth); /* Create an observation to hold the input parameters */ SetStreamWidths(info.tgtPK,info.tgtVecSize,swidth,&eSep); obs = MakeObservation(&gstack,swidth,info.tgtPK,FALSE,eSep); if (segLab != NULL) segId = GetLabId(segLab,TRUE); /* Create sequences to hold all data*/ for (s=1;s<=swidth[0];s++) dSeq[s] = CreateSequence(&dStack,4096); }
/* Initialise: load hmm and initialise global data structures */ void Initialise(void) { LabId hmmId; char base[MAXSTRLEN]; char path[MAXSTRLEN]; char ext[MAXSTRLEN]; int s; /* Stacks for global structures requiring memory allocation */ CreateHeap(&segmentStack,"SegStore", MSTAK, 1, 0.0, 100000, LONG_MAX); CreateHeap(&sequenceStack,"SeqStore", MSTAK, 1, 0.0, 10000, 10000); CreateHeap(&clustSetStack,"ClustSetStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&transStack,"TransStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&traceBackStack,"TraceBackStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&bufferStack,"BufferStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&msdinfoStack,"MSDInfoStore", MSTAK, 1, 0.0, 1000, 1000); /* Load HMM def */ if(MakeOneHMM( &hset, BaseOf(hmmfn,base))<SUCCESS) HError(2128,"Initialise: MakeOneHMM failed"); if(LoadHMMSet( &hset,PathOf(hmmfn,path),ExtnOf(hmmfn,ext))<SUCCESS) HError(2128,"Initialise: LoadHMMSet failed"); SetParmHMMSet(&hset); if ((hset.hsKind==DISCRETEHS)||(hset.hsKind==TIEDHS)) uFlags = (UPDSet) (uFlags & (~(UPMEANS|UPVARS))); AttachAccs(&hset, &gstack, uFlags); /* Get a pointer to the physical HMM and set related globals */ hmmId = GetLabId(base,FALSE); macroLink = FindMacroName(&hset,'h',hmmId); hmmLink = (HLink)macroLink->structure; nStates = hmmLink->numStates; nStreams = hset.swidth[0]; for(s=1; s<=nStreams; s++) maxMixInS[s] = MaxMixInS(hmmLink, s); msdInfo = CreateMSDInfo(&msdinfoStack, hmmLink); SetVFloor( &hset, vFloor, minVar); if(segLab != NULL) segId = GetLabId(segLab,TRUE); if(trace>0) PrintInitialInfo(); thisP = CreateVector(&gstack,nStates); lastP = CreateVector(&gstack,nStates); }
/*
 * FakeSEModelAlign: give arc la a fabricated one-entry model alignment.
 *
 * The entry is allocated from the lattice heap, has state -1, a
 * duration equal to the time between the arc's start and end nodes,
 * and the label returned by GetLabId("sil", FALSE).
 */
void FakeSEModelAlign(Lattice *lat, LArc *la)
{
   LAlign *fake = New (lat->heap, sizeof(LAlign));

   fake->state = -1;
   fake->dur = la->end->time - la->start->time;
   fake->label = GetLabId("sil", FALSE);

   la->lAlign = fake;
   la->nAlign = 1;
}
/*
 * LoadSCRIBELabels: load a SCRIBE (SAM) label file
 *     - searches for first occurrence of a label symbol
 *           LBA - acoustic label
 *           LBB - broad class label
 *           UTS - utterance
 *     - loads this symbol and all subsequent labels of the same type.
 *       All other SAM label types are ignored.
 *
 * Start and end indices are multiplied by the SOURCERATE configuration
 * value (500.0 when it is not set).  A label made of several string
 * tokens is joined with '_' into one name.
 *
 * All format errors are reported through HError(6554), including a
 * joined label name that would not fit the local name buffer.
 */
static void LoadSCRIBELabels(MemHeap *x, Transcription *t, Source *src)
{
   LabList *ll;
   LabId labid;
   HTime start,end;
   float score;
   ScribeLab ltype, lx;
   double sp;
   char buf[MAXSTRLEN];

   if (!GetConfFlt(cParm,numParm,"SOURCERATE",&sp))
      sp = 500.0;   /* actual SCRIBE rate */
   ll = CreateLabelList(x,0);
   AddLabelList(ll,t);
   InitTrScan();
   do {   /* search for first label */
      ltype = GetScribeLab(src);
      if (ltype == S_EOF)
         HError(6554,"LoadSCRIBELabels: Unexpected EOF");
   } while (ltype != S_LBB && ltype != S_LBA && ltype != S_UTS);
   do {   /* load this and all subsequent ltype labels */
      GetTrSym(src,FALSE);
      if (trSym != TRNUM)
         HError(6554,"LoadSCRIBELabels: Start Index expected [%d]\n",trSym);
      start = trNum * sp;
      GetTrSym(src,FALSE);
      if (trSym != TRCOMMA)
         HError(6554,"LoadSCRIBELabels: Comma expected [%d]\n",trSym);
      GetTrSym(src,FALSE);
      if (ltype == S_LBA || ltype == S_LBB) {
         /* LBB and LBA have a centre field */
         if (trSym != TRCOMMA)
            HError(6554,"LoadSCRIBELabels: Comma expected [%d]\n",trSym);
         GetTrSym(src,FALSE);
      }
      if (trSym != TRNUM)
         HError(6554,"LoadSCRIBELabels: End Index expected [%d]\n",trSym);
      end = trNum * sp;
      GetTrSym(src,FALSE);
      if (trSym != TRCOMMA)
         HError(6554,"LoadSCRIBELabels: Comma expected [%d]\n",trSym);
      GetTrSym(src,FALSE);
      if (trSym != TRSTR)
         HError(6554,"LoadSCRIBELabels: Label expected [%d]\n",trSym);
      /* bound every write to buf: the joined name can grow with each
         additional string token on the line */
      if (strlen(trStr) >= sizeof(buf))
         HError(6554,"LoadSCRIBELabels: Label too long");
      strcpy(buf,trStr);
      GetTrSym(src,FALSE);
      while (trSym == TRSTR){
         /* need room for '_', the next token and the terminator */
         if (strlen(buf) + strlen(trStr) + 2 > sizeof(buf))
            HError(6554,"LoadSCRIBELabels: Label too long");
         strcat(buf,"_");
         strcat(buf,trStr);
         GetTrSym(src,FALSE);
      }
      labid = GetLabId(buf,TRUE);
      score = 0.0;
      AddLabel(x,ll,labid,start,end,score);
      if (trSym != TREOL)
         HError(6554,"LoadSCRIBELabels: End of Line expected [%d]\n",trSym);
      lx = GetScribeLab(src);
   } while (lx != S_EOF);
}
printf(" -i n add id n to filter list none\n"); printf(" -f w add word w to filter list none\n"); PrintStdOpts(""); printf("\n\n"); }
/* EXPORT->SaveToMasterfile: make all subsequent LSaves go to fname */ ReturnStatus SaveToMasterfile(char *fname) { int i; LabId nid; OutMLFEntry *omlf; char buf[MAXSTRLEN]; if (fname==NULL || *fname=='\0') { outMLF=NULL; return (FAIL); } sprintf(buf,"#!MLF-%s!#",fname); if ((nid=GetLabId(buf,FALSE))!=NULL) { for (omlf=outMLFSet, i=0; i<numOutMLF; i++, omlf++) if (omlf->name==nid) break; if (i<numOutMLF) { if ((outMLF = omlf->file)==NULL){ HRError(6511,"SaveToMasterfile: MLF file %s already closed",fname); return(FAIL); } return(SUCCESS); } } if (numOutMLF==MAXMLFS-1){ HRError(6511,"SaveToMasterfile: Unable to create MLF file %s",fname); return(FAIL); } if ((outMLF=fopen(fname,"w")) == NULL){ HRError(6511,"SaveToMasterfile: Unable to create MLF file %s",fname); return(FAIL); } fprintf(outMLF,"#!MLF!#\n"); nid = GetLabId(buf,TRUE); outMLFSet[numOutMLF].file = outMLF; outMLFSet[numOutMLF].name = nid; numOutMLF++; return(SUCCESS); }
char *lpcalc_vc_id = "$Id: LPCalc.c,v 1.1.1.1 2006/10/11 09:54:43 jal58 Exp $"; #include "HShell.h" /* HMM ToolKit Modules */ #include "HMem.h" #include "HMath.h" #include "HWave.h" #include "HLabel.h" #include "LWMap.h"
/*
 * EXPORT->LoadStatsFile: load a statistics file.
 *
 * Each record is read as: <int index> <string name> <int count>
 * followed by one float for every state 2..N-1 of the named model,
 * where N is the model's numStates.  For each such state the count is
 * stored in stateCounter and the float's bytes are copied into the
 * state's hook field.
 *
 *   statfile - file to read
 *   hset     - HMM set in which the logical ('l') model names are found
 *   otrace   - when set (or when T_OCC tracing is on) a summary is
 *              printed after loading
 *
 * Any format error or unknown name is fatal (HError 7210/7250/7251).
 */
void LoadStatsFile(char *statfile,HMMSet *hset,Boolean otrace)
{
   Source src;
   char name[256];
   int modelIdx, occCount, numStates, state;
   int lineNo = 0;
   float occ;
   double occTotal = 0.0;
   long occTerms = 0;
   HMMDef *def;
   StateInfo *sinfo;
   MLink macro;
   LabId nameId;
   Boolean bin = FALSE;

   if (InitSource(statfile,&src,NoFilter) < SUCCESS)
      HError(7210,"LoadStatsFile: Can't open file %s", statfile);

   while (ReadInt(&src,&modelIdx,1,bin)) {
      lineNo++;
      if (!ReadString(&src,name) || !ReadInt(&src,&occCount,1,bin))
         HError(7250,"LoadStatsFile: Format error in file %s line %d",
                statfile,lineNo);

      /* look up the model name and find its number of states */
      nameId = GetLabId(name,FALSE);
      if (nameId == NULL)
         HError(7251,"LoadStatsFile: unknown name %s at line %d",
                name,lineNo);
      macro = FindMacroName(hset,'l',nameId);
      if (macro == NULL)
         HError(7251,"LoadStatsFile: unknown model %s at line %d",
                name,lineNo);
      def = (HMMDef *) macro->structure;
      numStates = def->numStates;

      for (state = 2; state < numStates; state++) {
         if (!ReadFloat(&src,&occ,1,bin))
            HError(7250,"LoadStatsFile: Float format error file %s line %d\n",
                   statfile,lineNo);
         sinfo = def->svec[state].info;
         sinfo->stateCounter = occCount; /* # of times the state occurred */
         /* the float is stored by copying its bytes over the hook */
         memcpy(&(sinfo->hook),&occ,sizeof(float));
         occTotal += occ;
         occTerms++;
      }
   }
   CloseSource(&src);

   if (otrace || (trace & T_OCC)) {
      printf(" Stats loaded for %d models\n",lineNo);
      printf(" Mean Occupation Count = %f\n",occTotal/occTerms);
      fflush(stdout);
   }
}
/* Initialise: initialise global data structures */ void Initialise(void) { int i; char path[256]; CreateHeap(&langHeap,"LModel mem",MSTAK,1,0.5,1000,20000); if (wlistFN!=NULL) { tgtVoc = &wlist; CreateWordList(wlistFN,tgtVoc,10); } if (processText) { /* init empty buffer */ CreateWordMap(NULL,&wmap,newWords); wmap.hasCnts = TRUE; wmap.name = defMapName; wmap.htkEsc = htkEscape; ++wmap.seqno; mapUpdated = FALSE; if (tgtVoc!=NULL) { /* add words from word list to the map */ pruneWords = TRUE; for (i=0; i<tgtVoc->used; i++) { AddWordToMap(&wmap,tgtVoc->id[i]); } SortWordMap(&wmap); unkId = GetLabId(unkStr,FALSE); } /* init ngram buffer */ MakeFN(rootFN,dbsDir,NULL,path); stdBuf.used = 0; stdBuf.ng[nSize] = 1; /* count = 1 */ stdBuf.ngb = CreateNGBuffer(&langHeap,nSize,ngbSize,path,&wmap); } else { CreateWordMap(omapFN,&wmap,1); } CreateInputSet(&gstack,&wmap,&inSet); binfo.wmap = &wmap; binfo.inSet = &inSet; binfo.nSize = nSize; }
/*
 * CheckLAlign: sanity check the model alignment of every lattice arc.
 *
 * An arc with no alignment is fatal (HError 9999) unless forceLatOut
 * is set, in which case a fake alignment is attached instead.  For
 * every arc the pronunciation whose pnum equals the end node's v must
 * exist, and the summed alignment durations are compared with the arc
 * duration; a difference of more than half a frame is reported on
 * stdout.
 */
void CheckLAlign (DecoderInst *dec, Lattice *lat)
{
   int arcIdx, k;
   LArc *la;
   float sumDur, arcDur;
   Pron pron;

   la = lat->larcs;
   for (arcIdx = 0; arcIdx < lat->na; ++arcIdx, ++la) {
      if (la->nAlign == 0 || !la->lAlign) {
         if (!forceLatOut)
            HError (9999, "CheckLAlign: empty model alignment for arc %d", arcIdx);
         else
            FakeSEModelAlign(lat, la);  /* fake sentence end arc alignment */
      }

      /* find the pronunciation variant used at the end node */
      pron = la->end->word->pron;
      while (pron && pron->pnum != la->end->v)
         pron = pron->next;
      assert (pron);

      arcDur = (la->end->time - la->start->time);
      sumDur = 0.0;
      for (k = 0; k < la->nAlign; ++k) {
         sumDur += la->lAlign[k].dur;
#if 0   /* sanity checking -- does not work for non-sildicts */
         strcpy (buf, la->lAlign[j].label->name);
         TriStrip (buf);
         monolab = GetLabId (buf, FALSE);
         assert (pron->phones[j] == monolab);
#endif
      }
#if 0
      assert (la->nAlign == pron->nphones);
#endif
      if (fabs (sumDur - arcDur) > dec->frameDur/2)
         printf ("CheckLAlign: MODALIGN Sanity check failed! %d laDur %.2f dur %.2f\n",
                 arcIdx, arcDur, sumDur);
   }
}
/*
 * LoadESPSLabels: read a waves label file.
 *
 * Symbols are consumed until one whose string starts with '#' is seen;
 * on the way an "nfields" entry must be followed by the number 1.
 * After that, each entry supplies a time stamp (multiplied by 1.0E7),
 * an ignored colour symbol and a label string.  Every label runs from
 * the previous entry's time stamp (0 for the first) to its own, and is
 * added with score 0.
 */
static void LoadESPSLabels(MemHeap *x, Transcription *t, Source *src)
{
   LabList *labels;
   LabId name;
   HTime segStart, segEnd;
   float score;

   labels = CreateLabelList(x,0);
   AddLabelList(labels,t);
   InitTrScan();

   /* read up to the '#' symbol */
   GetTrSym(src,FALSE);
   while ( trStr[0] != '#' ) {
      GetTrSym(src,FALSE);
      if (trSym == TREOF)
         HError(6553,"LoadESPSLabels: Unexpected EOF in ESPS Waves Label file.");
      if ( strcmp( "nfields", trStr) != 0 )
         continue;
      GetTrSym(src,FALSE);
      if ( trSym != TRNUM )
         HError(6553,"LoadESPSLabels: Expecting field number");
      if ( trNum != 1 )
         HError(6553,"LoadESPSLabels: Can only read single field label files.");
   }

   segStart = 0.0;
   segEnd = 0.0;
   score = 0.0;

   /* move to the first time stamp */
   do {
      GetTrSym(src,FALSE);
   } while ( trSym != TRNUM && trSym != TREOF );

   while ( trSym == TRNUM ) {
      segStart = segEnd;
      segEnd = trNum * 1.0E7;               /* Get time stamp */
      if ( segStart > segEnd )
         HError(-6553,"LoadESPSLabels: time stamps out of order.");
      GetTrSym(src,FALSE);                  /* Ignore color */
      GetTrSym(src,FALSE);                  /* Get field label for current level */
      if ( trSym != TRSTR)
         HError(6553,"LoadESPSLabels: Expecting label string in ESPS Waves Label file.");
      name = GetLabId( trStr,TRUE);
      AddLabel(x,labels,name,segStart,segEnd,score);
      GetTrSym(src,FALSE);
      if ( trSym != TREOL )
         HError(6553,"LoadESPSLabels: End-of-line expected in ESPS Waves Label file.");
      GetTrSym(src,FALSE);
   }
}
/*
 * InitPhonePost: set up the per-monophone posterior storage of dec.
 *
 * First scan: every HMM's hook is pointed at the LabId of its
 * TriStrip'ed name and that LabId's aux field is cleared.
 * Second scan: each distinct LabId is numbered 1..nPhone through its
 * aux field and entered in dec->monoPhone.
 * Finally the phonePost and phoneFreq arrays are allocated from gcheap
 * with nPhone+1 entries each.
 */
void InitPhonePost (DecoderInst *dec)
{
   HMMScanState hss;
   HLink hmm;
   MLink m;
   char stripped[100];
   LabId mono;

   /* pass 1: attach the monophone id to each HMM */
   NewHMMScan (dec->hset, &hss);
   do {
      hmm = hss.hmm;
      assert (!hmm->hook);
      m = FindMacroStruct (dec->hset, 'h', hmm);
      assert (strlen (m->id->name) < 100);
      strcpy (stripped, m->id->name);
      TriStrip (stripped);

      mono = GetLabId (stripped, TRUE);
      mono->aux = (Ptr) 0;
      hmm->hook = (Ptr) mono;
   } while (GoNextHMM (&hss));
   EndHMMScan (&hss);

   /* pass 2: count monophones -- #### make this more efficent! */
   dec->nPhone = 0;
   NewHMMScan (dec->hset, &hss);
   do {
      hmm = hss.hmm;
      mono = (LabId) hmm->hook;
      if (mono->aux == NULL) {
         /* first time this id is seen: give it the next number */
         ++dec->nPhone;
         mono->aux = (Ptr) dec->nPhone;
         assert (dec->nPhone < 100);
         dec->monoPhone[dec->nPhone] = mono;
      }
   } while (GoNextHMM (&hss));
   EndHMMScan (&hss);

   printf ("found %d monophones\n", dec->nPhone);

   dec->phonePost = (LogDouble *) New (&gcheap, (dec->nPhone+1) * sizeof (LogDouble));
   dec->phoneFreq = (int *) New (&gcheap, (dec->nPhone+1) * sizeof (int));
}
/* SetConfParms: set conf parms relevant to this tool */ void SetConfParms(void) { int i; static char b[100]; nParm = GetConfig("LPLEX", TRUE, cParm, MAXGLOBS); if (nParm>0){ if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i; if (GetConfStr(cParm,nParm,"STARTWORD",b)) sstId = GetLabId(b, TRUE); if (GetConfStr(cParm,nParm,"ENDWORD",b)) senId = GetLabId(b, TRUE); if (GetConfStr(cParm,nParm,"UNKNOWNNAME",b)) unkId = GetLabId(b, TRUE); } if (!sstId) sstId = GetLabId(DEF_STARTWORD,TRUE); if (!senId) senId = GetLabId(DEF_ENDWORD,TRUE); if (!unkId) unkId = GetLabId(DEF_UNKNOWNNAME,TRUE); }
/*
 * PHIdent: parse a hmm ident and do pattern match.
 *
 * The ident is read with GetAlpha.  If it contains none of the
 * characters '*', '?' or '%' it is taken as a complete name and looked
 * up as a logical ('l') macro; when found, its model is added to
 * models.  Otherwise every macro of the selected type ('h' when
 * parsePhysicalHMM is set, 'l' when it is not) whose name matches the
 * pattern is added.
 */
static void PHIdent(ILink *models, HMMSet *hset)
{
   char pattern[MAXSTRLEN];
   int k;
   MLink m;
   LabId id;
   Boolean hasWild = FALSE;
   char wanted;

   SkipSpaces();
   GetAlpha(pattern);

   /* scan for the first wildcard character, if any */
   for (k = 0; (pattern[k] != '\0') && (k < MAXSTRLEN) && !hasWild; k++) {
      switch (pattern[k]) {
      case '*':
      case '?':
      case '%':
         hasWild = TRUE;
         break;
      default:
         break;
      }
   }

   if (hasWild) {
      /* need to search for all models that match */
      wanted = parsePhysicalHMM ? 'h' : 'l';
      for (k = 0; k < MACHASHSIZE; k++) {
         for (m = hset->mtab[k]; m != NULL; m = m->next) {
            if (m->type != wanted)
               continue;
            if (!DoMatch(m->id->name,pattern))
               continue;
            if (trace & T_ITM)
               printf("%s ",m->id->name);
            AddItem((HLink) m->structure, m->structure, models);
         }
      }
      return;
   }

   /* no wildcards: this is the name of the model */
   id = GetLabId(pattern,FALSE);
   m = FindMacroName(hset,'l',id);
   if (m == NULL)
      return;
   if (trace & T_ITM)
      printf("%s ",id->name);
   AddItem((HLink) m->structure, m->structure, models);
}
/*
 * LoadASet: set aSize, load the allophone set of x and return the
 * number of states in each model (all allophones must have the same
 * number of states).
 *
 * A logical model belongs to the set when the LabId of its TriStrip'ed
 * name equals x.  While collecting, the numStates field of every model
 * already placed in aSet is temporarily made negative so that the
 * "numStates>0" test skips it if it is reached again; the field is
 * restored to N before returning.
 *
 * Returns N, which is 0 when no model was collected.  A mismatch in
 * state counts, or a model name that does not fit the local name
 * buffer, is fatal (HError 2423).
 */
int LoadASet(LabId x)
{
   int i,N=0;
   HLink hmm;
   MLink q;
   LabId id;
   char *aid,buf[255];

   aSize = 0;
   for (i=0; i<MACHASHSIZE; i++)
      for (q=hset.mtab[i]; q!=NULL; q=q->next)
         if (q->type=='l'){
            aid = q->id->name;
            /* refuse names that would overflow buf rather than
               writing past its end */
            if (strlen(aid) >= sizeof(buf))
               HError(2423,"LoadASet: Model name %s too long",aid);
            strcpy(buf,aid);
            TriStrip(buf);
            id = GetLabId(buf,FALSE);
            if (id==x){
               if (trace&T_OPT)
                  printf(" loading allophone %s\n",aid);
               hmm = (HLink)q->structure;
               if (hmm->numStates>0) {
                  if (N==0)
                     N = hmm->numStates;
                  else if (N != hmm->numStates)
                     HError(2423,"LoadASet: allophones must have same num states %d vs %d",
                            N, hmm->numStates);
                  hmm->numStates = -N;   /* mark as already in aSet */
                  aSet[++aSize] = hmm;
               }
            }
         }
   /* undo the temporary marking */
   for (i=1; i<=aSize; i++)
      aSet[i]->numStates = N;
   return N;
}
/*
 * MakeDictionary: merge one or more dictionaries into a single one.
 *
 *   fn    - file the merged dictionary is written to
 *   dicts - list of source dictionary files, read in list order
 *   wlist - optional word list; when non-NULL only words found in it
 *           are copied, and every word in it must end up defined
 *
 * With firstOnly set, a word already present in the merged dictionary
 * is not extended from later sources.  With remDup set, a
 * pronunciation is skipped when the merged word already holds one with
 * the same probability and phone sequence.
 */
void MakeDictionary(char *fn,dictList *dicts,Vocab *wlist)
{
   Vocab srcDict, merged;
   dictList *entry;
   Word word, wanted, target;
   Pron pron, old;
   LabId blank = GetLabId("",TRUE);
   LabId outSym;
   double newProb;
   int slot, ph, nCopied, nNull, nProns, nMissing, errCode;

   /* Read dictionary collection */
   InitVocab(&merged);
   for (entry = dicts; entry != NULL; entry = entry->next) {
      InitVocab(&srcDict);
      if (ReadDict(entry->fname,&srcDict) < SUCCESS)
         HError(16913,"Could not read dict in %s", entry->fname);
      if (trace&T_TOP) {
         printf("Loaded %d words from %s\n",srcDict.nwords,entry->fname);
         fflush(stdout);
      }
#ifdef HTK_CRYPT
      if (srcDict.encrypt) merged.encrypt=TRUE;
#endif
      DumpPhoneTable(&srcDict,&merged);

      nCopied = 0;
      nNull = 0;
      nProns = 0;
      for (slot = 0; slot < VHASHSIZE; slot++) {
         for (word = srcDict.wtab[slot]; word != NULL; word = word->next) {
            if (word == srcDict.nullWord || word == srcDict.subLatWord)
               continue;

            /* is the word wanted, and is it already in the output? */
            if (wlist == NULL)
               wanted = word;
            else
               wanted = GetWord(wlist,word->wordName,FALSE);
            target = GetWord(&merged,word->wordName,FALSE);
            if (wanted == NULL || (firstOnly && target != NULL))
               continue;

            nCopied++;
            target = GetWord(&merged,word->wordName,TRUE);
            if (word->pron == NULL)
               nNull++;

            for (pron = word->pron; pron != NULL; pron = pron->next) {
               if (remDup) {
                  /* look for an identical pronunciation already stored */
                  for (old = target->pron; old != NULL; old = old->next) {
                     if (old->nphones != pron->nphones ||
                         old->prob != pron->prob)
                        continue;
                     ph = 0;
                     while (ph < old->nphones &&
                            old->phones[ph] == pron->phones[ph])
                        ph++;
                     if (ph == old->nphones)
                        break;
                  }
                  if (old != NULL)
                     continue;
               }
               nProns++;
               if (pron->outSym == NULL)
                  outSym = blank;
               else
                  outSym = pron->outSym;
               if (pron->prob > log(MINPRONPROB))
                  newProb = exp(pron->prob);
               else
                  newProb = 0.0;
               NewPron(&merged,target,pron->nphones,pron->phones,
                       outSym,newProb);
            }
         }
      }
      if (trace&T_TOP) {
         printf("Copied %d words (%d null,%d prons) from %s\n",
                nCopied,nNull,nProns,entry->fname);
         fflush(stdout);
      }
      ClearVocab(&srcDict);
   }

   if (wlist != NULL) {
      /* Check dictionary covers word list */
      nMissing = 0;
      for (slot = 0; slot < VHASHSIZE; slot++) {
         for (word = wlist->wtab[slot]; word != NULL; word = word->next) {
            if (GetWord(&merged,word->wordName,FALSE) != NULL)
               continue;
            /* the code is negative until more than ten earlier words
               have been found missing, after which it is positive */
            errCode = (nMissing > 10) ? 16930 : -16930;
            nMissing++;
            HError(errCode,
                   "HLMCopy: Cannot find definition for word %s",
                   word->wordName->name);
         }
      }
      if (nMissing > 0)
         HError(9999,"HLMCopy: Dictionary missing required words");
   }

   /* Write dictionary */
   if (WriteDict(fn,&merged) < SUCCESS)
      HError(3214,"HLMCopy: WriteDict failed");
   if (trace&T_TOP) {
      printf("Wrote dictionary to %s\n",outDictFn);
      fflush(stdout);
   }
}
/*
 * LoadHTKList: load a single HTK label list - dont create anything if
 * transAlt>0 and alt != transAlt.
 *
 * Each line has the form
 *     [start [end]] name [score] { auxname [auxscore] }
 * Times are multiplied by htkLabelTimeScale; a missing time is -1 and
 * a missing score is 0.  Auxiliary labels are collected from index 1
 * upwards and the list's aux capacity is extended when a line has more
 * of them than any earlier line.
 *
 * Returns the new label list, or NULL when this alternative is not the
 * one selected by transAlt.  Format errors, including a line with more
 * auxiliary labels than the local arrays can hold, are fatal
 * (HError 6550).
 */
static LabList * LoadHTKList(MemHeap *x, Source *src, int alt)
{
   LabList *ll = NULL;
   LabId labid, auxLab[100];
   LLink p = NULL;
   HTime start,end;
   float score, auxScore[100];
   int n,maxAux = 0;
   Boolean ok;

   ok = ((transAlt==0) || (transAlt == alt)) ? TRUE : FALSE;
   if (ok)
      ll = CreateLabelList(x,maxAux);   /* assume no aux labels */
   if (trace&T_HTKL)
      printf("HLabel: looking for lab list\n");
   while (trSym==TRNUM || trSym==TRSTR){
      start = -1; end = -1; score = 0.0;
      if (trSym==TRNUM) {
         start = trNum; GetTrSym(src,TRUE);
         start *= htkLabelTimeScale;
         if (trSym==TRNUM) {
            end = trNum; GetTrSym(src,TRUE);
            end *= htkLabelTimeScale;
         }
      }
      if (trSym != TRSTR)
         HError(6550,"LoadHTKList: Label Name Expected");
      labid = GetLabId(trStr,TRUE);
      GetTrSym(src,TRUE);
      if (trSym==TRNUM){
         score = trNum;
         GetTrSym(src,TRUE);
      }
      if (trace&T_HTKL)
         printf("HLabel: adding %.0f %.0f %s %f\n",start,end,labid->name,score);
      if (ok)
         p = AddLabel(x,ll,labid,start,end,score);
      /* Any aux labels ? */
      n = 0;
      while (trSym != TREOL && trSym!=TREOF) {
         n++;
         /* auxLab/auxScore are used from index 1, so the highest
            usable index is one less than the array length */
         if (n >= (int)(sizeof(auxLab)/sizeof(auxLab[0])))
            HError(6550,"LoadHTKList: Too many aux labels");
         if (trSym != TRSTR)
            HError(6550,"LoadHTKList: Aux Label Name Expected");
         auxLab[n] = GetLabId(trStr,TRUE);
         if (trace&T_HTKL)
            printf("HLabel:   adding aux lab %d = %s\n",n,auxLab[n]->name);
         GetTrSym(src,TRUE);
         if (trSym==TRNUM){
            auxScore[n] = trNum;
            if (trace&T_HTKL)
               printf("HLabel:   adding aux score %d = %f\n",n,trNum);
            GetTrSym(src,TRUE);
         } else
            auxScore[n] = 0.0;
      }
      if (ok && n>0) {   /* need to add aux info */
         if (n>maxAux) {
            ExtendAux(x,ll,n);
            maxAux = n;
         } else
            while (n<maxAux) {
               /* pad up to the list's current aux capacity */
               ++n;
               auxLab[n] = NULL;
               auxScore[n] = 0.0;
            }
         AddAuxLab(p,n,auxLab,auxScore);
      }
      if (trSym!=TREOF)
         GetTrSym(src,TRUE);
   }
   return ll;
}
/*
 * Initialise: perform global initialisations.
 *
 * In order:
 *   - look up the null class label and normalise the model weights
 *     (model 0 receives whatever weight the others leave over);
 *   - load every language model listed in lmInfo;
 *   - obtain the word list, either from wlistFN or by collecting the
 *     words of all loaded models, numbering each word 1..nWords through
 *     its LabId aux field;
 *   - check that the OOV, sentence start and sentence end symbols are
 *     in the word list;
 *   - build one LabId -> NameId lookup array per model (l2nId) and the
 *     equivalence class array (eqId), then link equivalence classes;
 *   - open the output stream if one was requested.
 *
 * NOTE(review): the word index is stored in and recovered from the
 * pointer-typed aux field with plain casts ((Ptr)(i+1), (int)...aux).
 * This presumably relies on indices being small - confirm on targets
 * where int is narrower than a pointer.
 */
static void Initialise(void)
{
   int i,j,ndx;
   float x;
   LMInfo *li;
   Boolean inLM;
   LabId *wid,lab;
   NameId *na,nid;
   Boolean isPipe;

   nulClass = GetLabId(nulName,TRUE);

   /* normalise weights */
   for (x=0.0, i=1; i<nLModel; i++)
      x += lmInfo[i].weight;
   lmInfo[0].weight = 1.0-x;

   /* load all models */
   for (li=lmInfo, i=0; i<nLModel; i++, li++) {
      if (trace&T_TOP)
         printf("Loading language model from %s\n",li->fn);
      li->lm = LoadLangModel(li->fn,NULL,1.0,LMP_LOG|LMP_COUNT,&permHeap);
      /* a model loaded as counts is rebuilt before use */
      if (li->lm->probType==LMP_COUNT)
         RebuildLM(li->lm,cutOff,wdThresh,LMP_LOG);
      AttachAccessInfo(li->lm);
   }
   if (trace&T_TOP) {
      printf("Using language model(s): \n");
      for (li=lmInfo,i=0; i<nLModel; i++,li++)
         printf(" %d-gram %s, weight %.2f\n",li->lm->nSize,li->fn,li->weight);
   }
   /* with no tests requested, run one using the size of the first model */
   if (numTests==0) {
      numTests=1; testInfo[0] = lmInfo[0].lm->nSize;
   }

   /* load or create word list */
   if (wlistFN!=NULL) {
      /* load word list from file */
      CreateWordList(wlistFN,&wList,nWords+10);
      nWords = wList.used;
      for (wid=wList.id, i=0; i<nWords; i++,wid++) /* assign lookup indices */
         (*wid)->aux = (Ptr) (i+1);
   } else {
      /* derive word list from LMs */
      /* first pass: number each word the first time it is seen */
      for (nWords=0,li=lmInfo, i=0; i<nLModel; i++, li++) {
         /* Obtain class-LM word list in a different way */
         if (li->lm->classLM) {
            na = li->lm->classBM;
            for (j=0; j<li->lm->classW; j++) {
               lab = GetLabId(na[j+1]->name, TRUE);
               if (lab->aux==NULL)
                  lab->aux = (Ptr) (++nWords);
            }
         } else {
            na = li->lm->binMap;
            for (j=0; j<li->lm->vocSize; j++) {
               lab = GetLabId(na[j+1]->name,TRUE);
               if (lab->aux==NULL)
                  lab->aux = (Ptr) (++nWords);
            }
         }
      }
      CreateWordList(NULL,&wList,nWords+10);
      /* second pass: store each word at the position given by its number */
      for (li=lmInfo, i=0; i<nLModel; i++, li++) {
         /* Obtain class-LM word list in a different way */
         if (li->lm->classLM) {
            na = li->lm->classBM;
            for (j=0; j<li->lm->classW; j++) {
               lab = GetLabId(na[j+1]->name,TRUE);
               ndx = ((int) lab->aux) - 1;
               wList.id[ndx] = lab;
            }
         } else {
            na = li->lm->binMap;
            for (j=0; j<li->lm->vocSize; j++) {
               lab = GetLabId(na[j+1]->name,TRUE);
               ndx = ((int) lab->aux) - 1;
               wList.id[ndx] = lab;
            }
         }
      }
      wList.used = nWords;
   }
   if (trace&T_TOP) {
      printf("Found %d unique words in %d model(s)\n",nWords,nLModel);
      fflush(stdout);
   }
   /* a symbol whose aux field is still NULL was never numbered above */
   if (unkId->aux==NULL && !skipOOV) {
      HError(16620,"LPlex: OOV class symbol %s not in word list",unkId->name);
   }
   if (sstId->aux==NULL) {
      HError(16620,"LPlex: sentence start symbol %s not in word list",sstId->name);
   }
   if (senId->aux==NULL) {
      HError(16620,"LPlex: sentence end symbol %s not in word list",senId->name);
   }

   /* create lookup table */
   l2nId = (NameId **) New(&permHeap,nLModel*sizeof(NameId *));
   /* create LabId -> NameId lookup arrays (one per LM) */
   /* NOTE(review): the na++ in the loop header looks redundant, since na
      is reassigned at the top of every iteration - confirm */
   for (li=lmInfo, i=0; i<nLModel; i++, li++, na++) {
      na = (NameId *) New(&permHeap,(nWords+2)*sizeof(NameId));
      for (wid = wList.id, j=0; j<nWords; j++, wid++) {
         if (li->lm->classLM) {
            nid = na[(int) ((*wid)->aux)] = GetNameId(li->lm->classH, (*wid)->name, FALSE);
         } else {
            nid = na[(int) ((*wid)->aux)] = GetNameId(li->lm->htab, (*wid)->name, FALSE);
         }
#ifdef SANITY
         if (nid==NULL)
            HError(-16625,"Unable to find word %s in model %s\n",(*wid)->name,li->fn);
#endif
      }
      l2nId[i] = na;
   }

   /* ensure words present at least in one model */
   /* NOTE(review): li is incremented in the inner loop but never read
      there - looks like a leftover; confirm */
   for (wid = wList.id, j=0; j<nWords; j++, wid++) {
      for (inLM=FALSE,i=0; i<nLModel; i++, li++)
         if (l2nId[i][(int) ((*wid)->aux)]!=NULL)
            inLM = TRUE;
      if (!inLM)
         HError(16625,"Unable to find word %s in any model\n",(*wid)->name);
   }

   /* create equivalence class lookup array */
   /* NOTE(review): the array holds LabId values but is sized with
      sizeof(NameId); presumably both are pointer types of equal size -
      confirm */
   eqId = (LabId *) New(&permHeap,(nWords+NumEquiv()+2)*sizeof(NameId));
   for (wid = wList.id, i=0; i<nWords; i++, wid++) {
      eqId[(int) ((*wid)->aux)] = NULL;
   }

   /* link equivalence classes */
   LinkEquiv();

   /* open output stream */
   if (outStreamFN != NULL)
      if ((outStream = FOpen(outStreamFN,NoOFilter,&isPipe)) == NULL)
         HError(16610,"Initialise: unable to open output file %s",outStreamFN);
}
int main(int argc, char *argv[]) { char * labFn, *listfn, *s; int i,fidx; MLFEntry *me = NULL; Transcription *t; void InitStats(char *listfn); void GatherStats(Transcription *t); void OutputStats(void); if(InitShell(argc,argv,hlstats_version,hlstats_vc_id)<SUCCESS) HError(1300,"HLStats: InitShell failed"); InitMem(); InitMath(); InitWave(); InitLabel(); InitLM(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); enterId=GetLabId("!ENTER",TRUE); /* All sentences should or are coerced */ exitId=GetLabId("!EXIT",TRUE); /* to start enterId and end exitId */ nullId=GetLabId("!NULL",TRUE); /* Name for words not in list */ while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(1319,"HLStats: Bad switch %s; must be single letter",s); switch(s[0]){ case 'b': doBigram = TRUE; if (NextArg() != STRINGARG) HError(1319,"HLStats: Ngram output file name expected"); bigFile = GetStrArg(); break; case 'c': doLCount = TRUE; lCountLimit = GetChkedInt(0,100000,s); break; case 'd': doDurs = TRUE; break; case 'f': bigFloor = GetChkedFlt(0.0,1000.0,s); break; case 'h': hSize = GetChkedInt(1,2,s); break; case 'l': doList = TRUE; if (NextArg() != STRINGARG) HError(1319,"HLStats: Output label list file name expected"); listFile = GetStrArg(); break; case 'o': doBOff = TRUE; break; case 'p': doPCount = TRUE; pCountLimit = GetChkedInt(0,100000,s); break; case 's': if (NextArg() != STRINGARG) HError(1319,"HLStats: ENTER label name expected"); enterId=GetLabId(GetStrArg(),TRUE); if (NextArg() != STRINGARG) HError(1319,"HLStats: EXIT label name expected"); exitId=GetLabId(GetStrArg(),TRUE); break; case 't': bigThresh = GetChkedInt(0,100,s); break; case 'u': uniFloor = GetChkedFlt(0.0,1000.0,s); break; case 'G': if (NextArg() != STRINGARG) HError(1319,"HLStats: Input label File format expected"); if((ff = Str2Format(GetStrArg())) == ALIEN) HError(-1389,"HLStats: Warning ALIEN Label file format set"); break; case 'I': if (NextArg() 
!= STRINGARG) HError(1319,"HLStats: Input MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'T': if (NextArg() != INTARG) HError(1319,"HLStats: Trace value expected"); trace = GetChkedInt(0,017,s); break; default: HError(1319,"HLStats: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(1319,"HLStats: Label list file name expected"); listfn = GetStrArg(); if (!(doDurs || doBigram || doList || doLCount || doPCount)) HError(1330,"HLStats: Nothing to do!"); InitStats(listfn); i=0; while (NumArgs()>0) { if (NextArg()!=STRINGARG) HError(1319,"HLStats: Input label file name expected"); labFn = GetStrArg(); if (IsMLFFile(labFn)) { fidx = NumMLFFiles(); if ((me=GetMLFTable()) != NULL) { while(me->next != NULL) me=me->next; LoadMasterFile(labFn); me=me->next; } else { LoadMasterFile(labFn); me=GetMLFTable(); } while (me != NULL) { if (me->type == MLF_IMMEDIATE && me->def.immed.fidx == fidx) { if (trace&T_FIL) { printf(" Processing file %s\n",me->pattern); fflush(stdout); } t = LOpen(&tmpHeap,me->pattern,ff); if (t->numLists<1) HError(-1330,"HLStats: Empty file %s",me->pattern); else GatherStats(t),i++; Dispose(&tmpHeap,t); } me = me->next; if ((trace&T_BAS) && !(trace&T_FIL) && NumMLFEntries()>5000 && i%1000==0) printf(". "),fflush(stdout); } if ((trace&T_BAS) && !(trace&T_FIL) && NumMLFEntries()>5000) printf("\n"); } else { if (trace&T_FIL) { printf(" Processing file %s\n",labFn); fflush(stdout); } t = LOpen(&tmpHeap,labFn,ff); if (t->numLists<1) HError(-1330,"HLStats: Empty file %s",me->pattern); else GatherStats(t),i++; Dispose(&tmpHeap,t); } } if (trace&T_MEM) PrintAllHeapStats(); OutputStats(); if (trace&T_MEM) PrintAllHeapStats(); Exit(0); return (0); /* never reached -- make compiler happy */ }
/* PPlexStream: compute perplexity and related statistics */ static void ProcessTextStream(char *fn, int nSize) { int i; FILE *f; LabId lab=0; double ppl; int numPLabs; Boolean isPipe; char word[256]; if (fn!=NULL) { if ((f=FOpen(fn, LMTextFilter, &isPipe))==NULL) HError(16610,"ProcessTextStream: unable to open file %s", fn); } else { f = stdin; } if (trace>0) { printf("Processing text stream: %s\n", (fn==NULL)?"<stdin>":fn); fflush(stdout); } numPLabs = 0; ZeroStats(&sent); sent.nUtt = 1; sent.nTok = 0; while ((fscanf(f, "%200s", word))==1) { if (strlen(word)>=200) HError(-16640, "ProcessTextStream: word too long, will be split: %s\n", word); lab = GetEQLab(GetLabId(word, TRUE)); if (IS_SST(lab)) { numPLabs = 0; for (i=0; i<(nSize-1); i++) pLab[numPLabs++] = sstId; ZeroStats(&sent); sent.nUtt = 1; sent.nTok = 1; continue; } if (IS_UNK(lab)) { if (trace&T_OOV) printf("mapping OOV: %s\n", lab->name); StoreOOV(&sent,lab,1); lab = unkId; } pLab[numPLabs++] = lab; sent.nTok++; if (numPLabs>=LBUF_SIZE) { HError(16645,"ProcessTextStream: word buffer size exceeded - too many words without a sentence end (%d)",LBUF_SIZE); CalcPerplexity(&sent,pLab,numPLabs,nSize); numPLabs = 0; } if (IS_SEN(lab)) { CalcPerplexity(&sent,pLab,numPLabs,nSize); AddStats(&sent, &totl); if (trace&T_SEL) { /* compact info for sentence selection */ ppl = exp(-(sent.logpp)/(double) (sent.nWrd)); printf("#! %.4f", ppl); for (i=nSize-1; i<numPLabs; i++) printf(" %s", pLab[i]->name); printf("\n"); fflush(stdout); } ZeroStats(&sent); } } AddStats(&sent,&totl); if (fn!=NULL) FClose(f,isPipe); }
/* main_HCopy: entry point of the HCopy tool.  Initialises the library
   modules, parses the command-line switches, then processes the remaining
   arguments as groups of the form  S1 + S2 + ... TGT : the first source is
   opened, every source following a "+" is appended, and the result is
   written to TGT.  Returns 0. */
int main_HCopy(int argc, char *argv[])
{
   char *s; /* next file to process */
   /* block-scope prototypes for routines called below */
   void OpenSpeechFile(char *s);
   void AppendSpeechFile(char *s);
   void PutTargetFile(char *s);

   /* module initialisation */
   if(InitShell(argc,argv,hcopy_version,hcopy_vc_id)<SUCCESS)
      HError(1000,"HCopy: InitShell failed");
   InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel();
   if(InitParm()<SUCCESS)
      HError(1000,"HCopy: InitParm failed");

   /* with no arguments: print usage (unless info was already printed) and stop */
   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsageHCopy();
   if (NumArgs() == 0) return(0);

   SetConfParmsHCopy();
   /* initial trace string is null */
   trList.str = NULL;

   /* working heaps; iStack, oStack, cStack and lStack are reset after each
      target in the processing loop below */
   CreateHeap(&iStack, "InBuf", MSTAK, 1, 0.0, STACKSIZE, LONG_MAX);
   CreateHeap(&oStack, "OutBuf", MSTAK, 1, 0.0, STACKSIZE, LONG_MAX);
   CreateHeap(&cStack, "ChopBuf", MSTAK, 1, 0.0, STACKSIZE, LONG_MAX);
   CreateHeap(&lStack, "LabBuf", MSTAK, 1, 0.0, 10000, LONG_MAX);
   CreateHeap(&tStack, "Trace", MSTAK, 1, 0.0, 100, 200);

   /* ---- switch parsing: each case consumes its own arguments ---- */
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1)
         HError(1019,"HCopy: Bad switch %s; must be single letter",s);
      switch(s[0]){
      case 'a': /* auxiliary label index; stored zero-based (value - 1) */
         if (NextArg() != INTARG)
            HError(1019,"HCopy: Auxiliary label index expected");
         auxLab = GetChkedInt(1,100000,s) - 1;
         break;
      case 'e': /* end time in seconds, max 10e5 secs */
         en = GetChkedFlt(-MAXTIME,MAXTIME,s);
         stenSet = TRUE; chopF = TRUE;
         break;
      case 'i': /* output MLF name; turns on useMLF and labF */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: Output MLF name expected");
         if(SaveToMasterfile(GetStrArg())<SUCCESS)
            HError(1014,"HCopy: Cannot write to MLF");
         useMLF = TRUE; labF = TRUE;
         break;
      case 'l': /* target label file directory */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: Target label file directory expected");
         outLabDir = GetStrArg();
         labF = TRUE;
         break;
      case 'm': /* margin value stored in xMargin; enables chopping */
         xMargin = GetChkedFlt(-MAXTIME,MAXTIME,s);
         chopF = TRUE;
         break;
      case 'n': /* label index, with an optional second integer index */
         if (NextArg() != INTARG)
            HError(1019,"HCopy: Label index expected");
         labstidx= GetChkedInt(-100000,100000,s);
         if (NextArg() == INTARG)
            labenidx = GetChkedInt(-100000,100000,s);
         chopF = TRUE;
         break;
      case 's': /* start time in seconds */
         st = GetChkedFlt(0,MAXTIME,s);
         stenSet = TRUE; chopF = TRUE;
         break;
      case 't': /* trace line width */
         if (NextArg() != INTARG)
            HError(1019,"HCopy: Trace line width expected");
         traceWidth= GetChkedInt(10,100000,s);
         break;
      case 'x': /* label name, optionally followed by an integer stored in labRep */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: Label name expected");
         labName = GetLabId(GetStrArg(),TRUE);
         if (NextArg() == INTARG)
            labRep = GetChkedInt(1,100000,s);
         chopF = TRUE; labF = TRUE;
         break;
      case 'F': /* source file format; ALIEN only raises a warning (negative code) */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: Source file format expected");
         if((srcFF = Str2Format(GetStrArg())) == ALIEN)
            HError(-1089,"HCopy: Warning ALIEN src file format set");
         break;
      case 'G': /* source label file format */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: Source label File format expected");
         if((srcLabFF = Str2Format(GetStrArg())) == ALIEN)
            HError(-1089,"HCopy: Warning ALIEN Label output file format set");
         labF= TRUE;
         break;
      case 'I': /* input MLF to load */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: MLF file name expected");
         LoadMasterFile(GetStrArg());
         labF = TRUE;
         break;
      case 'L': /* label file directory */
         if (NextArg()!=STRINGARG)
            HError(1019,"HCopy: Label file directory expected");
         labDir = GetStrArg();
         labF = TRUE;
         break;
      case 'P': /* target label file format (stored in tgtLabFF) */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: Label File format expected");
         if((tgtLabFF = Str2Format(GetStrArg())) == ALIEN)
            HError(-1089,"HCopy: Warning ALIEN Label file format set");
         labF = TRUE;
         break;
      case 'O': /* target file format */
         if (NextArg() != STRINGARG)
            HError(1019,"HCopy: Target file format expected");
         if((tgtFF = Str2Format(GetStrArg())) == ALIEN)
            HError(-1089,"HCopy: Warning ALIEN target file format set");
         break;
      case 'T': /* trace flags, accepted range 0..16 */
         trace = GetChkedInt(0,16,s);
         break;
      case 'X': /* label file extension */
         if (NextArg()!=STRINGARG)
            HError(1019,"HCopy: Label file extension expected");
         labExt = GetStrArg();
         labF = TRUE;
         break;
      default:
         HError(1019,"HCopy: Unknown switch %s",s);
      }
   }

   /* a single remaining argument cannot form a source/target pair */
   if (NumArgs() == 1)
      HError(1019,"HCopy: Target file or + operator expected");
   FixOptions();

   /* ---- main processing loop ---- */
   while (NumArgs()>1) { /* process group S1 + S2 + ... TGT */
      off = 0.0;
      if (NextArg()!=STRINGARG)
         HError(1019,"HCopy: Source file name expected");
      s = GetStrArg();
      OpenSpeechFile(s); /* Load initial file S1 */
      if (NextArg()!=STRINGARG)
         HError(1019,"HCopy: Target file or + operator expected");
      s = GetStrArg();
      while (strcmp(s,"+") == 0) { /* Append + S2 + S3 ... */
         if (NextArg()!=STRINGARG)
            HError(1019,"HCopy: Append file name expected");
         s = GetStrArg();
         AppendSpeechFile(s);
         if (NextArg()!=STRINGARG)
            HError(1019,"HCopy: Target file or + operator expected");
         s = GetStrArg();
      }
      /* s now holds the first argument that is not "+": the target */
      PutTargetFile(s);
      if(trace & T_MEM) PrintAllHeapStats();
      /* release per-group storage before the next group */
      if(trans != NULL){
         trans = NULL;
         ResetHeap(&lStack);
      }
      ResetHeap(&iStack);
      ResetHeap(&oStack);
      if(chopF) ResetHeap(&cStack);
   }
   if(useMLF) CloseMLFSaveFile();
   if (NumArgs() != 0)
      HError(-1019,"HCopy: Unused args ignored");
   return (0); /* NOTE(review): no Exit() call is visible before this return, so it looks reachable - confirm */
}
/* CombineModels: load models and combine with the one in memory

   heap    - heap passed on to LoadLangModel and MergeModels
   lmi     - array of nLModel model descriptors.  Entry 0 must already carry
             a loaded model (only entries 1..nLModel-1 are loaded here) and
             its weight is overwritten with 1 minus the sum of the others.
             When NULL, the file-level lmInfo table is used instead.
   nLModel - number of entries in the descriptor array
   nSize   - n-gram size of the model to generate
   wl      - word list to use; when NULL a word list is derived from the
             vocabularies of all the models

   Returns the merged model produced by MergeModels, with gInfo[1].fmt set
   to LMF_TEXT and gInfo[2..nSize].fmt set to binfo.saveFmt. */
BackOffLM *CombineModels(MemHeap *heap,LMInfo *lmi,int nLModel,int nSize,WordMap *wl)
{
   int i,j,nw;
   float x;
   LMInfo *models,*li;
   BackOffLM *tgtLM;
   WordMap wordList;
   LabId lab;
   NameId *na;

   /* Work on the caller's descriptor array.  The NULL fallback keeps callers
      that relied on the file-level table working. */
   models = (lmi!=NULL) ? lmi : lmInfo;

   /* normalise weights */
   for (x=0.0, i=1; i<nLModel; i++)
      x += models[i].weight;
   models[0].weight = 1.0-x;

   /* load all models except the first one */
   for (li=models+1, i=1; i<nLModel; i++, li++) {
      if (trace&T_TOP)
         printf("Loading language model from %s\n",li->fn);
      li->lm = LoadLangModel(li->fn,wl,1.0,LMP_FLOAT,heap);
   }

   if (wl==NULL) {
      /* derive word list from LMs */
      wl = &wordList;
      /* pass 1: make sure every word has a label and clear its aux mark */
      for (li=models, i=0; i<nLModel; i++, li++) {
         na = li->lm->binMap;
         for (j=0; j<li->lm->vocSize; j++) {
            lab = GetLabId(na[j+1]->name,TRUE);
            lab->aux=NULL;
         }
      }
      /* pass 2: count distinct words, marking each one the first time */
      for (nw=0,li=models, i=0; i<nLModel; i++, li++) {
         na = li->lm->binMap;
         for (j=0; j<li->lm->vocSize; j++) {
            lab = GetLabId(na[j+1]->name,FALSE);
            if (lab->aux==NULL) {
               nw++; lab->aux = (Ptr) wl;
            }
         }
      }
      CreateWordList(NULL,wl,nw+10);
      /* pass 3: store each marked word once and clear its mark again */
      for (nw=0,li=models, i=0; i<nLModel; i++, li++) {
         na = li->lm->binMap;
         for (j=0; j<li->lm->vocSize; j++) {
            lab = GetLabId(na[j+1]->name,FALSE);
            if (lab->aux==(Ptr) wl) {
               wl->id[nw++]=lab; lab->aux = NULL;
            }
         }
      }
      wl->used = nw;
   }

   if (trace&T_TOP) {
      printf("Using language model(s): \n");
      for (li=models,i=0; i<nLModel; i++,li++)
         printf("  %d-gram %s, weight %.2f\n",li->lm->nSize,li->fn,li->weight);
   }
   if (trace&T_TOP) {
      printf("Generating %d-gram model %s\n",nSize,outFN);
      fflush(stdout);
   }

   /* NOTE(review): wl may point at the local wordList here; confirm that
      neither MergeModels nor the returned model keeps this pointer after
      this function returns. */
   tgtLM = MergeModels(heap,models,nLModel,nSize,wl);
#ifdef HTK_CRYPT
   if (tgtLM->encrypt && binfo.saveFmt==LMF_TEXT)
      binfo.saveFmt = LMF_BINARY;
#endif
   for (i=1; i<=nSize; i++) {
      tgtLM->gInfo[i].fmt = (i==1) ? LMF_TEXT : binfo.saveFmt;
   }
   return tgtLM;
}