int main(int argc, char *argv[]) { char *s; Initialise(argc,argv); CreateHeap(&modelHeap, "Model heap", MSTAK, 1, 0.0, 100000, 800000 ); CreateHMMSet(&hset,&modelHeap,TRUE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(3219,"HNetTest: Bad switch %s; must be single letter",s); switch(s[0]){ case 'H': if (NextArg() != STRINGARG) HError(3219,"HNetTest: MMF File name expected"); AddMMF(&hset,GetStrArg()); break; default: HError(3219,"HNetTest: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(999,"HNetTest: Dictionary file name expected"); dictFn = GetStrArg(); if (NextArg()!=STRINGARG) HError(999,"HNetTest: Word Net file name expected"); wdNetFn = GetStrArg(); if (NextArg()!=STRINGARG) HError(999,"HNetTest: HMM list file name expected"); hmmListFn = GetStrArg(); if(MakeHMMSet(&hset,hmmListFn)<SUCCESS) HError(999,"HNetTest: MakeHMMSet failed"); if(LoadHMMSet(&hset,NULL,NULL)<SUCCESS) HError(999,"HNetTest: LoadHMMSet failed"); InitVocab(&vocab); if(ReadDict(dictFn,&vocab)<SUCCESS) HError(3213, "HNetTest: ReadDict failed"); LoadNetwork(); }
/* GenSentences: top level control of the sentence generator */ void GenSentences(char * latfn, char * dicfn) { int i,min,max,len; double e,p; MemHeap lheap; FILE *f; Boolean isPipe; InitVocab(&voc); if(ReadDict(dicfn,&voc)<SUCCESS) HError(3413,"GenSententces:ReadDict failed" ); CreateHeap(&lheap,"Lattice Heap",MSTAK,1,0.4,1000,5000); if ((f=FOpen(latfn,NetFilter,&isPipe)) == NULL) HError(3410,"GenSentences: Can't open lattice file %s",latfn); if((lat = ReadLattice(f, &lheap, &voc, TRUE, FALSE))==NULL) HError(3410,"GenSentences: ReadLattice failed"); FClose(f,isPipe); if (trace&T_TOP) printf("HSGen %d sents from lattice %s/dictionary %s\n", ngen,latfn,dicfn); psSum = 0.0; lenSum = 0; min = 100000; max = 0; if (trace&T_DET) quiet = TRUE; /* kill output if detailed trace */ for (i=1; i<=ngen; i++){ len = GenSent(i); lenSum += len; if (len>max) max = len; if (len<min) min = len; } if (stats) { ComputeVSize(); e = psSum / lenSum; p = exp(e); e = e / log(2.0); printf("Entropy = %f, Perplexity = %f\n",e,p); printf("%d Sentences: average len = %.1f, min=%d, max=%d\n", ngen,(float)lenSum/ngen,min,max); } }
/* Construct from a blob: initialise Blob_ from the argument, then run ReadDict(). */
inline TImpl(const TBlob& blob)
    : Blob_(blob)
{
    ReadDict();
}
/*
 * MakeDictionary: merge one or more dictionaries into a single one
 *
 *   fn    - name of the dictionary file to write
 *   dicts - linked list of input dictionary file names
 *   wlist - optional word list; when non-NULL only words found in it
 *           are copied, and every word in it must end up defined
 *
 * Each input dictionary is read in turn and its words (except the
 * null and sub-lattice words) are copied into the merged dictionary.
 * The globals firstOnly and remDup control the merge: with firstOnly
 * a word already present in the merged dictionary is skipped, and with
 * remDup a pronunciation identical to one already stored for the word
 * is not added again.  Errors are reported through HError.
 */
void MakeDictionary(char *fn,dictList *dicts,Vocab *wlist)
{
   Word word,fnd,cur;
   Pron pron,chk;
   Vocab tDict,dict;
   dictList *d;
   LabId blank=GetLabId("",TRUE);
   int i,l,n,m,p;

   /* Read dictionary collection */
   InitVocab(&dict);
   for (d=dicts;d!=NULL;d=d->next) {
      InitVocab(&tDict);
      if(ReadDict(d->fname,&tDict)<SUCCESS)
         HError(16913,"Could not read dict in %s", d->fname);
      if (trace&T_TOP) {
         printf("Loaded %d words from %s\n",tDict.nwords,d->fname);
         fflush(stdout);
      }
#ifdef HTK_CRYPT
      if (tDict.encrypt) dict.encrypt=TRUE;
#endif
      DumpPhoneTable(&tDict,&dict);

      /* n = words copied, m = words without pronunciation, p = prons added */
      for (i=0,n=0,m=0,p=0; i<VHASHSIZE; i++) {
         for (word=tDict.wtab[i]; word!=NULL; word=word->next) {
            if (word==tDict.nullWord || word==tDict.subLatWord)
               continue;

            /* Without a word list every word qualifies */
            if (wlist==NULL)
               fnd=word;
            else
               fnd=GetWord(wlist,word->wordName,FALSE);
            cur=GetWord(&dict,word->wordName,FALSE);

            /* Skip unlisted words, and already-defined ones if firstOnly */
            if (fnd==NULL || (firstOnly && cur!=NULL))
               continue;

            n++;
            cur=GetWord(&dict,word->wordName,TRUE);
            if (word->pron==NULL) m++;
            for (pron=word->pron;pron!=NULL;pron=pron->next) {
               if (remDup) {
                  /* Search the merged entry for an identical pronunciation:
                     same length, same prob and same phone sequence */
                  for (chk=cur->pron;chk!=NULL;chk=chk->next) {
                     if (chk->nphones!=pron->nphones ||
                         chk->prob!=pron->prob)
                        continue;
                     for(l=0;l<chk->nphones;l++)
                        if (chk->phones[l]!=pron->phones[l]) break;
                     if (l==chk->nphones) break;   /* exact match found */
                  }
                  if (chk!=NULL) continue;         /* duplicate - drop it */
               }
               p++;
               NewPron(&dict,cur,pron->nphones,pron->phones,
                       pron->outSym==NULL?blank:pron->outSym,
                       pron->prob>log(MINPRONPROB)?exp(pron->prob):0.0);
            }
         }
      }
      if (trace&T_TOP) {
         printf("Copied %d words (%d null,%d prons) from %s\n",
                n,m,p,d->fname);
         fflush(stdout);
      }
      ClearVocab(&tDict);
   }

   if (wlist!=NULL) {
      /* Check dictionary covers word list */
      for (i=0,n=0; i<VHASHSIZE; i++)
         for (word=wlist->wtab[i]; word!=NULL; word=word->next) {
            fnd=GetWord(&dict,word->wordName,FALSE);
            /* Error code is negated for the first missing words and
               becomes positive once more than ten have been reported */
            if (fnd==NULL)
               HError((n++>10)?16930:-16930,
                      "HLMCopy: Cannot find definition for word %s",
                      word->wordName->name);
         }
      if (n>0)
         HError(9999,"HLMCopy: Dictionary missing required words");
   }

   /* Write dictionary */
   if(WriteDict(fn,&dict)<SUCCESS)
      HError(3214,"HLMCopy: WriteDict failed");
   if (trace&T_TOP) {
      /* Report the file actually written (fn), not a global name */
      printf("Wrote dictionary to %s\n",fn);
      fflush(stdout);
   }
}
int main(int argc, char *argv[]) { int i; char *s,*c; char fmt[256]; dictList *dEntry,*d; InitShell(argc,argv,prog_version,prog_vc_id); InitMem(); InitMath(); InitWave(); InitLabel(); InitDict(); InitWMap(); InitLUtil(); InitLModel(); InitPCalc(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(EXIT_SUCCESS); SetConfParms(); CreateHeap(&langHeap,"langHeap",MSTAK,1,0.5,5000,40000); for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0; while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(16919,"Bad switch %s; must be single letter",s); switch(s[0]){ case 'c': i = GetChkedInt(2,LM_NSIZE,s); cutOff[i] = GetChkedInt(1,1000,s); break; case 'd': if (NextArg()!=STRINGARG) HError(16919,"LMCopy: Input dictionary file name expected"); dEntry=New(&gcheap,sizeof(dictList)); dEntry->fname=GetStrArg(); dEntry->next=NULL; if (dList==NULL) dList=dEntry; else { for (d=dList;d->next!=NULL;d=d->next); d->next=dEntry; } break; case 'f': strcpy(fmt,GetStrArg()); for (c=fmt; *c!=0; *c=toupper(*c), c++); if (strcmp(fmt, LM_TXT_TEXT)==0) saveFmt = LMF_TEXT; else if (strcmp(fmt, LM_TXT_BINARY)==0) saveFmt = LMF_BINARY; else if (strcmp(fmt, LM_TXT_ULTRA)==0) saveFmt = LMF_ULTRA; else HError(16919,"Unrecognised LM format, should be one of [%s, %s, %s]", LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA); break; case 'm': remDup=FALSE; break; case 'n': nSize = GetChkedInt(1,LM_NSIZE,s); break; case 'o': firstOnly=TRUE; break; case 'u': if (NextArg()!=STRINGARG) HError(16919,"LMCopy: Unigram file name expected"); uniFn = GetStrArg(); break; case 'v': if (NextArg()!=STRINGARG) HError(16919,"LMCopy: Dictionary output file name expected"); outDictFn = GetStrArg(); break; case 'w': if (NextArg() != STRINGARG) HError(16919,"LPlex: Word list file name expected"); wlistFN = GetStrArg(); break; case 'T': trace = GetChkedInt(0,077, s); break; default: HError(16919,"LMPlex: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) /* load the language model */ HError(16919, 
"Input language model filename expected"); srcFN = GetStrArg(); if (NextArg()!=STRINGARG) /* load the language model */ HError(16919, "Output language model filename expected"); tgtFN= GetStrArg(); if (wlistFN!=NULL) { InitVocab(&vocab); if(ReadDict(wlistFN,&vocab) < SUCCESS) HError(16913,"Could not read dict in %s", wlistFN); if (trace&T_TOP) { printf("Loaded %d words from %s\n",vocab.nwords,wlistFN); fflush(stdout); } voc = &vocab; CreateWordList(wlistFN,&wlist,10); lm = LoadLangModel(srcFN,&wlist,1.0,LMP_FLOAT|LMP_COUNT,&langHeap); } else { voc = NULL; lm = LoadLangModel(srcFN,NULL,1.0,LMP_FLOAT|LMP_COUNT,&langHeap); } if (trace&T_TOP) { printf("Loaded model from %s\n",srcFN); fflush(stdout); } if (lm->probType==LMP_COUNT) { RebuildLM(lm, cutOff, NULL, LMP_FLOAT); /* GLM there was no threshold before! */ } if (uniFn!=NULL) ReplaceUnigrams(uniFn,lm); if (nSize>0 && nSize<lm->nSize) lm->nSize = nSize; #ifdef HTK_CRYPT if (lm->encrypt && saveFmt==LMF_TEXT) saveFmt = LMF_BINARY; #endif for (i=1;i<=lm->nSize;i++) lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : saveFmt; SaveLangModel(tgtFN,lm); if (trace&T_TOP) { printf("Wrote model to %s\n",tgtFN); fflush(stdout); } if (outDictFn) { MakeDictionary(outDictFn,dList,voc); } Exit(EXIT_SUCCESS); return EXIT_SUCCESS; /* never reached -- make compiler happy */ }
#include "HLM.h"

/* Kind of network to build; unknown is the initial value used by main */
typedef enum {unknown, wordLoop, boBiGram, matBiGram, multiLat, wordPair} BuildType;

static int trace = 0;               /* Trace flags */
static LabId enterId;               /* id of !ENTRY label in ngram */
static LabId exitId;                /* id of !EXIT label in ngram */
static LabId bStartId=NULL;         /* id of start bracket */
static LabId bEndId=NULL;           /* id of end bracket */
static LabId unknownId;             /* id of unknown label in ngram */
static Boolean zapUnknown = FALSE;  /* zap unknown symbols from bigram */
MemHeap buildStack;                 /* heap created in main as "HBuild Stack" */

/* ---------------- Configuration Parameters --------------------- */

static ConfParam *cParm[MAXGLOBS];  /* parameters returned by GetConfig */
static int nParm = 0;               /* total num params */

/* ---------------- Process Command Line ------------------------- */

/* SetConfParms: set conf parms relevant to this tool */
/* Fetches the "HBUILD" parameters into cParm and, when a TRACE */
/* integer is present, copies it into the trace flags. */
void SetConfParms(void)
{
   int i;

   nParm = GetConfig("HBUILD", TRUE, cParm, MAXGLOBS);
   if (nParm>0){
      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;
   }
}

/* ReportUsage: print the command-line summary and option table to */
/* stdout, followed by the standard options via PrintStdOpts. */
void ReportUsage(void)
{
   printf("\nUSAGE: HBuild [options] wordList latFile\n\n");
   printf(" Option Default\n\n");
   printf(" -b binary lattice output ASCII\n");
   printf(" -m s load matrix bigram from s off\n");
   printf(" -n s load back-off bigram from s off\n");
   printf(" -s s1 s2 s1/s2 are bigram start/end labels !ENTER !EXIT\n");
   printf(" -t s1 s2 bracket word-loop/pair with s1 s2 off\n");
   printf(" -u s set unknown symbol to s !NULL\n");
   printf(" -w s load word-pair grammar from s off\n");
   printf(" -x s load multi-level lattice from s off\n");
   printf(" -z ignore ngrams with unknown symbol off\n");
   PrintStdOpts("");
   printf("\n\n");
}

/* main: tool entry point.  Declares the local prototypes of the */
/* lattice building/saving routines, calls the module initialisers, */
/* creates the build stack, prints usage and exits with status 0 when */
/* there are no arguments, then reads the configuration parameters. */
int main(int argc, char *argv[])
{
   char *wordListFn,*latFn,*ipFn=NULL;
   LModel *bigramLm;
   BuildType bType = unknown;
   Boolean saveLatBin = FALSE;
   LatFormat format = HLAT_LMLIKE;
   Lattice *lat,*ipLat;
   Vocab voc;
   char *s;

   /* Block-scope prototypes for routines called from main */
   Lattice *ProcessWordLoop(MemHeap *latHeap, Vocab *voc);
   Lattice *ProcessBiGram(MemHeap *latHeap, Vocab *voc, LModel *biLM);
   void SaveLattice(Lattice *lat, char *latFn, LatFormat format);
   Lattice *LoadLattice(MemHeap *latHeap, char *latFn, Vocab *voc, Boolean shortArc);
   Lattice *ProcessWordPair(MemHeap *latHeap, Vocab *voc, char *fn);

   if(InitShell(argc,argv,hbuild_version,hbuild_vc_id)<SUCCESS)
      HError(3000,"HBuild: InitShell failed");
   InitMem();
   InitLabel();
   InitMath();
   InitDict();
   InitNet();
   InitLM();

   CreateHeap(&buildStack, "HBuild Stack", MSTAK, 1, 0.0, 100000, LONG_MAX );

   /* No arguments: show usage (unless info was already printed) and stop */
   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(0);
   SetConfParms();
/* Initialise: set up global data structures */ void Initialise(void) { Boolean eSep; int s; /* Load hmms, convert to inverse DiagC */ if(MakeHMMSet(&hset,hmmListFn)<SUCCESS) HError(3228,"Initialise: MakeHMMSet failed"); if(LoadHMMSet(&hset,hmmDir,hmmExt)<SUCCESS) HError(3228,"Initialise: LoadHMMSet failed"); ConvDiagC(&hset,TRUE); /* Create observation and storage for input buffer */ SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep); obs=MakeObservation(&gstack,hset.swidth,hset.pkind, hset.hsKind==DISCRETEHS,eSep); /* sort out masks just in case using adaptation */ if (xfInfo.inSpkrPat == NULL) xfInfo.inSpkrPat = xfInfo.outSpkrPat; if (xfInfo.paSpkrPat == NULL) xfInfo.paSpkrPat = xfInfo.outSpkrPat; if (xfInfo.useOutXForm || (update>0)) { CreateHeap(®Heap, "regClassStore", MSTAK, 1, 0.5, 1000, 8000 ); /* This initialises things - temporary hack - THINK!! */ CreateAdaptXForm(&hset, "tmp"); /* initialise structures for the f-b frame-state alignment pass */ utt = (UttInfo *) New(®Heap, sizeof(UttInfo)); fbInfo = (FBInfo *) New(®Heap, sizeof(FBInfo)); /* initialise a recogniser for frame/state alignment purposes */ alignpsi=InitPSetInfo(&hset); alignvri=InitVRecInfo(alignpsi,1,TRUE,FALSE); SetPruningLevels(alignvri,0,genBeam,-LZERO,0.0,tmBeam); InitUttInfo(utt, FALSE); InitialiseForBack(fbInfo, ®Heap, &hset, (UPDSet) (UPXFORM), genBeam*2.0, genBeam*2.0, genBeam*4.0+1.0, 10.0); utt->twoDataFiles = FALSE; utt->S = hset.swidth[0]; AttachPreComps(&hset,hset.hmem); } CreateHeap(&bufHeap,"Input Buffer heap",MSTAK,1,0.0,50000,50000); CreateHeap(&repHeap,"Replay Buffer heap",MSTAK,1,0.0,50000,50000); maxM = MaxMixInSet(&hset); for (s=1; s<=hset.swidth[0]; s++) maxMixInS[s] = MaxMixInSetS(&hset, s); if (trace&T_TOP) { printf("Read %d physical / %d logical HMMs\n", hset.numPhyHMM,hset.numLogHMM); fflush(stdout); } /* Initialise recogniser */ if (nToks>1) nBeam=genBeam; psi=InitPSetInfo(&hset); vri=InitVRecInfo(psi,nToks,models,states); /* Read dictionary and create 
storage for lattice */ InitVocab(&vocab); if(ReadDict(dictFn,&vocab)<SUCCESS) HError(3213, "Main: ReadDict failed"); CreateHeap(&ansHeap,"Lattice heap",MSTAK,1,0.0,4000,4000); if (trace & T_MEM){ printf("Memory State After Initialisation\n"); PrintAllHeapStats(); } }