/*
 * EXPORT->GoNextState: advance the scan to the next state that has not
 * been visited yet and mark it as visited.
 *
 *   hss    - scan record being advanced
 *   noSkip - when TRUE the scan never leaves the current HMM: reaching
 *            the last candidate state of this HMM yields FALSE
 *
 * Returns TRUE when an unseen state was located (hss->si refers to it).
 * Returns FALSE when noSkip stopped the scan at the end of the current
 * HMM, or when GoNextHMM reported that no HMM is left; only in the
 * latter case is hss->se reset to NULL.
 */
Boolean GoNextState(HMMScanState *hss, Boolean noSkip)
{
   Boolean advanced = FALSE;   /* TRUE if the last move was a step within the HMM */
   Boolean found = TRUE;       /* cleared once GoNextHMM reports failure */
   int nMix;

   while (IsSeen(hss->si->nUse) && found) {
      if (hss->i < hss->N-1) {
         /* still inside the current HMM: step to its next state */
         ++hss->i;
         ++hss->se;
         hss->si = hss->se->info;
         advanced = TRUE;
      } else {
         if (noSkip)
            return FALSE;
         /* current HMM exhausted: let GoNextHMM reposition the scan */
         advanced = FALSE;
         found = GoNextHMM(hss);
      }
   }

   if (!found) {
      hss->se = NULL;
      return FALSE;
   }

   Touch(&hss->si->nUse);
   if (advanced) {
      /* re-seat the stream and mixture cursors on the first entries of the new state */
      hss->ste = hss->si->pdf+1;
      hss->s = 1;
      nMix = hss->ste->nMix;
      hss->M = (nMix < 0) ? -nMix : nMix;   /* stored count may be negative; use its magnitude */
      hss->m = 1;
      if (hss->isCont) {
         hss->me = hss->ste->spdf.cpdf+1;
         hss->mp = hss->me->mpdf;
      }
   }
   return TRUE;
}
/* MakeWtAccLists: Copy info from WtAcc to WALink and add WALink to wtStore, Zero WtAcc afterwards */ void MakeWtAccLists() { int ix,n,s,i,nMix; HMMScanState hss; HLink hmm; WALink *w; StateElem *se; StreamElem *ste; WtAcc *wa; NewHMMScan(&hset,&hss); ix=1; do { hmm = hss.hmm; for (i=2,se = hmm->svec+2; i<hmm->numStates;i++,se++) for (s=1,ste = se->info->pdf+1; s<=nStreams; s++,ste++){ w = &(wtStore[ix][i][s]); n = 0; while (*w != NULL){ ++n; w = &((*w)->next); } nMix = (hset.hsKind==TIEDHS) ? hset.tmRecs[s].nMix : ste->nMix; (*w) = CreateChWtAcc(&wtAccStack, nMix); wa = (WtAcc *)ste->hook; CopyVector(wa->c,(*w)->c); (*w)->occ = wa->occ; wa->occ = 0; ZeroVector(wa->c); } ix++; } while (GoNextHMM(&hss)); EndHMMScan(&hss); }
HError(999,"Can only update linear transforms OR model parameters!"); xfInfo.useOutXForm = TRUE; /* This initialises things - temporary hack - THINK!! */ CreateAdaptXForm(hset, "tmp"); } /* initialise and pass information to the forward backward library */ InitialiseForBack(fbInfo, x, hset, uFlags, pruneInit, pruneInc, pruneLim, minFrwdP); if (parMode != 0) { ConvLogWt(hset); } /* 2-model reestimation */ if (al_hmmUsed){ if (trace&T_TOP) printf("2-model re-estimation enabled\n"); /* load alignment HMM set */ CreateHMMSet(&al_hset,&hmmStack,TRUE); xfInfo.al_hset = &al_hset; if (xfInfo.alXFormExt == NULL) xfInfo.alXFormExt = xfInfo.inXFormExt; /* load multiple MMFs */ if (strlen(al_hmmMMF) > 0 ) { char *p,*q; Boolean eos; p=q=al_hmmMMF; for(;;) { eos = (*p=='\0'); if ( ( isspace((int) *p) || *p == '\0' ) && (q!=p) ) {
/* UpdateParameters: in hmm using counts in accumulators */ void UpdateParameters(void) { HMMScanState hss; int size; StreamInfo *sti; WtAcc *wa; MuAcc *ma = NULL; VaAcc *va; TrAcc *ta; Boolean hFound = FALSE,shared; NewHMMScan(&hset,&hss); do if (hmmLink == hss.hmm){ hFound = TRUE; while (GoNextState(&hss,TRUE)) { while (GoNextStream(&hss,TRUE)) { sti = hss.sti; if (hss.M>1 && (uFlags&UPMIXES)){ wa = (WtAcc *)sti->hook; if (hset.hsKind == DISCRETEHS) UpDProbs(hss.i,hss.s,hss.M,wa,sti->spdf.dpdf); else UpWeights(hss.i,hss.s,hss.M,wa,sti); } if (hss.isCont && (uFlags&(UPMEANS|UPVARS)))/*PLAINHS or SHAREDHS*/ while (GoNextMix(&hss,TRUE)) { size = VectorSize(hss.mp->mean); if (!IsSeenV(hss.mp->mean)) { ma = (MuAcc *)GetHook(hss.mp->mean); if (ma->occ!=0.0) UpMeans(hss.i,hss.s,hss.m,size,ma,hss.mp->mean); /* NB old mean left in ma->mu */ TouchV(hss.mp->mean); } if (!IsSeenV(hss.mp->cov.var)) { if (uFlags&UPVARS) { va = (VaAcc *)GetHook(hss.mp->cov.var); shared = (GetUse(hss.mp->cov.var) > 1) ? TRUE:FALSE; if (va->occ!=0.0) UpVars(hss.i,hss.s,hss.m,size,va,ma->mu,hss.mp->mean,shared,hss.mp); } TouchV(hss.mp->cov.var); } } } } if (!IsSeenV(hmmLink->transP)) { if (uFlags&UPTRANS){ ta = (TrAcc *)GetHook(hmmLink->transP); UpTrans(ta,hmmLink->transP); } TouchV(hmmLink->transP); } } while (!hFound && GoNextHMM(&hss)); EndHMMScan(&hss); if (!hFound) HError(2129,"UpdateParameters: hmm not found"); }
/*
 * EXPORT->NewHMMScan: initialise the scan record hss for a pass over
 * hset and position it on the first HMM via GoNextHMM.
 * Raises HError 7220 when GoNextHMM finds nothing to scan.
 */
void NewHMMScan(HMMSet *hset, HMMScanState *hss)
{
   /* cursor fields start "before the first entry" */
   hss->mac = NULL;
   hss->h = -1;

   /* cache set-level properties in the scan record */
   hss->hset = hset;
   hss->S = hset->swidth[0];
   /* flag set only for the PLAINHS and SHAREDHS kinds */
   hss->isCont = !(hset->hsKind != PLAINHS && hset->hsKind != SHAREDHS);

   if (GoNextHMM(hss))
      return;
   HError(7220,"NewHMMScan: cannot find any physical HMMs to scan");
}
/*
 * InitPhonePost: prepare the decoder instance for phone posterior
 * estimation.
 *
 * Pass 1 attaches to every HMM (via hmm->hook) the LabId obtained from
 * its macro name after TriStrip, and clears that LabId's aux field.
 * Pass 2 numbers each distinct LabId 1..nPhone (stored in aux) and
 * records it in dec->monoPhone[].  Finally the phonePost and phoneFreq
 * arrays are allocated with nPhone+1 entries each.
 *
 * The buffer and table bounds are enforced with HError rather than
 * assert so that they are still checked when NDEBUG is defined.
 */
void InitPhonePost (DecoderInst *dec)
{
   HMMScanState hss;
   HLink hmm;
   MLink m;
   char buf[100];
   LabId phoneId;

   /* pass 1: attach the stripped phone label to every HMM */
   NewHMMScan (dec->hset, &hss);
   do {
      hmm = hss.hmm;
      assert (!hmm->hook);
      m = FindMacroStruct (dec->hset, 'h', hmm);
      if (m == NULL)
         HError (9999, "InitPhonePost: no macro found for HMM");
      /* strcpy below needs room for the name plus terminating NUL */
      if (strlen (m->id->name) >= sizeof (buf))
         HError (9999, "InitPhonePost: model name %s too long", m->id->name);
      strcpy (buf, m->id->name);
      TriStrip (buf);
      phoneId = GetLabId (buf, TRUE);
      phoneId->aux = (Ptr) 0;
      hmm->hook = (Ptr) phoneId;
   } while(GoNextHMM(&hss));
   EndHMMScan(&hss);

   dec->nPhone = 0;
   /* pass 2: count monophones -- #### make this more efficient! */
   NewHMMScan (dec->hset, &hss);
   do {
      hmm = hss.hmm;
      phoneId = (LabId) hmm->hook;
      if (!phoneId->aux) {
         /* limit of 100 is taken from the original assertion; presumably it
            matches the declared size of dec->monoPhone -- TODO confirm */
         if (dec->nPhone + 1 >= 100)
            HError (9999, "InitPhonePost: too many monophones (limit %d)", 100);
         ++dec->nPhone;
         phoneId->aux = (Ptr) dec->nPhone;
         dec->monoPhone[dec->nPhone] = phoneId;
      }
   } while(GoNextHMM(&hss));
   EndHMMScan(&hss);

   printf ("found %d monophones\n", dec->nPhone);

   dec->phonePost = (LogDouble *) New (&gcheap, (dec->nPhone+1) * sizeof (LogDouble));
   dec->phoneFreq = (int *) New (&gcheap, (dec->nPhone+1) * sizeof (int));
}
/*
 * EXPORT->MAPUpdateModels: MAP-update every model in hset according to
 * uFlags.
 *
 *   hset   - HMM set to update; must use linear mixture weights and
 *            must not be of kind TIEDHS (both rejected via HError 999)
 *   uFlags - selects which parameter classes are updated; UPTRANS is
 *            rejected for any model that qualifies for update
 *
 * A model is updated only when its example count (stored in hmm->hook)
 * is positive and at least minEgs; otherwise it is left as is and a
 * warning (HError -2331) is issued unless T_UPD tracing is on.
 * Variance floors are set up on entry and reset on exit.
 */
void MAPUpdateModels(HMMSet *hset, UPDSet uFlags)
{
   HMMScanState hss;
   HLink hmm;
   int px,nmapped=0,totM;
   long n;
   float pctMapped;

   if (hset->logWt == TRUE) HError(999,"HMap: requires linear weights");

   /* Initialise a few global variables */
   SetVFloor( hset, vFloor, minVar);
   maxM = MaxMixInSet(hset);
   totM = TotMixInSet(hset);
   S = hset->swidth[0];

   if (hset->hsKind == TIEDHS){ /* TIEDHS - update mu & var once per HMMSet */
      HError(999,"TIEDHS kind not currently supported in MAP estimation");
   }

   NewHMMScan(hset,&hss);
   px=1;
   do {
      hmm = hss.hmm;
      n = (long)hmm->hook;
      /* n is a long, so it must be printed with %ld (was %d) */
      if (n<minEgs && !(trace&T_UPD))
         HError(-2331,"UpdateModels: %s[%d] copied: only %ld egs\n",
                HMMPhysName(hset,hmm),px,n);
      if (n>=minEgs && n>0) {
         if (uFlags & UPTRANS)
            HError(999,"No support for MAP updating transition probabilities");
         if (maxM>1 && uFlags & UPMIXES)
            UpdateWeights(hset,px,hmm);
         if (hset->hsKind != TIEDHS){
            if (uFlags & UPVARS)
               UpdateVars(hset,px,hmm);
            if (uFlags & UPMEANS)
               nmapped += UpdateMeans(hset,px,hmm);
            if (uFlags & (UPMEANS|UPVARS))
               FixGConsts(hmm);
         }
      }
      px++;
   } while (GoNextHMM(&hss));
   EndHMMScan(&hss);

   if (trace&T_TOP) {
      /* avoid dividing by zero when the set holds no counted components */
      pctMapped = (totM > 0) ? 100*(float)nmapped/(float)totM : 0.0f;
      printf("Observed components (means) %d of %d: %.2f\n",nmapped,totM,pctMapped);
      if (nFloorVar > 0)
         printf("Total %d floored variance elements in %d different mixes\n",
                nFloorVar,nFloorVarMix);
      fflush(stdout);
   }

   /* Reset vfloor */
   ResetVFloor(hset,vFloor);
}
/* UpdateModels: update all models and save them in newDir if set, new files have newExt if set */ void UpdateModels(void) { int n; HLink hmm; HMMScanState hss; if (trace&T_INT){ printf("Starting Model Update\n"); fflush(stdout); } if (hsKind==TIEDHS){ if (uFlags & UPVARS) /* TIEDHS therefore only done once per HMMSet */ UpdateTMVars(); if (uFlags & UPMEANS) UpdateTMMeans(); if (uFlags & (UPMEANS|UPVARS)) FixAllGConsts(&hset); } NewHMMScan(&hset,&hss); do { hmm = hss.hmm; n = (int)hmm->hook; if (n<minEgs && !(trace&T_OPT)) HError(-2428,"%s copied: only %d egs\n",HMMPhysName(&hset,hmm),n); if (n>=minEgs) { if (uFlags & UPTRANS) UpdateTrans(hmm); if (maxMixes>1 && uFlags & UPMIXES) UpdateWeights(hmm); } if (trace&T_OPT) { if (n<minEgs) printf("Model %s copied: only %d examples\n", HMMPhysName(&hset,hmm),n); else printf("Model %s updated with %d examples\n", HMMPhysName(&hset,hmm),n); fflush(stdout); } } while (GoNextHMM(&hss)); EndHMMScan(&hss); if (trace&T_TOP){ printf("Saving hmm's to dir %s\n",(newDir==NULL)?"Current":newDir); fflush(stdout); } if(SaveHMMSet(&hset,newDir,newExt,NULL,saveBinary)<SUCCESS) HError(2411,"UpdateModels: SaveHMMSet failed"); ResetHeaps(); /* Clean Up */ if (trace&T_TOP) printf("Reestimation complete - average log prob per frame = %e\n", totalPr/(double)totalT); }
HError(2319,"HERest: HMM file extension expected"); newExt = GetStrArg(); break; case 'p': parMode = GetChkedInt(0,500,s); break; case 'r': twoDataFiles = TRUE; break; case 's': stats = TRUE; if (NextArg()!=STRINGARG) HError(2319,"HERest: Stats file name expected"); statFN = GetStrArg(); break;
/* AttachWtAccLists: Replace WtAccs in HMMSet with lists of WALink */ void AttachWtAccLists() { int ix,s,i; HMMScanState hss; HLink hmm; StateElem *se; StreamElem *ste; NewHMMScan(&hset,&hss); ix=1; do { hmm = hss.hmm; for (i=2,se = hmm->svec+2; i<hmm->numStates;i++,se++) for (s=1,ste = se->info->pdf+1; s<=nStreams; s++,ste++){ ste->hook = wtStore[ix][i][s]; /* Note that this is known and tolerable memory leak */ } ix++; } while (GoNextHMM(&hss)); EndHMMScan(&hss); }
/*
 * TotMixInSet: count the mixture components in hset, counting each
 * distinct mean vector once.  Only sets flagged isCont by the scan
 * contribute; for other kinds the result is 0.
 */
static int TotMixInSet(HMMSet *hset)
{
   HMMScanState scan;
   HLink current;
   int total = 0;

   NewHMMScan(hset,&scan);
   do {
      current = scan.hmm;
      while (GoNextState(&scan,TRUE)) {
         while (GoNextStream(&scan,TRUE)) {
            if (!scan.isCont)      /* only PLAINHS or SHAREDHS carry mixtures here */
               continue;
            while (GoNextMix(&scan,TRUE)) {
               if (IsSeenV(scan.mp->mean))
                  continue;        /* shared mean already counted */
               total++;
               TouchV(scan.mp->mean);
            }
         }
      }
   } while (GoNextHMM(&scan));
   EndHMMScan(&scan);

   return(total);
}
/* CreateDecoderInst

     Build and return a new decoding engine instance; all decoder state
     lives in the returned record.

     hset          - HMM set to decode with
     lm            - language model stored in the instance
     nTok          - tokens per set (sizes the RelToken heap)
     latgen        - when set, the AltWordendHyp heap is created as well
     useHModel     - forwarded to ConvertHSet
     outpBlocksize - forwarded to CreateOutPCache
     doPhonePost   - when set, InitPhonePost is run on the new instance
     modAlign      - model alignment; needs a MODALIGN build, otherwise
                     HError 9999 is raised

     #### Ideally instances should share other structures (i.e. LexNets);
          this is not implemented yet.
*/
DecoderInst *CreateDecoderInst(HMMSet *hset, FSLM *lm, int nTok, Boolean latgen,
                               Boolean useHModel,
                               int outpBlocksize, Boolean doPhonePost,
                               Boolean modAlign)
{
   DecoderInst *dec;
   int i, maxStates;
   char buf[MAXSTRLEN];

   dec = (DecoderInst *) New (&recCHeap, sizeof (DecoderInst));

   dec->lm = lm;
   dec->hset = hset;
   dec->useHModel = useHModel;
   /*    dec->net = net; */

   /* create compact State info. This can change number of shared states! */
   /* #### this is ugly as we end up doing this twice, if we use adaptation! */
   dec->si = ConvertHSet (&gcheap, hset, dec->useHModel);

   CreateHeap (&dec->heap, "Decoder Instance heap", MSTAK, 1, 1.5, 10000, 100000);
   CreateHeap (&dec->nodeInstanceHeap, "Decoder NodeInstance heap",
               MHEAP, sizeof (LexNodeInst), 1.5, 1000, 10000);

   dec->nTok = nTok;
   dec->latgen = latgen;
   dec->nLayers = 0;
   dec->instsLayer = NULL;

   /* one TokenSet heap per possible state count: heap i holds blocks of
      i+1 TokenSets */
   maxStates = MaxStatesInSet (dec->hset);
   dec->maxNStates = maxStates;

   dec->tokSetHeap = (MemHeap *) New (&dec->heap, maxStates * sizeof (MemHeap));

   /* #### make initial size of heap blocks smaller,
      or don't alloc unneeded ones in the first place (scan HMMSet) */
   for (i = 0; i < maxStates; ++i) {
      sprintf (buf, "Decoder %d TokenSet heap", i+1);
      CreateHeap (&dec->tokSetHeap[i], buf,
                  MHEAP, (i+1) * sizeof (TokenSet), 9, 10, 5000);
   }

   dec->tempTS = (TokenSet **) New (&dec->heap, (maxStates+1) * sizeof (TokenSet *));

   /* heap for RelToken arrays */
   CreateHeap (&dec->relTokHeap, "Decoder RelToken array heap",
               MHEAP, dec->nTok * sizeof (RelToken), 1, 1000, 5000);

   /* heaps for word end hypotheses */
   CreateHeap (&dec->weHypHeap, "WordendHyp heap", MHEAP,
               sizeof (WordendHyp), 1.0, 80000, 800000);
   if (dec->latgen) {
      CreateHeap (&dec->altweHypHeap, "AltWordendHyp heap", MHEAP,
                  sizeof (AltWordendHyp), 1.0, 8000, 80000);
   }

#ifdef MODALIGN
   dec->modAlign = modAlign;
   if (dec->modAlign) {
      CreateHeap (&dec->modendHypHeap, "ModendHyp heap", MHEAP,
                  sizeof (ModendHyp), 1.0, 80000, 800000);
   }
#else
   if (modAlign)
      HError (9999, "CreateDecoderInst: model alignment not supported; recompile with MODALIGN");
#endif

   /* output probability cache */
   dec->outPCache = CreateOutPCache (&dec->heap, dec->hset, outpBlocksize);

   /* cache debug code */
#if 0
   printf (" %d %d \n", dec->hset->numStates, dec->nCacheFlags);
   for (i = 0; i < dec->nCacheEntries; ++i)
      printf ("i %d cacheFlags %lu\n", i, dec->cacheFlags[i]);

   for (i = 0; i < dec->hset->numStates; ++i) {
      assert (!CACHE_FLAG_GET(dec,i));
      CACHE_FLAG_SET(dec, i);
      assert (CACHE_FLAG_GET(dec,i));
   }
   /* printf ("i %d C_G %lu\n", i, CACHE_FLAG_GET(dec,i)); */
#endif

   /* tag left-to-right models by flipping the sign of their tIdx */
   {
      HMMScanState scan;

      NewHMMScan(dec->hset,&scan);
      do {
         /* #### should check each tidX only once! */
         /* if (!IsSeenV(scan.hmm->transP)) { */
         if (CheckLRTransP (scan.hmm->transP))
            scan.hmm->tIdx = -scan.hmm->tIdx;
         /* TouchV(scan.hmm->transP); */
      } while(GoNextHMM(&scan));
      EndHMMScan(&scan);
   }

   if (!doPhonePost)
      dec->nPhone = 0;
   else
      InitPhonePost (dec);

   return dec;
}