if (uFlags&UPXFORM) printf("XForms "); if (uFlags&UPMIXES && maxM>1) printf("MixWeights "); printf("\n\n "); if (parMode>=0) printf("Parallel-Mode[%d] ",parMode); printf("System is "); switch (hsKind){ case PLAINHS: printf("PLAIN\n"); break; case SHAREDHS: printf("SHARED\n"); break; case TIEDHS: printf("TIED\n"); break; case DISCRETEHS: printf("DISCRETE\n"); break; } printf("%d Logical/%d Physical Models Loaded, VecSize=%d\n",L,P,vSize); if (hset->numFiles>0) printf("%d MMF input files\n",hset->numFiles); if (mmfFn != NULL) printf("Output to MMF file: %s\n",mmfFn); fflush(stdout); } SetVFloor( hset, vFloor, minVar); totalPr = 0.0; if (xfInfo.inSpkrPat == NULL) xfInfo.inSpkrPat = xfInfo.outSpkrPat; if (xfInfo.paSpkrPat == NULL) xfInfo.paSpkrPat = xfInfo.outSpkrPat; if (uFlags&UPXFORM) { if ((hsKind != PLAINHS) && (hsKind != SHAREDHS)) HError(999,"Can only estimated transforms with PLAINHS and SHAREDHS!");
/* EXPORT->ConvDiagC: toggle the diagonal covariance representation

   Every mixture pdf in hset whose covariance kind is DIAGC is relabelled
   INVDIAGC and vice versa.  When convData is TRUE each variance vector
   is additionally clamped to [MINVAR,MAXVAR] and replaced element-wise
   by its reciprocal; the seen flag ensures that a vector reached through
   several pdfs is converted only once.  When convData is FALSE only the
   kind label changes.  DISCRETEHS and TIEDHS sets are left untouched.
*/
void ConvDiagC(HMMSet *hset, Boolean convData)
{
   HMMScanState hss;
   SVector var;
   int dim;

   if (hset->hsKind == DISCRETEHS || hset->hsKind == TIEDHS)
      return;

   NewHMMScan(hset, &hss);
   while (GoNextMix(&hss,FALSE)) {
      /* flip the label; any other covariance kind is skipped entirely */
      if (hss.mp->ckind == DIAGC)
         hss.mp->ckind = INVDIAGC;
      else if (hss.mp->ckind == INVDIAGC)
         hss.mp->ckind = DIAGC;
      else
         continue;

      if (!convData)
         continue;

      var = hss.mp->cov.var;
      if (IsSeenV(var))
         continue;               /* already inverted via another pdf */

      dim = 1;
      while (dim <= hset->swidth[hss.s]) {
         if (var[dim] > MAXVAR) var[dim] = MAXVAR;
         if (var[dim] < MINVAR) var[dim] = MINVAR;
         var[dim] = 1/var[dim];
         dim++;
      }
      TouchV(var);
   }
   EndHMMScan(&hss);
   ClearSeenFlags(hset,CLR_ALL);
}
HError(999,"Can only update linear transforms OR model parameters!"); xfInfo.useOutXForm = TRUE; /* This initialises things - temporary hack - THINK!! */ CreateAdaptXForm(hset, "tmp"); } /* initialise and pass information to the forward backward library */ InitialiseForBack(fbInfo, x, hset, uFlags, pruneInit, pruneInc, pruneLim, minFrwdP); if (parMode != 0) { ConvLogWt(hset); } /* 2-model reestimation */ if (al_hmmUsed){ if (trace&T_TOP) printf("2-model re-estimation enabled\n"); /* load alignment HMM set */ CreateHMMSet(&al_hset,&hmmStack,TRUE); xfInfo.al_hset = &al_hset; if (xfInfo.alXFormExt == NULL) xfInfo.alXFormExt = xfInfo.inXFormExt; /* load multiple MMFs */ if (strlen(al_hmmMMF) > 0 ) { char *p,*q; Boolean eos; p=q=al_hmmMMF; for(;;) { eos = (*p=='\0'); if ( ( isspace((int) *p) || *p == '\0' ) && (q!=p) ) {
/* MakeWtAccLists: Copy info from WtAcc to WALink and add WALink to wtStore, Zero WtAcc afterwards */ void MakeWtAccLists() { int ix,n,s,i,nMix; HMMScanState hss; HLink hmm; WALink *w; StateElem *se; StreamElem *ste; WtAcc *wa; NewHMMScan(&hset,&hss); ix=1; do { hmm = hss.hmm; for (i=2,se = hmm->svec+2; i<hmm->numStates;i++,se++) for (s=1,ste = se->info->pdf+1; s<=nStreams; s++,ste++){ w = &(wtStore[ix][i][s]); n = 0; while (*w != NULL){ ++n; w = &((*w)->next); } nMix = (hset.hsKind==TIEDHS) ? hset.tmRecs[s].nMix : ste->nMix; (*w) = CreateChWtAcc(&wtAccStack, nMix); wa = (WtAcc *)ste->hook; CopyVector(wa->c,(*w)->c); (*w)->occ = wa->occ; wa->occ = 0; ZeroVector(wa->c); } ix++; } while (GoNextHMM(&hss)); EndHMMScan(&hss); }
/* UpdateParameters: in hmm using counts in accumulators */ void UpdateParameters(void) { HMMScanState hss; int size; StreamInfo *sti; WtAcc *wa; MuAcc *ma = NULL; VaAcc *va; TrAcc *ta; Boolean hFound = FALSE,shared; NewHMMScan(&hset,&hss); do if (hmmLink == hss.hmm){ hFound = TRUE; while (GoNextState(&hss,TRUE)) { while (GoNextStream(&hss,TRUE)) { sti = hss.sti; if (hss.M>1 && (uFlags&UPMIXES)){ wa = (WtAcc *)sti->hook; if (hset.hsKind == DISCRETEHS) UpDProbs(hss.i,hss.s,hss.M,wa,sti->spdf.dpdf); else UpWeights(hss.i,hss.s,hss.M,wa,sti); } if (hss.isCont && (uFlags&(UPMEANS|UPVARS)))/*PLAINHS or SHAREDHS*/ while (GoNextMix(&hss,TRUE)) { size = VectorSize(hss.mp->mean); if (!IsSeenV(hss.mp->mean)) { ma = (MuAcc *)GetHook(hss.mp->mean); if (ma->occ!=0.0) UpMeans(hss.i,hss.s,hss.m,size,ma,hss.mp->mean); /* NB old mean left in ma->mu */ TouchV(hss.mp->mean); } if (!IsSeenV(hss.mp->cov.var)) { if (uFlags&UPVARS) { va = (VaAcc *)GetHook(hss.mp->cov.var); shared = (GetUse(hss.mp->cov.var) > 1) ? TRUE:FALSE; if (va->occ!=0.0) UpVars(hss.i,hss.s,hss.m,size,va,ma->mu,hss.mp->mean,shared,hss.mp); } TouchV(hss.mp->cov.var); } } } } if (!IsSeenV(hmmLink->transP)) { if (uFlags&UPTRANS){ ta = (TrAcc *)GetHook(hmmLink->transP); UpTrans(ta,hmmLink->transP); } TouchV(hmmLink->transP); } } while (!hFound && GoNextHMM(&hss)); EndHMMScan(&hss); if (!hFound) HError(2129,"UpdateParameters: hmm not found"); }
/* InitPhonePost: set up the per-phone bookkeeping of a decoder instance.

   Pass 1: for every HMM, take its macro name, reduce it with TriStrip,
           look up (creating if necessary) the LabId for the result,
           clear that LabId's aux field and hang the LabId on the HMM's
           hook.
   Pass 2: give every distinct LabId reached through a hook the next
           index 1,2,..., store the index in its aux field and record
           the LabId in dec->monoPhone.
   Finally allocate dec->phonePost and dec->phoneFreq with nPhone+1
   entries each from gcheap.

   The limits (names shorter than 100 characters, fewer than 100 phones,
   hooks initially empty) are enforced by assert only. */
void InitPhonePost (DecoderInst *dec)
{
   HMMScanState hss;
   HLink model;
   MLink macro;
   char base[100];
   LabId id;
   int slots;

   /* pass 1: attach the stripped phone label to each model */
   NewHMMScan (dec->hset, &hss);
   do {
      model = hss.hmm;
      assert (!model->hook);
      macro = FindMacroStruct (dec->hset, 'h', model);
      assert (strlen (macro->id->name) < 100);
      strcpy (base, macro->id->name);
      TriStrip (base);
      id = GetLabId (base, TRUE);
      id->aux = (Ptr) 0;
      model->hook = (Ptr) id;
   } while (GoNextHMM (&hss));
   EndHMMScan (&hss);

   /* pass 2: number the distinct labels
      count monophones -- #### make this more efficent! */
   dec->nPhone = 0;
   NewHMMScan (dec->hset, &hss);
   do {
      model = hss.hmm;
      id = (LabId) model->hook;
      if (id->aux == NULL) {
         dec->nPhone += 1;
         id->aux = (Ptr) dec->nPhone;
         assert (dec->nPhone < 100);
         dec->monoPhone[dec->nPhone] = id;
      }
   } while (GoNextHMM (&hss));
   EndHMMScan (&hss);

   printf ("found %d monophones\n", dec->nPhone);

   /* index 0 is unused, hence one extra slot */
   slots = dec->nPhone+1;
   dec->phonePost = (LogDouble *) New (&gcheap, slots * sizeof (LogDouble));
   dec->phoneFreq = (int *) New (&gcheap, slots * sizeof (int));
}
/* EXPORT->MAPUpdateModels: MAP-update every model in hset in place.

   hset   - HMM set to update; must hold linear (not log) mixture
            weights and must not be of kind TIEDHS
   uFlags - selects which parameter classes are updated (UPMIXES,
            UPVARS, UPMEANS); UPTRANS is rejected with HError 999

   For each HMM the value stored in its hook is read as a count n
   (presumably the number of training examples -- confirm with the
   accumulation code).  Models with n < minEgs are left as they are and
   reported with warning -2331 unless T_UPD tracing is on; the others
   have weights, variances and means updated and their gconsts fixed.
   Variance floors are installed on entry and reset on exit.  This
   routine does not itself write the models to disk.

   Fixes relative to the previous version:
     - the example count is a long, so the warning now uses %ld rather
       than %d (mismatched conversion is undefined behaviour);
     - the trace percentage no longer divides by zero when the set
       contains no mixture components. */
void MAPUpdateModels(HMMSet *hset, UPDSet uFlags)
{
   HMMScanState hss;
   HLink hmm;
   int px,nmapped=0,totM;
   long n;
   float pctMapped;

   if (hset->logWt == TRUE) HError(999,"HMap: requires linear weights");

   /* Intialise a few global variables */
   SetVFloor( hset, vFloor, minVar);
   maxM = MaxMixInSet(hset);
   totM = TotMixInSet(hset);
   S = hset->swidth[0];

   if (hset->hsKind == TIEDHS){ /* TIEDHS - update mu & var once per HMMSet */
      HError(999,"TIEDHS kind not currently supported in MAP estimation");
   }

   NewHMMScan(hset,&hss);
   px=1;
   do {
      hmm = hss.hmm;
      n = (long)hmm->hook;
      if (n<minEgs && !(trace&T_UPD))
         HError(-2331,"UpdateModels: %s[%d] copied: only %ld egs\n",
                HMMPhysName(hset,hmm),px,n);
      if (n>=minEgs && n>0) {
         if (uFlags & UPTRANS)
            HError(999,"No support for MAP updating transition probabilities");
         if (maxM>1 && uFlags & UPMIXES)
            UpdateWeights(hset,px,hmm);
         if (hset->hsKind != TIEDHS){
            /* variances first, then means, then recompute gconsts */
            if (uFlags & UPVARS)
               UpdateVars(hset,px,hmm);
            if (uFlags & UPMEANS)
               nmapped += UpdateMeans(hset,px,hmm);
            if (uFlags & (UPMEANS|UPVARS))
               FixGConsts(hmm);
         }
      }
      px++;
   } while (GoNextHMM(&hss));
   EndHMMScan(&hss);

   if (trace&T_TOP) {
      /* guard against an empty set so that the report stays finite */
      pctMapped = (totM > 0) ? 100*(float)nmapped/(float)totM : 0.0f;
      printf("Observed components (means) %d of %d: %.2f\n",nmapped,totM,pctMapped);
      if (nFloorVar > 0)
         printf("Total %d floored variance elements in %d different mixes\n",
                nFloorVar,nFloorVarMix);
      fflush(stdout);
   }

   /* Reset vfloor */
   ResetVFloor(hset,vFloor);
}
/* EXPORT->ConvExpWt: replace every mixture weight in hset by its
   exponential, i.e. turn log-weights back into weights.  Sets of kind
   DISCRETEHS or TIEDHS are returned unchanged. */
void ConvExpWt(HMMSet *hset)
{
   HMMScanState hss;

   if (hset->hsKind != DISCRETEHS && hset->hsKind != TIEDHS) {
      NewHMMScan(hset, &hss);
      for (;;) {
         if (!GoNextMix(&hss,FALSE))
            break;
         hss.me->weight = exp(hss.me->weight);
      }
      EndHMMScan(&hss);
   }
}
/* UpdateModels: update all models and save them in newDir if set, new files have newExt if set */ void UpdateModels(void) { int n; HLink hmm; HMMScanState hss; if (trace&T_INT){ printf("Starting Model Update\n"); fflush(stdout); } if (hsKind==TIEDHS){ if (uFlags & UPVARS) /* TIEDHS therefore only done once per HMMSet */ UpdateTMVars(); if (uFlags & UPMEANS) UpdateTMMeans(); if (uFlags & (UPMEANS|UPVARS)) FixAllGConsts(&hset); } NewHMMScan(&hset,&hss); do { hmm = hss.hmm; n = (int)hmm->hook; if (n<minEgs && !(trace&T_OPT)) HError(-2428,"%s copied: only %d egs\n",HMMPhysName(&hset,hmm),n); if (n>=minEgs) { if (uFlags & UPTRANS) UpdateTrans(hmm); if (maxMixes>1 && uFlags & UPMIXES) UpdateWeights(hmm); } if (trace&T_OPT) { if (n<minEgs) printf("Model %s copied: only %d examples\n", HMMPhysName(&hset,hmm),n); else printf("Model %s updated with %d examples\n", HMMPhysName(&hset,hmm),n); fflush(stdout); } } while (GoNextHMM(&hss)); EndHMMScan(&hss); if (trace&T_TOP){ printf("Saving hmm's to dir %s\n",(newDir==NULL)?"Current":newDir); fflush(stdout); } if(SaveHMMSet(&hset,newDir,newExt,NULL,saveBinary)<SUCCESS) HError(2411,"UpdateModels: SaveHMMSet failed"); ResetHeaps(); /* Clean Up */ if (trace&T_TOP) printf("Reestimation complete - average log prob per frame = %e\n", totalPr/(double)totalT); }
HError(2319,"HERest: HMM file extension expected"); newExt = GetStrArg(); break; case 'p': parMode = GetChkedInt(0,500,s); break; case 'r': twoDataFiles = TRUE; break; case 's': stats = TRUE; if (NextArg()!=STRINGARG) HError(2319,"HERest: Stats file name expected"); statFN = GetStrArg(); break;
/* AttachWtAccLists: Replace WtAccs in HMMSet with lists of WALink */ void AttachWtAccLists() { int ix,s,i; HMMScanState hss; HLink hmm; StateElem *se; StreamElem *ste; NewHMMScan(&hset,&hss); ix=1; do { hmm = hss.hmm; for (i=2,se = hmm->svec+2; i<hmm->numStates;i++,se++) for (s=1,ste = se->info->pdf+1; s<=nStreams; s++,ste++){ ste->hook = wtStore[ix][i][s]; /* Note that this is known and tolerable memory leak */ } ix++; } while (GoNextHMM(&hss)); EndHMMScan(&hss); }
/* TotMixInSet: return the number of distinct mean vectors found in the
   continuous streams of hset.  Each mean is flagged as seen when it is
   first counted so that one reached again via another mixture is not
   counted twice. */
static int TotMixInSet(HMMSet *hset)
{
   HMMScanState hss;
   int count = 0;

   NewHMMScan(hset,&hss);
   do {
      while (GoNextState(&hss,TRUE)) {
         while (GoNextStream(&hss,TRUE)) {
            if (!hss.isCont)        /* only PLAINHS or SHAREDHS */
               continue;
            while (GoNextMix(&hss,TRUE)) {
               if (IsSeenV(hss.mp->mean))
                  continue;
               count++;
               TouchV(hss.mp->mean);
            }
         }
      }
   } while (GoNextHMM(&hss));
   EndHMMScan(&hss);

   return count;
}
/* CreateDecoderInst

     Create a new instance of the decoding engine. All state information is stored
     here.
     #### Ideally instances should share other structures (i.e.
          LexNets) this is not implemented, yet.

     Parameters:
       hset          - HMM set; stored in the instance, converted with
                       ConvertHSet, and scanned to tag left-to-right models
       lm            - language model; stored in the instance
       nTok          - stored in the instance and used as the multiplier for
                       the RelToken heap's element size
       latgen        - stored in the instance; when set the AltWordendHyp
                       heap is created as well
       useHModel     - stored in the instance and passed to ConvertHSet
       outpBlocksize - passed to CreateOutPCache
       doPhonePost   - when set InitPhonePost is called, otherwise nPhone
                       is set to 0
       modAlign      - with MODALIGN defined: stored in the instance and, if
                       set, the ModendHyp heap is created; without MODALIGN
                       a set value raises HError 9999

     Returns the new instance, allocated from recCHeap.
*/
DecoderInst *CreateDecoderInst(HMMSet *hset, FSLM *lm, int nTok, Boolean latgen,
                               Boolean useHModel,
                               int outpBlocksize, Boolean doPhonePost,
                               Boolean modAlign)
{
   DecoderInst *dec;
   int i, N;
   char buf[MAXSTRLEN];   /* scratch buffer for per-heap names */

   dec = (DecoderInst *) New (&recCHeap, sizeof (DecoderInst));

   dec->lm = lm;
   dec->hset = hset;
   dec->useHModel = useHModel;
   /*    dec->net = net; */

   /* create compact State info. This can change number of shared states! */
   /* #### this is ugly as we end up doing this twice, if we use adaptation! */
   dec->si = ConvertHSet (&gcheap, hset, dec->useHModel);

   /* dec->heap must exist before the New (&dec->heap, ...) calls below */
   CreateHeap (&dec->heap, "Decoder Instance heap", MSTAK, 1, 1.5, 10000, 100000);

   CreateHeap (&dec->nodeInstanceHeap, "Decoder NodeInstance heap",
               MHEAP, sizeof (LexNodeInst), 1.5, 1000, 10000);

   dec->nTok = nTok;
   dec->latgen = latgen;
   dec->nLayers = 0;
   dec->instsLayer = NULL;

   /* alloc & init Heaps for TokenSets */
   N = MaxStatesInSet (dec->hset);
   dec->maxNStates = N;
   dec->tokSetHeap = (MemHeap *) New (&dec->heap, N * sizeof (MemHeap));

   /* #### make initial size of heap blocks smaller,
      or don't alloc unneeded ones in the first place (scan HMMSet) */
   /* heap i has an element size of (i+1) TokenSets */
   for (i = 0; i < N; ++i) {
      sprintf (buf, "Decoder %d TokenSet heap", i+1);
      CreateHeap (&dec->tokSetHeap[i], buf,
                  MHEAP, (i+1) * sizeof (TokenSet), 9, 10, 5000);
   }

   dec->tempTS = (TokenSet **) New (&dec->heap, (N+1) * sizeof (TokenSet *));

   /* alloc Heap for RelToken arrays */
   CreateHeap (&dec->relTokHeap, "Decoder RelToken array heap",
               MHEAP, dec->nTok * sizeof (RelToken), 1, 1000, 5000);

   /* alloc heap for word end hyps */
   CreateHeap (&dec->weHypHeap, "WordendHyp heap", MHEAP, sizeof (WordendHyp),
               1.0, 80000, 800000);
   /* alternative word end hyps are only allocated when latgen is set */
   if (dec->latgen) {
      CreateHeap (&dec->altweHypHeap, "AltWordendHyp heap", MHEAP, sizeof (AltWordendHyp),
                  1.0, 8000, 80000);
   }
#ifdef MODALIGN
   dec->modAlign = modAlign;

   if (dec->modAlign) {
      CreateHeap (&dec->modendHypHeap, "ModendHyp heap", MHEAP, sizeof (ModendHyp),
                  1.0, 80000, 800000);
   }
#else
   /* built without model alignment support: refuse the request outright */
   if (modAlign)
      HError (9999, "CreateDecoderInst: model alignment not supported; recompile with MODALIGN");
#endif

   /* output probability cache */
   dec->outPCache = CreateOutPCache (&dec->heap, dec->hset, outpBlocksize);

   /* cache debug code (compiled out) */
#if 0
   printf (" %d %d \n", dec->hset->numStates, dec->nCacheFlags);
   for (i = 0; i < dec->nCacheEntries; ++i)
      printf ("i %d cacheFlags %lu\n", i, dec->cacheFlags[i]);

   for (i = 0; i < dec->hset->numStates; ++i) {
      assert (!CACHE_FLAG_GET(dec,i));
      CACHE_FLAG_SET(dec, i);
      assert (CACHE_FLAG_GET(dec,i));
   }
   /*        printf ("i %d C_G %lu\n", i, CACHE_FLAG_GET(dec,i)); */
#endif

   /* tag left-to-right models: the sign of tIdx is flipped for every HMM
      whose transition matrix passes CheckLRTransP */
   {
      HMMScanState hss;

      NewHMMScan(dec->hset,&hss);
      do {
         /* #### should check each tidX only once! */
         /*     if (!IsSeenV(hss.hmm->transP)) { */
         if (CheckLRTransP (hss.hmm->transP))
            hss.hmm->tIdx *= -1;
         /*            TouchV(hss.hmm->transP); */
      }
      while(GoNextHMM(&hss));
      EndHMMScan(&hss);
   }

   if (doPhonePost)
      InitPhonePost (dec);
   else
      dec->nPhone = 0;

   return dec;
}