HError(999,"Can only update linear transforms OR model parameters!"); xfInfo.useOutXForm = TRUE; /* This initialises things - temporary hack - THINK!! */ CreateAdaptXForm(hset, "tmp"); } /* initialise and pass information to the forward backward library */ InitialiseForBack(fbInfo, x, hset, uFlags, pruneInit, pruneInc, pruneLim, minFrwdP); if (parMode != 0) { ConvLogWt(hset); } /* 2-model reestimation */ if (al_hmmUsed){ if (trace&T_TOP) printf("2-model re-estimation enabled\n"); /* load alignment HMM set */ CreateHMMSet(&al_hset,&hmmStack,TRUE); xfInfo.al_hset = &al_hset; if (xfInfo.alXFormExt == NULL) xfInfo.alXFormExt = xfInfo.inXFormExt; /* load multiple MMFs */ if (strlen(al_hmmMMF) > 0 ) { char *p,*q; Boolean eos; p=q=al_hmmMMF; for(;;) { eos = (*p=='\0'); if ( ( isspace((int) *p) || *p == '\0' ) && (q!=p) ) {
/* UpdateModels: update all models and save them in newDir if set, new files have newExt if set */ void UpdateModels(void) { int n; HLink hmm; HMMScanState hss; if (trace&T_INT){ printf("Starting Model Update\n"); fflush(stdout); } if (hsKind==TIEDHS){ if (uFlags & UPVARS) /* TIEDHS therefore only done once per HMMSet */ UpdateTMVars(); if (uFlags & UPMEANS) UpdateTMMeans(); if (uFlags & (UPMEANS|UPVARS)) FixAllGConsts(&hset); } NewHMMScan(&hset,&hss); do { hmm = hss.hmm; n = (int)hmm->hook; if (n<minEgs && !(trace&T_OPT)) HError(-2428,"%s copied: only %d egs\n",HMMPhysName(&hset,hmm),n); if (n>=minEgs) { if (uFlags & UPTRANS) UpdateTrans(hmm); if (maxMixes>1 && uFlags & UPMIXES) UpdateWeights(hmm); } if (trace&T_OPT) { if (n<minEgs) printf("Model %s copied: only %d examples\n", HMMPhysName(&hset,hmm),n); else printf("Model %s updated with %d examples\n", HMMPhysName(&hset,hmm),n); fflush(stdout); } } while (GoNextHMM(&hss)); EndHMMScan(&hss); if (trace&T_TOP){ printf("Saving hmm's to dir %s\n",(newDir==NULL)?"Current":newDir); fflush(stdout); } if(SaveHMMSet(&hset,newDir,newExt,NULL,saveBinary)<SUCCESS) HError(2411,"UpdateModels: SaveHMMSet failed"); ResetHeaps(); /* Clean Up */ if (trace&T_TOP) printf("Reestimation complete - average log prob per frame = %e\n", totalPr/(double)totalT); }
/* PState: parse a state index set and, for every model in the models
   list, add the matching states to ilist.  If the index is followed by
   '.', the rest of the item is handed to PStatecomp instead.
   Only states 2 .. numStates-1 are considered. */
static void PState(ILink models, ILink *ilist, char *type, HMMSet *hset)
{
   IntSet stateSet;
   ILink cur;
   HMMDef *def;
   int st;

   stateSet = CreateSet(maxStates);
   PIndex(stateSet);
   SkipSpaces();

   if (ch == '.') {
      /* a state component follows: let PStatecomp collect the items */
      ReadCh();
      PStatecomp(models, ilist, type, stateSet, hset);
   }
   else {
      ChkType('s', type);
      cur = models;
      while (cur != NULL) {
         def = cur->owner;
         for (st = 2; st < def->numStates; st++) {
            if (!IsMember(stateSet, st))
               continue;
            /* tie ->info */
            if (trace & T_ITM)
               printf(" %12s.state[%d]\n", HMMPhysName(hset,def), st);
            AddItem(def, def->svec + st, ilist);
         }
         cur = cur->next;
      }
   }

   FreeSet(stateSet);
}
/* EXPORT->MAPUpdateModels: MAP-update every model in hset in place.

   hset   - the HMM set to update; must use linear (not log) mixture
            weights and must not be of kind TIEDHS
   uFlags - selects what is updated: UPMIXES (mixture weights),
            UPVARS (variances), UPMEANS (means).  UPTRANS is rejected
            with an error as soon as a model with enough examples is
            reached.

   A model is updated only when its hook value n (reported as its
   number of examples) satisfies n >= minEgs and n > 0; otherwise it is
   left unchanged.  The variance floor is set up on entry and reset on
   exit.  This function does not itself write the models to disk. */
void MAPUpdateModels(HMMSet *hset, UPDSet uFlags)
{
   HMMScanState hss;
   HLink hmm;
   int px,nmapped=0,totM;
   long n;

   if (hset->logWt == TRUE) HError(999,"HMap: requires linear weights");

   /* Initialise a few global variables */
   SetVFloor( hset, vFloor, minVar);
   maxM = MaxMixInSet(hset);
   totM = TotMixInSet(hset);
   S = hset->swidth[0];

   if (hset->hsKind == TIEDHS){ /* TIEDHS - update mu & var once per HMMSet */
      HError(999,"TIEDHS kind not currently supported in MAP estimation");
   }

   NewHMMScan(hset,&hss);
   px=1;   /* 1-based position of the current model in the scan */
   do {
      hmm = hss.hmm;
      n = (long)hmm->hook;
      /* n is a long, so it must be printed with %ld: passing it to a
         %d conversion is a format/argument type mismatch */
      if (n<minEgs && !(trace&T_UPD))
         HError(-2331,"UpdateModels: %s[%d] copied: only %ld egs\n",
                HMMPhysName(hset,hmm),px,n);
      if (n>=minEgs && n>0) {
         if (uFlags & UPTRANS)
            HError(999,"No support for MAP updating transition probabilities");
         if (maxM>1 && uFlags & UPMIXES)
            UpdateWeights(hset,px,hmm);
         if (hset->hsKind != TIEDHS){
            if (uFlags & UPVARS)
               UpdateVars(hset,px,hmm);
            if (uFlags & UPMEANS)
               nmapped += UpdateMeans(hset,px,hmm);
            if (uFlags & (UPMEANS|UPVARS))
               FixGConsts(hmm);
         }
      }
      px++;
   } while (GoNextHMM(&hss));
   EndHMMScan(&hss);

   if (trace&T_TOP) {
      printf("Observed components (means) %d of %d: %.2f\n",nmapped,totM,100*(float)nmapped/(float)totM);
      if (nFloorVar > 0)
         printf("Total %d floored variance elements in %d different mixes\n",
                nFloorVar,nFloorVarMix);
      fflush(stdout);
   }

   /* Reset vfloor */
   ResetVFloor(hset,vFloor);
}
break; case 'd': if (NextArg()!=STRINGARG) HError(2319,"HERest: HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'm': minEgs = GetChkedInt(0,1000,s); break; case 'o':
/* Interpolate: top level of deleted interpolation */ void Interpolate(void) { LabId x; int i,N,p,s,b,j,M=0; float l; StreamElem *ste; CreateWStore(); CreateMonoList(); for (p=1; p<=nPhones; p++){ x = monophones[p]; if (trace&T_INT) printf("Smoothing phone %s [%d]\n",x->name,p); N = LoadASet(x); for (i=2; i<N; i++) { if (trace&T_INT) printf(" State %d\n",i); for (s=1; s<=nStreams; s++){ if (trace&T_INT) printf(" Stream %d\n",s); LoadSSet(i,s); switch(hsKind){ case TIEDHS: M = hset.tmRecs[s].nMix; break; case DISCRETEHS: M = sSet[1]->nMix; break; } CalcWBar(wbar[0],0,M); for (b=1; b<=nBlk; b++) CalcWBar(wbar[b],b,M); for (j=1; j<=aSize; j++){ ste = sSet[j]; CalcWCd(wcd[0],0,ste,M); for (b=1; b<=nBlk; b++) CalcWCd(wcd[b],b,ste,M); l = LambdaOpt(ste,M); SmoothWtAcc(ste,l,M); if (trace&T_INT) printf(" Model %s lambda = %f\n",HMMPhysName(&hset,aSet[j]),l); } } } } }
default: HError(2319,"HERest: Unknown switch %s",s); } } if (NextArg() != STRINGARG) HError(2319,"HERest: file name of vocabulary list expected"); Initialise(fbInfo, &fbInfoStack, &hset, GetStrArg()); InitUttInfo(utt, twoDataFiles); numUtt = 1; if (trace&T_TOP) SetTraceFB(); /* allows HFB to do top-level tracing */ do { if (NextArg()!=STRINGARG) HError(2319,"HERest: data file name expected"); if (twoDataFiles && (parMode!=0)){ if ((NumArgs() % 2) != 0)
hmmExt = GetStrArg(); break; case 'B': saveBinary=TRUE; break; case 'F': if (NextArg() != STRINGARG) HError(2319,"HERest: Data File format expected"); if((dff = Str2Format(GetStrArg())) == ALIEN) HError(-2389,"HERest: Warning ALIEN Data file format set"); break; case 'G':
/* PStatecomp: parse the component that follows "state[...]." and add the
   matching items of every model in the models list to ilist.
   The next keyword selects what is collected:
     mix / stream  - stream pdfs, or (when '[' follows) whatever PMix
                     selects; rejected for TIEDHS and DISCRETEHS sets
     dur           - checked against item type 'd'
     weights       - checked against item type 'w'
   Any other keyword is reported through EdError.
   Only states 2 .. numStates-1 that are members of states are used. */
static void PStatecomp(ILink models, ILink *ilist, char *type,
                       IntSet states, HMMSet *hset)
{
   IntSet streamSet;
   Keyword key;
   ILink cur;
   HMMDef *def;
   int st, strm;

   key = GetKey();
   switch (key) {

   case MIX_KEY:
   case STREAM_KEY:
      if (hset->hsKind==TIEDHS || hset->hsKind==DISCRETEHS)
         HError(7231,"PStatecomp: Cannot specify streams or mixes unless continuous");
      streamSet = CreateSet(SMAX);
      if (key != STREAM_KEY) {
         /* bare "mix": only stream 1 is selected */
         AddMember(streamSet, 1);
      }
      else {
         PIndex(streamSet);
         SkipSpaces();
         if (ch != '.')
            EdError(". expected after stream spec");
         ReadCh();
         if (GetKey() != MIX_KEY)
            EdError("Mix expected after Stream index");
      }
      SkipSpaces();
      if (ch == '[') {
         PMix(models, ilist, type, states, streamSet, hset);
      }
      else {
         ChkType('p', type);
         for (cur = models; cur != NULL; cur = cur->next) {
            def = cur->owner;
            for (st = 2; st < def->numStates; st++) {
               if (!IsMember(states, st))
                  continue;
               for (strm = 1; strm <= hset->swidth[0]; strm++) {
                  if (!IsMember(streamSet, strm))
                     continue;
                  /* tie -> spdf */
                  if (trace & T_ITM)
                     printf(" %12s.state[%d].stream[%d]\n",
                            HMMPhysName(hset,def), st, strm);
                  AddItem(def, def->svec[st].info->pdf + strm, ilist);
               }
            }
         }
      }
      FreeSet(streamSet);
      break;

   case DUR_KEY:
      ChkType('d', type);
      for (cur = models; cur != NULL; cur = cur->next) {
         def = cur->owner;
         for (st = 2; st < def->numStates; st++) {
            if (!IsMember(states, st))
               continue;
            /* tie ->dur */
            if (trace & T_ITM)
               printf(" %12s.state[%d].dur\n",
                      HMMPhysName(hset,def), st);
            AddItem(def, def->svec[st].info, ilist);
         }
      }
      break;

   case WEIGHTS_KEY:
      ChkType('w', type);
      for (cur = models; cur != NULL; cur = cur->next) {
         def = cur->owner;
         for (st = 2; st < def->numStates; st++) {
            if (!IsMember(states, st))
               continue;
            /* tie ->stream weights */
            if (trace & T_ITM)
               printf(" %12s.state[%d].weights\n",
                      HMMPhysName(hset,def), st);
            AddItem(def, def->svec[st].info, ilist);
         }
      }
      break;

   default:
      EdError("dur, weight, stream or mix expected");
   }
}
/* PMix: parse a mixture index set, optionally followed by ".mean" or
   ".cov", and add the selected items to ilist.
   For every model, every state in states (2 .. numStates-1), every
   stream in streams and every mixture m in the parsed set whose weight
   exceeds MINMIX, one item is added:
     no suffix - the mixture element itself   (item type 'm')
     .mean     - the mixture's mpdf           (item type 'u')
     .cov      - the mixture's mpdf; the item type checked depends on
                 the covariance kind: 'v' (DIAGC/INVDIAGC), 'i' (FULLC),
                 'c' (LLTC), 'x' (XFORMC) */
static void PMix(ILink models, ILink *ilist, char *type,
                 IntSet states, IntSet streams,HMMSet *hset)
{
   enum {TMIX, TMEAN, TCOV} target;
   IntSet mixSet;
   ILink cur;
   HMMDef *def;
   StreamElem *strmBase, *strm;
   MixtureElem *mix;
   int st, sIdx, mIdx;

   mixSet = CreateSet(maxMixes);
   PIndex(mixSet);
   SkipSpaces();

   target = TMIX;
   if (ch != '.') {
      ChkType('m', type);
   }
   else {
      ReadCh();
      switch (GetKey()) {
      case MEAN_KEY:
         target = TMEAN;
         ChkType('u', type);
         break;
      case COV_KEY:
         target = TCOV;
         ChkType('a', type);
         break;
      default:
         EdError("Mean or Cov expected");
      }
   }

   for (cur = models; cur != NULL; cur = cur->next) {
      def = cur->owner;
      for (st = 2; st < def->numStates; st++) {
         if (!IsMember(states, st))
            continue;
         strmBase = def->svec[st].info->pdf;
         for (sIdx = 1; sIdx <= hset->swidth[0]; sIdx++) {
            if (!IsMember(streams, sIdx))
               continue;
            strm = strmBase + sIdx;
            mix = strm->spdf.cpdf + 1;
            for (mIdx = 1; mIdx <= strm->nMix; mIdx++, mix++) {
               if (mix->weight>MINMIX && IsMember(mixSet, mIdx)) {
                  if (target == TMIX) {
                     /* tie ->mpdf */
                     if (trace & T_ITM)
                        printf(" %12s.state[%d].stream[%d].mix[%d]\n",
                               HMMPhysName(hset,def), st, sIdx, mIdx);
                     AddItem(def, mix, ilist);
                  }
                  else if (target == TMEAN) {
                     /* tie ->mean */
                     ChkType('u', type);
                     if (trace & T_ITM)
                        printf(" %12s.state[%d].stream[%d].mix[%d].mean\n",
                               HMMPhysName(hset,def), st, sIdx, mIdx);
                     AddItem(def, mix->mpdf, ilist);
                  }
                  else if (target == TCOV) {
                     /* tie ->cov: expected item type follows the covariance kind */
                     switch (mix->mpdf->ckind) {
                     case INVDIAGC:
                     case DIAGC:
                        ChkType('v', type);
                        break;
                     case FULLC:
                        ChkType('i', type);
                        break;
                     case LLTC:
                        ChkType('c', type);
                        break;
                     case XFORMC:
                        ChkType('x', type);
                        break;
                     }
                     if (trace & T_ITM)
                        printf(" %12s.state[%d].stream[%d].mix[%d].%c\n",
                               HMMPhysName(hset,def), st, sIdx, mIdx, *type);
                     AddItem(def, mix->mpdf, ilist);
                  }
               }
            }
         }
      }
   }

   FreeSet(mixSet);
}
/* UpdateWeights: use the weight accumulators to compute new estimates of
   the mixture weights of hmm.

   hset - HMM set the model belongs to (its hsKind selects where the
          new weight is stored)
   px   - index of the model, used only in the error message
   hmm  - model whose states 2 .. numStates-1 are processed

   For each stream with an accumulator whose occupancy is positive, the
   new weight of mixture m is
        (prior[m] + acc->c[m]) / (sum of all prior[] + occupancy)
   where prior[m] = max(0, weight[m]*vecSize*mapTau - 1).  Values just
   above 1 are clamped to 1; anything over 1.001 is reported as an
   error.  Weights are then optionally floored and, for PLAINHS and
   SHAREDHS sets, renormalised by their sum.  The accumulator hook of
   each processed stream is cleared afterwards. */
static void UpdateWeights(HMMSet *hset, int px, HLink hmm)
{
   StateElem *state;
   StreamElem *strm;
   MixtureElem *mix;
   WtAcc *acc;
   float newWt, occ, priorSum, prior, total;
   int st, sIdx, mIdx, vecSize;
   int numStates;
   int numMix = 0;   /* keeps its last value if hsKind matches no case */

   numStates = hmm->numStates;
   state = hmm->svec + 2;
   for (st = 2; st < numStates; st++, state++) {
      strm = state->info->pdf + 1;
      for (sIdx = 1; sIdx <= S; sIdx++, strm++) {
         acc = (WtAcc *)strm->info->hook;

         if (hset->hsKind == TIEDHS)
            numMix = hset->tmRecs[sIdx].nMix;
         else if (hset->hsKind == DISCRETEHS ||
                  hset->hsKind == PLAINHS ||
                  hset->hsKind == SHAREDHS)
            numMix = strm->info->nMix;

         if (acc != NULL) {
            occ = acc->occ;
            if (occ > 0) {
               /* pass 1: total of the prior counts (denominator term) */
               priorSum = 0;
               mix = strm->info->spdf.cpdf + 1;
               for (mIdx = 1; mIdx <= numMix; mIdx++, mix++) {
                  vecSize = VectorSize(mix->mpdf->mean);
                  prior = mix->weight*vecSize*mapTau -1;
                  if (prior < 0)
                     prior = 0;
                  priorSum += prior;
               }

               /* pass 2: new weight for each mixture component */
               mix = strm->info->spdf.cpdf + 1;
               for (mIdx = 1; mIdx <= numMix; mIdx++, mix++) {
                  vecSize = VectorSize(mix->mpdf->mean);
                  prior = mix->weight*vecSize*mapTau -1;
                  if (prior < 0)
                     prior = 0;
                  newWt = (prior + acc->c[mIdx])/(priorSum + occ);
                  if (newWt > 1.0) {
                     if (newWt > 1.001)
                        HError(2393,"UpdateWeights: Model %d[%s]: mix too big in %d.%d.%d %5.5f",
                               px,HMMPhysName(hset,hmm),st,sIdx,mIdx,newWt);
                     newWt = 1.0;
                  }
                  switch (hset->hsKind) {
                  case TIEDHS:
                     strm->info->spdf.tpdf[mIdx] = newWt;
                     break;
                  case DISCRETEHS:
                     strm->info->spdf.dpdf[mIdx] = DProb2Short(newWt);
                     break;
                  case PLAINHS:
                  case SHAREDHS:
                     mix = strm->info->spdf.cpdf + mIdx;
                     mix->weight = newWt;
                     break;
                  }
               }

               if (mixWeightFloor > 0.0) {
                  FloorMixtures(hset->hsKind,strm->info,numMix,mixWeightFloor);
               }

               /* force a normalisation, since clamping/flooring can
                  leave the weights not summing to one */
               if ((hset->hsKind == PLAINHS) || (hset->hsKind == SHAREDHS)) {
                  total = 0;
                  mix = strm->info->spdf.cpdf + 1;
                  for (mIdx = 1; mIdx <= numMix; mIdx++, mix++)
                     total += mix->weight;
                  if (total > 1.001)
                     HError(-1,"Updating Weights, sum too large (%f)\n",total);
                  mix = strm->info->spdf.cpdf + 1;
                  for (mIdx = 1; mIdx <= numMix; mIdx++, mix++)
                     mix->weight /= total;
               }
            }
            strm->info->hook = NULL;
         }
      }
   }
}
case 'l': maxSpUtt = GetChkedInt(0,0100000,s); break; case 'E': if (NextArg()!=STRINGARG) HError(2319,"HERest: parent transform directory expected"); xfInfo.usePaXForm = TRUE; xfInfo.paXFormDir = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.paXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -E as the last option"); break; case 'J': if (NextArg()!=STRINGARG) HError(2319,"HERest: input transform directory expected"); AddInXFormDir(&hset,GetStrArg()); if (NextArg()==STRINGARG) { if (xfInfo.inXFormExt == NULL) xfInfo.inXFormExt = GetStrArg(); else HError(2319,"HERest: only one input transform extension may be specified"); } if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -J as the last option"); break; case 'K': if (NextArg()!=STRINGARG) HError(2319,"HERest: output transform directory expected"); xfInfo.outXFormDir = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.outXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -K as the last option"); break; case 'z': if (NextArg() != STRINGARG) HError(2319,"HERest: output TMF file expected");
/* track speakers */ if (UpdateSpkrStats(&hset,&xfInfo, datafn)) spUtt=0; /* Check to see whether set-up is valid */ CheckUpdateSetUp(); fbInfo->inXForm = xfInfo.inXForm; fbInfo->al_inXForm = xfInfo.al_inXForm; fbInfo->paXForm = xfInfo.paXForm; if ((maxSpUtt==0) || (spUtt<maxSpUtt)) DoForwardBackward(fbInfo, utt, datafn, datafn2) ; numUtt += 1; spUtt++; } } while (NumArgs()>0); if (uFlags&UPXFORM) {/* ensure final speaker correctly handled */ UpdateSpkrStats(&hset,&xfInfo, NULL); if (trace&T_TOP) { printf("Reestimation complete - average log prob per frame = %e (%d frames)\n", totalPr/totalT, totalT); } } else { if (parMode>0 || (parMode==0 && (updateMode&UPMODE_DUMP))){ MakeFN("HER$.acc",newDir,NULL,newFn); f=DumpAccs(&hset,newFn,uFlags,parMode); tmpFlt = (float)totalPr; WriteFloat(f,&tmpFlt,1,ldBinary); WriteInt(f,(int*)&totalT,1,ldBinary); fclose( f ); } if (parMode <= 0) { if (stats) { StatReport(&hset); } if (updateMode&UPMODE_UPDATE) UpdateModels(&hset,utt->pbuf2); } } ResetHeap(&uttStack); ResetHeap(&fbInfoStack); ResetHeap(&hmmStack);