/*
 * UCollectData: walk every segment held in segStore and distribute its
 * observations uniformly over the emitting states.  Frame j of a segment
 * is assigned to state n = (j-1)/obsPerState + 2, and for each stream s
 * the observation item is appended to seqMat[n][s].
 *
 * For DISCRETEHS systems the VQ index obs.vq[s] is stored (cast to Ptr);
 * otherwise the stream vector obs.fv[s] is stored.
 *
 * A segment with fewer frames than (nStates-2) is reported through
 * HError(2122,...).
 */
void UCollectData(Sequence **seqMat)
{
   int seg, frame, strm;
   int segCount, frameCount;
   long int state;
   float framesPerState;
   Observation o;
   Ptr vqItem;

   segCount = NumSegs(segStore);
   for (seg = 1; seg <= segCount; seg++) {
      frameCount = SegLength(segStore, seg);

      /* average number of frames that each of the nStates-2 states gets */
      framesPerState = ((float) frameCount) / ((float) (nStates - 2));
      if (framesPerState < 1.0) {
         HError(2122,"UCollectData: segment too short[%d]",frameCount);
      }

      for (frame = 1; frame <= frameCount; frame++) {
         o = GetSegObs(segStore, seg, frame);

         /* uniform segmentation: state index starts at 2 */
         state = (long int)(((float)(frame - 1) / framesPerState) + 2);

         for (strm = 1; strm <= nStreams; strm++) {
            if (hset.hsKind != DISCRETEHS) {
               StoreItem(seqMat[state][strm], o.fv[strm]);
               continue;
            }
            /* discrete case: the item stored is the VQ code itself */
            vqItem = (Ptr)((long int)o.vq[strm]);
            StoreItem(seqMat[state][strm], vqItem);
         }
      }
   }
}
/*
 * UCollectData: walk every segment held in segStore and distribute its
 * observations uniformly over the emitting states.  Frame j of a segment
 * is assigned to state n = (j-1)/obsPerState + 2 and, for each stream s,
 * the observation item is appended to one of the sequences seqMat[n][s][*]:
 *
 *   - DISCRETEHS:        the VQ index obs.vq[s] goes to seqMat[n][s][1];
 *   - hset.msdflag[s]:   the vector goes to seqMat[n][s][k], where k is the
 *                        non-zero result of IncludeSpace() for the vector's
 *                        SpaceOrder(); when k is zero the vector is dropped,
 *                        and HError(2122,...) is raised unless ignOutVec
 *                        is set;
 *   - otherwise:         the vector obs.fv[s] goes to seqMat[n][s][1].
 *
 * A segment with fewer frames than (nStates-2) is reported through
 * HError(2122,...).
 */
void UCollectData(Sequence ***seqMat)
{
   int seg, frame, strm;
   int state, spaceIdx, spaceOrder;
   int segCount, frameCount;
   float framesPerState;
   Observation o;
   Ptr vqItem;

   segCount = NumSegs(segStore);
   for (seg = 1; seg <= segCount; seg++) {
      frameCount = SegLength(segStore, seg);

      /* average number of frames that each of the nStates-2 states gets */
      framesPerState = ((float) frameCount) / ((float) (nStates - 2));
      if (framesPerState < 1.0) {
         HError(2122,"UCollectData: segment too short[%d]",frameCount);
      }

      for (frame = 1; frame <= frameCount; frame++) {
         o = GetSegObs(segStore, seg, frame);

         /* uniform segmentation: state index starts at 2 */
         state = (int)(((float)(frame - 1) / framesPerState) + 2);

         for (strm = 1; strm <= nStreams; strm++) {
            if (hset.hsKind == DISCRETEHS) {
               /* discrete case: the item stored is the VQ code itself */
               vqItem = (Ptr)((long)o.vq[strm]);
               StoreItem(seqMat[state][strm][1], vqItem);
               continue;
            }

            if (!hset.msdflag[strm]) {
               StoreItem(seqMat[state][strm][1], o.fv[strm]);
               continue;
            }

            /* multi-space stream: pick the sequence matching this order */
            spaceOrder = SpaceOrder(o.fv[strm]);
            spaceIdx = IncludeSpace(msdInfo[state][strm], spaceOrder);
            if (spaceIdx != 0) {
               StoreItem(seqMat[state][strm][spaceIdx], o.fv[strm]);
            } else if (!ignOutVec) {
               HError(2122,"UCollectData: no space corresponded to order[%d]",spaceOrder);
            }
         }
      }
   }
}
/* EstimateModel: top level of iterative estimation process */ void EstimateModel(void) { LogFloat totalP,newP,delta; Boolean converged = FALSE; int i,iter,numSegs,segLen; IntVec states; /* array[1..numSegs] of State */ IntVec *mixes; /* array[1..S][1..numSegs] of MixComp */ if (trace&T_TOP) printf("Starting Estimation Process\n"); if (newModel){ UniformSegment(); } totalP=LZERO; for (iter=1; !converged && iter<=maxIter; iter++){ ZeroAccs(&hset, uFlags); /* Clear all accumulators */ numSegs = NumSegs(segStore); /* Align on each training segment and accumulate stats */ for (newP=0.0,i=1;i<=numSegs;i++) { segLen = SegLength(segStore,i); states = CreateIntVec(&gstack,segLen); mixes = (hset.hsKind==DISCRETEHS)?NULL: CreateMixes(&gstack,segLen); newP += ViterbiAlign(i,segLen,states,mixes); if (trace&T_ALN) ShowAlignment(i,segLen,states,mixes); UpdateCounts(i,segLen,states,mixes); FreeIntVec(&gstack,states); /* disposes mixes too */ } /* Update parameters or quit */ newP /= (float)numSegs; delta = newP - totalP; converged = (iter>1) && (fabs(delta) < epsilon); if (!converged) UpdateParameters(); totalP = newP; if (trace & T_TOP){ printf("Iteration %d: Average LogP =%12.5f",iter,totalP); if (iter > 1) printf(" Change =%12.5f\n",delta); else printf("\n"); fflush(stdout); } } if (trace&T_TOP) { if (converged) printf("Estimation converged at iteration %d\n",iter); else printf("Estimation aborted at iteration %d\n",iter); fflush(stdout); } }
int main(int argc, char *argv[]) { char *datafn, *s; int nSeg; void Initialise(void); void LoadFile(char *fn); void EstimateModel(void); void SaveModel(char *outfn); if(InitShell(argc,argv,hinit_version,hinit_vc_id)<SUCCESS) HError(2100,"HInit: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(2100,"HInit: InitParm failed"); InitTrain(); InitUtil(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); CreateHMMSet(&hset,&gstack,FALSE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(2119,"HInit: Bad switch %s; must be single letter",s); switch(s[0]){ case 'e': epsilon = GetChkedFlt(0.0,1.0,s); break; case 'i': maxIter = GetChkedInt(0,100,s); break; case 'l': if (NextArg() != STRINGARG) HError(2119,"HInit: Segment label expected"); segLab = GetStrArg(); break; case 'm': minSeg = GetChkedInt(1,1000,s); break; case 'n': newModel = FALSE; break; case 'o': outfn = GetStrArg(); break; case 'u': SetuFlags(); break; case 'v': minVar = GetChkedFlt(0.0,10.0,s); break; case 'w': mixWeightFloor = MINMIX * GetChkedFlt(0.0,10000.0,s); break; case 'B': saveBinary = TRUE; break; case 'F': if (NextArg() != STRINGARG) HError(2119,"HInit: Data File format expected"); if((dff = Str2Format(GetStrArg())) == ALIEN) HError(-2189,"HInit: Warning ALIEN Data file format set"); break; case 'G': if (NextArg() != STRINGARG) HError(2119,"HInit: Label File format expected"); if((lff = Str2Format(GetStrArg())) == ALIEN) HError(-2189,"HInit: Warning ALIEN Label file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(2119,"HInit: HMM macro file name expected"); AddMMF(&hset,GetStrArg()); break; case 'I': if (NextArg() != STRINGARG) HError(2119,"HInit: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'L': if (NextArg()!=STRINGARG) HError(2119,"HInit: Label file directory expected"); labDir = 
GetStrArg(); break; case 'M': if (NextArg()!=STRINGARG) HError(2119,"HInit: Output macro file directory expected"); outDir = GetStrArg(); break; case 'T': if (NextArg() != INTARG) HError(2119,"HInit: Trace value expected"); trace = GetChkedInt(0,01777,s); break; case 'X': if (NextArg()!=STRINGARG) HError(2119,"HInit: Label file extension expected"); labExt = GetStrArg(); break; default: HError(2119,"HInit: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(2119,"HInit: source HMM file name expected"); hmmfn = GetStrArg(); Initialise(); do { if (NextArg()!=STRINGARG) HError(2119,"HInit: training data file name expected"); datafn = GetStrArg(); LoadFile(datafn); } while (NumArgs()>0); nSeg = NumSegs(segStore); if (nSeg < minSeg) HError(2121,"HInit: Too Few Observation Sequences [%d]",nSeg); if (trace&T_TOP) { printf("%d Observation Sequences Loaded\n",nSeg); fflush(stdout); } EstimateModel(); SaveModel(outfn); if (trace&T_TOP) printf("Output written to directory %s\n", outDir==NULL?"current":outDir); Exit(0); return (0); /* never reached -- make compiler happy */ }
int mainHInit(int argc, char *argv[]) { static int ft=1; char *datafn, *s; int nSeg; zwangModify(); if(InitShell(argc,argv,hinit_version,hinit_vc_id)<SUCCESS) HError(2100,"HInit: InitShell failed"); if(ft) { if(isMemInit==0) { InitMem(); isMemInit=1; } InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(2100,"HInit: InitParm failed"); InitTrain(); InitUtil(); ft=0; /* Stacks for global structures requiring memory allocation */ CreateHeap(&segmentStack,"SegStore", MSTAK, 1, 0.0, 100000, LONG_MAX); CreateHeap(&sequenceStack,"SeqStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&clustSetStack,"ClustSetStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&transStack,"TransStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&traceBackStack,"TraceBackStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&bufferStack,"BufferStore", MSTAK, 1, 0.0, 1000, 1000); } zwangHMemGetConf(); zwangHWaveGetConf(); zwangHLabelGetConf(); zwangHMathGetConf(); zwangHSigPGetConf(); zwangHAudioGetConf(); zwangHVQGetConf(); zwangHModelGetConf(); zwangHParmGetConf(); zwangHTrainGetConf(); zwangHUtilGetConf(); SetConfParms(); CreateHMMSet(&hset,&gstack,FALSE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(2119,"HInit: Bad switch %s; must be single letter",s); switch(s[0]){ case 'e': epsilon = GetChkedFlt(0.0,1.0,s); break; case 'i': maxIter = GetChkedInt(0,100,s); break; case 'l': if (NextArg() != STRINGARG) HError(2119,"HInit: Segment label expected"); segLab = GetStrArg(); break; case 'm': minSeg = GetChkedInt(1,1000,s); break; case 'n': newModel = FALSE; break; case 'o': outfn = GetStrArg(); break; case 'u': SetuFlags(); break; case 'v': minVar = GetChkedFlt(0.0,10.0,s); break; case 'w': mixWeightFloor = MINMIX * GetChkedFlt(0.0,10000.0,s); break; case 'B': saveBinary = TRUE; break; case 'F': if (NextArg() != STRINGARG) HError(2119,"HInit: Data File format expected"); if((dff = Str2Format(GetStrArg())) == ALIEN) 
HError(-2189,"HInit: Warning ALIEN Data file format set"); break; case 'G': if (NextArg() != STRINGARG) HError(2119,"HInit: Label File format expected"); if((lff = Str2Format(GetStrArg())) == ALIEN) HError(-2189,"HInit: Warning ALIEN Label file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(2119,"HInit: HMM macro file name expected"); AddMMF(&hset,GetStrArg()); break; case 'I': if (NextArg() != STRINGARG) HError(2119,"HInit: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'L': if (NextArg()!=STRINGARG) HError(2119,"HInit: Label file directory expected"); labDir = GetStrArg(); break; case 'M': if (NextArg()!=STRINGARG) HError(2119,"HInit: Output macro file directory expected"); outDir = GetStrArg(); break; case 'T': if (NextArg() != INTARG) HError(2119,"HInit: Trace value expected"); trace = GetChkedInt(0,01777,s); break; case 'X': if (NextArg()!=STRINGARG) HError(2119,"HInit: Label file extension expected"); labExt = GetStrArg(); break; default: HError(2119,"HInit: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(2119,"HInit: source HMM file name expected"); hmmfn = GetStrArg(); Initialise(); do { if (NextArg()!=STRINGARG) HError(2119,"HInit: training data file name expected"); datafn = GetStrArg(); LoadFile(datafn); } while (NumArgs()>0); nSeg = NumSegs(segStore); if (nSeg < minSeg) HError(2121,"HInit: Too Few Observation Sequences [%d]",nSeg); EstimateModel(); SaveModel(outfn); Dispose(hset.hmem,hset.mtab); ResetHeap(&gstack); ResetHeap(&segmentStack); ResetHeap(&sequenceStack); ResetHeap(&clustSetStack); ResetHeap(&transStack); ResetHeap(&traceBackStack); ResetHeap(&bufferStack); zwangInitParmClear(); zwangInitShellClear(); return (0); /* never reached -- make compiler happy */ }