/* LoadFile: load whole file or segments and accumulate variance */
static void LoadFile(char *fn)
{
   ParmBuf pbuf;
   BufferInfo info;
   char labfn[80];
   Transcription *trans;
   long segStIdx,segEnIdx;  
   int i,j,ncas,nObs;
   LLink p;
   
   if (segId == NULL)  {   /* load whole parameter file */
      if((pbuf=OpenBuffer(&iStack, fn, 0, dff, FALSE_dup, FALSE_dup))==NULL)
         HError(2050,"LoadFile: Config parameters invalid");
      GetBufferInfo(pbuf,&info);
      CheckData(fn,info);
      nObs = ObsInBuffer(pbuf);
      for (i=0; i<nObs; i++){
         ReadAsTable(pbuf,i,&obs);
         AccVar(obs);  
      }
      if (trace&T_LOAD) {
         printf(" %d observations loaded from %s\n",nObs,fn);
         fflush(stdout);
      }        
      CloseBuffer(pbuf);
   }
   else {                  /* load segment of parameter file */
      MakeFN(fn,labDir,labExt,labfn);
      trans = LOpen(&iStack,labfn,lff);
      ncas = NumCases(trans->head,segId);
      if ( ncas > 0) {
         if((pbuf=OpenBuffer(&iStack, fn, 0, dff, FALSE_dup, FALSE_dup))==NULL)
            HError(2050,"LoadFile: Config parameters invalid");
         GetBufferInfo(pbuf,&info);
         CheckData(fn,info);
         for (i=1,nObs=0; i<=ncas; i++) {
            p = GetCase(trans->head,segId,i);
            segStIdx= (long) (p->start/info.tgtSampRate);
            segEnIdx  = (long) (p->end/info.tgtSampRate);
            if (trace&T_SEGS)
               printf(" loading seg %s [%ld->%ld]\n",
                      segId->name,segStIdx,segEnIdx);
            if (segEnIdx >= ObsInBuffer(pbuf))
               segEnIdx = ObsInBuffer(pbuf)-1;
            if (segEnIdx >= segStIdx) {
               for (j=segStIdx;j<=segEnIdx;j++) {
                  ReadAsTable(pbuf,j,&obs);
                  AccVar(obs); ++nObs;
               }
            }
         }        
         CloseBuffer(pbuf);
         if (trace&T_LOAD)
            printf(" %d observations loaded from %s\n",nObs,fn);
      }  
   }
   ResetHeap(&iStack);
}
Beispiel #2
0
/* LoadTransLabs: build the label file name for src (left in labFile),
   open it as a transcription on lStack and return it.  When chopping is
   enabled and no explicit start/end has been set, SetLabSeg is applied
   to the transcription first. */
Transcription *LoadTransLabs(char *src)
{
   Transcription *trans;

   MakeFN(src, labDir, labExt, labFile);
   if (trace & T_SEGMENT) {
      printf("Loading label file %s\n",labFile);
   }
   trans = LOpen(&lStack, labFile, srcLabFF);
   if (chopF) {
      if (!stenSet) {
         SetLabSeg(trans);
      }
   }
   return trans;
}
Beispiel #3
0
/* DoGeneration: generate a parameter sequence for one label file.
   Opens the label file derived from labfn, sets up genInfo and the
   utterance structure from it, runs ParamGen, writes the results and
   adds this utterance's frame count and utt->pr to the running totals. */
void DoGeneration(char *labfn)
{
   char labPath[MAXFNAMELEN], nameBuf[MAXSTRLEN];
   int frame;
   Boolean eSep;
   Transcription *labTrans;

   if (trace & T_TOP) {
      printf(" Generating Label %s\n", NameOf(labfn, nameBuf));
      fflush(stdout);
   }

   /* read the input label file */
   ResetHeap(&utt->transStack);
   MakeFN(labfn, labDir, labExt, labPath);
   labTrans = LOpen(&genStack, labPath, lff);

   /* set up the generation info from the transcription */
   InitialiseGenInfo(genInfo, labTrans, FALSE);

   /* fill in the utterance fields read by ParamGen */
   SetStreamWidths(hmset.pkind, hmset.vecSize, hmset.swidth, &eSep);
   utt->tr = labTrans;
   utt->Q = genInfo->labseqlen;
   utt->T = genInfo->tframe;
   utt->twoDataFiles = FALSE;

   /* allocate T observations and shift the base pointer down by one so
      that the array is indexed 1..T */
   utt->o = ((Observation *) New(&gstack, utt->T * sizeof(Observation))) - 1;
   frame = 1;
   while (frame <= utt->T) {
      utt->o[frame] = MakeObservation(&gstack, hmset.swidth, hmset.pkind, FALSE, eSep);
      frame++;
   }

   /* generate the parameters */
   ParamGen(genInfo, utt, fbInfo, type);

   /* write state durations (unless stateAlign is set) and the parameters */
   if (!stateAlign) {
      WriteStateDurations(labfn, genInfo);
   }
   WriteParms(labfn, genInfo);

   /* undo the 1-based shift before handing the array back to gstack */
   utt->o++;
   Dispose(&gstack, utt->o);
   ResetGenInfo(genInfo);

   /* update running totals */
   totalT += utt->T;
   totalPr += utt->pr;
}
Beispiel #4
0
int main(int argc, char *argv[])
{
   char * labFn, *listfn, *s;
   int i,fidx;
   MLFEntry *me = NULL;
   Transcription *t;
   void InitStats(char *listfn);
   void GatherStats(Transcription *t);
   void OutputStats(void);

   if(InitShell(argc,argv,hlstats_version,hlstats_vc_id)<SUCCESS)
      HError(1300,"HLStats: InitShell failed");

   InitMem();   InitMath();
   InitWave();  InitLabel();
   InitLM();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(0);
   SetConfParms();
   
   enterId=GetLabId("!ENTER",TRUE); /* All sentences should or are coerced */
   exitId=GetLabId("!EXIT",TRUE);   /*  to start enterId and end exitId */
   nullId=GetLabId("!NULL",TRUE);  /* Name for words not in list */

   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1) 
         HError(1319,"HLStats: Bad switch %s; must be single letter",s);
      switch(s[0]){
      case 'b':
         doBigram = TRUE;
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Ngram output file name expected");
         bigFile = GetStrArg();
         break;
      case 'c':
         doLCount = TRUE;
         lCountLimit = GetChkedInt(0,100000,s);
         break;
      case 'd':
         doDurs = TRUE; break;
      case 'f':
         bigFloor = GetChkedFlt(0.0,1000.0,s);
         break;
      case 'h':
         hSize =  GetChkedInt(1,2,s);
         break;
      case 'l':
         doList = TRUE; 
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Output label list file name expected");
         listFile = GetStrArg();
         break;
      case 'o':
         doBOff = TRUE;
         break;
      case 'p':
         doPCount = TRUE;
         pCountLimit = GetChkedInt(0,100000,s);
         break;
      case 's':
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: ENTER label name expected");
         enterId=GetLabId(GetStrArg(),TRUE);
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: EXIT label name expected");
         exitId=GetLabId(GetStrArg(),TRUE);
         break;
      case 't':
         bigThresh = GetChkedInt(0,100,s);
         break;
      case 'u':
         uniFloor = GetChkedFlt(0.0,1000.0,s);
         break;
      case 'G':
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Input label File format expected");
         if((ff = Str2Format(GetStrArg())) == ALIEN)
            HError(-1389,"HLStats: Warning ALIEN Label file format set");
         break;
      case 'I':
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Input MLF file name expected");
         LoadMasterFile(GetStrArg());
         break;
      case 'T':
         if (NextArg() != INTARG)
            HError(1319,"HLStats: Trace value expected");
         trace = GetChkedInt(0,017,s); break;
      default:
         HError(1319,"HLStats: Unknown switch %s",s);
      }
   }

   if (NextArg()!=STRINGARG)
      HError(1319,"HLStats: Label list file name expected");
   listfn = GetStrArg();
   if (!(doDurs || doBigram || doList || doLCount || doPCount))
      HError(1330,"HLStats: Nothing to do!");
   InitStats(listfn);

   i=0;
   while (NumArgs()>0) {
      if (NextArg()!=STRINGARG)
         HError(1319,"HLStats: Input label file name expected");
      labFn = GetStrArg();
      if (IsMLFFile(labFn)) {
         fidx = NumMLFFiles();
         if ((me=GetMLFTable()) != NULL) {
            while(me->next != NULL) me=me->next;
            LoadMasterFile(labFn);
            me=me->next;
         }
         else {
            LoadMasterFile(labFn);
            me=GetMLFTable();
         }
         while (me != NULL) {
            if (me->type == MLF_IMMEDIATE && me->def.immed.fidx == fidx) {
               if (trace&T_FIL) {
                  printf("  Processing file %s\n",me->pattern); fflush(stdout);
               }
               t = LOpen(&tmpHeap,me->pattern,ff);
               if (t->numLists<1)
                  HError(-1330,"HLStats: Empty file %s",me->pattern);
               else
                  GatherStats(t),i++;

               Dispose(&tmpHeap,t);
            }
            me = me->next;
            if ((trace&T_BAS) && !(trace&T_FIL) &&
                NumMLFEntries()>5000 && i%1000==0) 
               printf(". "),fflush(stdout);
         }
         if ((trace&T_BAS) && !(trace&T_FIL) && NumMLFEntries()>5000)
            printf("\n");
      } else {
         if (trace&T_FIL) {
            printf("  Processing file %s\n",labFn); fflush(stdout);
         }
         t = LOpen(&tmpHeap,labFn,ff);
         if (t->numLists<1)
            HError(-1330,"HLStats: Empty file %s",me->pattern);
         else
            GatherStats(t),i++;
         Dispose(&tmpHeap,t);
      }
   }
   if (trace&T_MEM)
      PrintAllHeapStats();
   OutputStats();

   if (trace&T_MEM)
      PrintAllHeapStats();
   Exit(0);
   return (0);          /* never reached -- make compiler happy */
}
Beispiel #5
0
/* LoadFile: load whole file or segments into segStore */
void LoadFile(char *fn)
{
   BufferInfo info;
   char labfn[80];
   Transcription *trans;
   long segStIdx,segEnIdx;
   static int segIdx=1;  /* Between call handle on latest seg in segStore */  
   static int prevSegIdx=1;
   HTime tStart, tEnd;
   int i,k,s,ncas,nObs=0,segLen;
   LLink p;
   Observation obs;

   if((pbuf=OpenBuffer(&bufferStack, fn, 10, dff, FALSE_dup, FALSE_dup))==NULL)
      HError(2150,"LoadFile: Config parameters invalid");
   GetBufferInfo(pbuf,&info);
   CheckData(fn,info);
   if (firstTime) InitSegStore(&info);

   if (segId == NULL)  {   /* load whole parameter file */
      nObs = ObsInBuffer(pbuf);
      tStart = 0.0;
      tEnd = (info.tgtSampRate * nObs);
      LoadSegment(segStore, tStart, tEnd, pbuf);
      segIdx++;
   }
   else {                  /* load segment of parameter file */
      MakeFN(fn,labDir,labExt,labfn);
      trans = LOpen(&transStack,labfn,lff);
      ncas = NumCases(trans->head,segId);
      if ( ncas > 0) {
         for (i=1,nObs=0; i<=ncas; i++) {
            p = GetCase(trans->head,segId,i);
            segStIdx = (long)(p->start/info.tgtSampRate);
            segEnIdx = (long)(p->end/info.tgtSampRate);
            if (segEnIdx >= ObsInBuffer(pbuf))
               segEnIdx = ObsInBuffer(pbuf)-1;
            if (segEnIdx - segStIdx + 1 >= nStates-2) {
               LoadSegment(segStore, p->start, p->end, pbuf);
               if (trace&T_LD1)
                  printf("  loading seg %s %f[%ld]->%f[%ld]\n",segId->name,
                         p->start,segStIdx,p->end,segEnIdx);
               nObs += SegLength(segStore, segIdx);
               segIdx++;
            }else if (trace&T_LD1)
               printf("   seg %s %f->%f ignored\n",segId->name,
                      p->start,p->end);
         }        
      }  
   }
   if (hset.hsKind == DISCRETEHS){
      for (k=prevSegIdx; k<segIdx; k++){
         segLen = SegLength(segStore, k);
         for (i=1; i<=segLen; i++){
            obs = GetSegObs(segStore, k, i);
            for (s=1; s<=nStreams; s++){
               if( (obs.vq[s] < 1) || (obs.vq[s] > maxMixInS[s]))
                  HError(2150,"LoadFile: Discrete data value [ %d ] out of range in stream [ %d ] in file %s",obs.vq[s],s,fn);
            }
         }
      }
      prevSegIdx=segIdx;
   }

   if (trace&T_LD0)
      printf(" %d observations loaded from %s\n",nObs,fn);
   CloseBuffer(pbuf);
   ResetHeap(&transStack);
}
Beispiel #6
0
/* LoadFile: load whole file or segments and accumulate variance */
void LoadFile(char *fn)
{
   ParmBuf pbuf;
   BufferInfo info;
   char labfn[80];
   Transcription *trans;
   long segStIdx,segEnIdx;  
   int i,s,j,ncas,nObs=0;
   LLink p;

   if (segId == NULL)  {   /* load whole parameter file */
      if((pbuf=OpenBuffer(&iStack, fn, 0, dff, FALSE_dup, FALSE_dup))==NULL)
         HError(2550,"LoadFile: Config parameters invalid");
      GetBufferInfo(pbuf,&info);
      CheckData(fn,info);
      nObs = ObsInBuffer(pbuf);
      
      for (i=0; i<nObs; i++) {
         for(s=1;s<=swidth[0];s++)
            obs.fv[s] = CreateVector(&dStack,swidth[s]);
         ReadAsTable(pbuf,i,&obs);
         for(s=1;s<=swidth[0];s++)
            StoreItem(dSeq[s],(Ptr)obs.fv[s]);
      }
      CloseBuffer(pbuf);
   }
   else { /* load segment of parameter file */
      MakeFN(fn,labDir,labExt,labfn);
      trans = LOpen(&iStack,labfn,lff);
      ncas = NumCases(trans->head,segId);
      if ( ncas > 0) {
         if((pbuf=OpenBuffer(&iStack, fn, 0, dff, FALSE_dup, FALSE_dup))==NULL)
            HError(2550,"LoadFile: Config parameters invalid");
         GetBufferInfo(pbuf,&info);
         CheckData(fn,info);
         for (i=1,nObs=0; i<=ncas; i++) {
            p = GetCase(trans->head,segId,i);
            segStIdx= (long) (p->start/info.tgtSampRate);
            segEnIdx  = (long) (p->end/info.tgtSampRate);
            if (trace&T_SEGS)
               printf(" loading seg %s [%ld->%ld]\n",
                      segId->name,segStIdx,segEnIdx);
            if (segEnIdx >= ObsInBuffer(pbuf))
               segEnIdx = ObsInBuffer(pbuf)-1;
            if (segEnIdx >= segStIdx) {
               for (j=segStIdx;j<=segEnIdx;j++) {
                  /* SJY: The HInit code I copied this from had no */
                  /* SJY: CreateVector call here -- a bug? */
                  for(s=1;s<=swidth[0];s++)
                     obs.fv[s] = CreateVector(&dStack,swidth[s]);
                  ReadAsTable(pbuf,j,&obs);
                  for(s=1;s<=swidth[0];s++)
                     StoreItem(dSeq[s],(Ptr)obs.fv[s]);
                  ++nObs;
               }
            }
         }        
         CloseBuffer(pbuf);
      }  
   }
   ResetHeap(&iStack);
   if (trace&T_LOAD) {
      printf(" %5d obs loaded from %s, streams: ",nObs,fn);
      for(s=1;s<=swidth[0];s++) printf("[%d]" ,swidth[s]);
      printf("\n"); fflush(stdout);
   }        
}
Beispiel #7
0
/* ProcessLabelFile: compute perplexity and related statistics from labels */
static void ProcessLabelFile(char *fn, int nSize)
{
   LLink ll;
   double ppl;
   LabList *ref;
   LabId lab;
   Transcription *tr;
   int i,numPLabs,nLabel;

   tr = LOpen(&tempHeap, fn, lff);
   if (tr->numLists < 1) {
      HError(-16635,"ProcessLabelFile: transcription file %s is Empty",fn);
      return;
   }
   ref = GetLabelList(tr, 1);
   if (ref->head->succ == ref->tail) {
      HError(-16635,"ProcessLabelFile: transcription file %s is Empty",fn);
      return;
   }
   if (trace>0) {
      printf("Processing label file: %s\n", fn);
      fflush(stdout);
   }

   nLabel = CountLabs(ref);
   ZeroStats(&sent);
   sent.nTok = nLabel + 2; sent.nUtt = 1;

   /* copy labels into pLab, mapping OOVs */
   numPLabs = 0;
   if (sstId!=NULL)             /* add sentence start marker(s) */
      for (i=0; i<(nSize-1); i++) pLab[numPLabs++] = sstId;
   for (i=0,ll=ref->head->succ; i<nLabel; i++,ll=ll->succ) {
      lab = GetEQLab(ll->labid);
      if ((i==0) && IS_SST(lab)) {
	sent.nTok--; continue;
      }
      if ((i==(nLabel-1)) && IS_SEN(lab)) {
	 sent.nTok--; continue;
      }
      if (IS_UNK(lab)) {
	 if (trace&T_OOV)
	    printf("mapping OOV: %s\n", lab->name);
	 StoreOOV(&sent,lab,1); lab = unkId;
      }
      pLab[numPLabs++] = lab;
      if (numPLabs>=LBUF_SIZE) {
         HError(16650, "Maximum utterance length in a label file exceeded (limit is compiled to be %d tokens)",
                LBUF_SIZE);
      }
   }
   if (senId!=NULL)             /* add sentence end marker */
     pLab[numPLabs++] = senId;

   CalcPerplexity(&sent, pLab, numPLabs, nSize);
   AddStats(&sent, &totl);

   if (trace&T_SEL) {     /* compact info for sentence selection */
      ppl = exp(-(sent.logpp)/(double) (sent.nWrd));
      printf("#! %.4f", ppl);
      for (i=0, ll=ref->head->succ; i<nLabel; i++, ll=ll->succ)
	 printf(" %s", ll->labid->name);
      printf("\n"); fflush(stdout);
   }
}
Beispiel #8
0
/* DoAlignment: by creating network from transcriptions or lattices.
   For each remaining command-line data file a word network is obtained
   (read from a lattice file when loadNetworks is set, otherwise built
   from the file's label transcription), expanded with ExpandWordNet and
   passed to ProcessFile.  When update>0 a transform named "inc" is
   re-estimated and applied to the model set after every `update` files.
   Everything allocated on netHeap is released at the end of each file. */
void DoAlignment(void)
{
   FILE *nf;
   char lfn[MAXSTRLEN], buf[MAXSTRLEN];
   Transcription *trans;
   Network *net;
   Boolean isPipe;
   int n=0;                 /* number of files processed so far */
   LogDouble currGenBeam;   /* beam passed to ProcessFile for the current attempt */
   AdaptXForm *incXForm;

   if (trace&T_TOP) {
      if (loadNetworks) 
         printf("New network will be used for each file\n");
      else
         printf("Label file will be used to align each file\n");
      fflush(stdout);
   }
   CreateHeap(&netHeap,"Net heap",MSTAK,1,0,8000,80000);
   while (NumArgs()>0) {
      if (NextArg() != STRINGARG)
         HError(3219,"DoAlignment: Data file name expected");
      datFN = GetStrArg();
      if (trace&T_TOP) {
         printf("Aligning File: %s\n",datFN);  fflush(stdout);
      }
      /* derive the label/lattice file name lfn, either from the part of
         datFN selected by labFileMask or from datFN itself */
      if (labFileMask != NULL ) { /* support for rescoring lattice masks */
         if (!MaskMatch(labFileMask,buf,datFN))
            HError(2319,"DoAlignment: mask %s has no match with segemnt %s",labFileMask,datFN);
         MakeFN(buf,labInDir,labInExt,lfn);
      } else {
         MakeFN(datFN,labInDir,labInExt,lfn);
      }
      if (loadNetworks) {
         /* lfn names a word lattice: read it directly */
         if ( (nf = FOpen(lfn,NetFilter,&isPipe)) == NULL)
            HError(3210,"DoAlignment: Cannot open Word Net file %s",lfn);
         if((wdNet = ReadLattice(nf,&netHeap,&vocab,TRUE,FALSE))==NULL)
            HError(3210,"DoAlignment: ReadLattice failed");
         FClose(nf,isPipe);
         if (trace&T_TOP) {
            printf("Read lattice with %d nodes / %d arcs\n",
                   wdNet->nn,wdNet->na);
            fflush(stdout);
         }
      }
      else {
         /* lfn names a label file: build the lattice from its first
            label list; ll stays NULL when the transcription has none */
         LabList *ll = NULL;

         trans=LOpen(&netHeap,lfn,ifmt);
         if (trans->numLists >= 1)
            ll = GetLabelList(trans,1);
         /* an empty transcription is only accepted when bndId is set */
         if (!ll && !bndId)
            HError(3233, "DoAlignment: cannot align empty transcription");

         wdNet=LatticeFromLabels(ll, bndId, &vocab,&netHeap);
         if (trace&T_TOP) {
            printf("Created lattice with %d nodes / %d arcs from label file\n",
                   wdNet->nn,wdNet->na);
            fflush(stdout);
         }
      }
      net=ExpandWordNet(&netHeap,wdNet,&vocab,&hset);

      ++n;
      currGenBeam = genBeam;
      /* This handles the initial input transform, parent transform setting
	 and output transform creation */
      if (UpdateSpkrStats(&hset, &xfInfo, datFN) && (!(xfInfo.useInXForm)) && (hset.semiTied == NULL)) {
         xfInfo.inXForm = NULL;
      }
      if (genBeamInc == 0.0)
         /* no beam widening configured: a single attempt */
         ProcessFile (datFN, net, n, currGenBeam, FALSE);
      else {
         /* retry with the beam widened by genBeamInc each time until
            ProcessFile reports completion or the next step would pass
            genBeamLim; the last argument is TRUE for these retryable
            attempts and FALSE for the final one */
         Boolean completed;

         completed = ProcessFile (datFN, net, n, currGenBeam, TRUE);
         currGenBeam += genBeamInc;
         while (!completed && (currGenBeam <= genBeamLim - genBeamInc)) {
            completed = ProcessFile (datFN, net, n, currGenBeam, TRUE);
            currGenBeam += genBeamInc;
         }
         if (!completed)
            ProcessFile (datFN, net, n, currGenBeam, FALSE);
      }

      /* every `update` files: re-estimate the transform and install it
         as the input transform for the following files */
      if (update > 0 && n%update == 0) {
         if (trace&T_TOP) {
            printf("Transforming model set\n");
            fflush(stdout);
         }
	 /* 
	    at every stage a new transform is created - fix?? 
	    Estimate transform and then set it up as the 
	    input XForm
	 */
	 incXForm = CreateAdaptXForm(&hset,"inc");
         TidyBaseAccs();
	 GenAdaptXForm(&hset,incXForm);
         xfInfo.inXForm = GetMLLRDiagCov(incXForm);;
	 SetXForm(&hset,xfInfo.inXForm);
	 ApplyHMMSetXForm(&hset,xfInfo.inXForm);
      }
      /* discard this file's lattice, transcription and network */
      ResetHeap(&netHeap);
   }
}