コード例 #1
0
ファイル: ARec.cpp プロジェクト: deardaniel/PizzaTest
// Prime the recogniser ready to process an utterance
void ARec::PrimeRecogniser()
{
   ResourceGroup *g;

   g = (grpName=="")?rmgr->MainGroup():rmgr->FindGroup(grpName);
   if (g == NULL){
      if (grpName=="") HRError(0,"ARec: cant find main resource group\n");
      else HRError(0,"ARec: cant find resource group %s\n",grpName.c_str());
      throw ATK_Error(11001);
   }
   Network *net = g->MakeNetwork();
   LModel *lm = g->MakeNGram();
   opMap.clear();   // forget all previously output packets
   StartRecognition(pri,net,lmScale,wordPen,prScale,ngScale,lm);
   SetPruningLevels(pri,maxActive,genBeam,wordBeam,nBeam,10.0);
   frameCount = 0; tact = 0;
   if (showRD){
      string gn = (grpName=="")?"main":grpName;
      string s = "Primed with "+  gn + "\n";
      HPostMessage(HThreadSelf(),s.c_str());
   }
}
コード例 #2
0
ファイル: HVite.c プロジェクト: 2hanson/voice_dialling
/* ProcessFile: recognise one utterance and write out the results.

     fn           parameter file to recognise; NULL selects direct audio
     net          recognition network handed to StartRecognition
     utterNum     utterance index, used only in the T_MMU memory trace
     currGenBeam  general beam handed to SetPruningLevels for this pass
     restartable  if TRUE and no token survived, return FALSE without
                  producing any output (presumably so that the caller can
                  retry with a different beam - confirm against caller)

   Returns FALSE if CompleteRecognition produced no lattice or if no token
   survived to the end of the network, TRUE otherwise. */
Boolean ProcessFile(char *fn, Network *net, int utterNum, LogDouble currGenBeam, Boolean restartable)
{
   FILE *file;
   ParmBuf pbuf;
   BufferInfo pbinfo;
   NetNode *d;
   Lattice *lat;
   LArc *arc,*cur;
   LNode *node;
   Transcription *trans;
   MLink m;
   LogFloat lmlk,aclk;
   int s,j,tact,nFrames;
   LatFormat form;
   char *p,lfn[255],buf1[80],buf2[80],thisFN[MAXSTRLEN];
   Boolean enableOutput = TRUE, isPipe;

   /* Decide the base name used for output files: the input file name, a
      generated counter name when saving direct audio, or none at all. */
   if (fn!=NULL) {
      /* thisFN is a fixed-size buffer: refuse names that cannot fit rather
         than overrunning it (HError is treated as fatal throughout) */
      if (strlen(fn)>=MAXSTRLEN)
         HError(3250,"ProcessFile: File name too long: %s",fn);
      strcpy(thisFN,fn);
   }
   else if (saveAudioOut)
      CounterFN(roPrefix,roSuffix,++roCounter,4,thisFN);
   else {
      enableOutput = FALSE;
      thisFN[0] = '\0';   /* lat->utterance points here; keep it defined */
   }

   if((pbuf = OpenBuffer(&bufHeap,fn,50,dfmt,TRI_UNDEF,TRI_UNDEF))==NULL)
      HError(3250,"ProcessFile: Config parameters invalid");

   /* Check pbuf same as hset */
   GetBufferInfo(pbuf,&pbinfo);
   if (pbinfo.tgtPK!=hset.pkind)
      HError(3231,"ProcessFile: Incompatible sample kind %s vs %s",
             ParmKind2Str(pbinfo.tgtPK,buf1),
             ParmKind2Str(hset.pkind,buf2));
   if (pbinfo.a != NULL && replay)
      AttachReplayBuf(pbinfo.a, (int) (3*(1.0E+07/pbinfo.srcSampRate)));

   StartRecognition(vri,net,lmScale,wordPen,prScale);
   SetPruningLevels(vri,maxActive,currGenBeam,wordBeam,nBeam,tmBeam);

   /* Feed observations to the recogniser until the buffer is exhausted,
      counting frames and accumulating the per-frame vri->nact values */
   tact=0; nFrames=0;
   StartBuffer(pbuf);
   while(BufferStatus(pbuf)!=PB_CLEARED) {
      ReadAsBuffer(pbuf,&obs);
      if (trace&T_OBS) PrintObservation(nFrames,&obs,13);

      if (hset.hsKind==DISCRETEHS){
         for (s=1; s<=hset.swidth[0]; s++){
            if( (obs.vq[s] < 1) || (obs.vq[s] > maxMixInS[s]))
               /* fn is NULL for direct audio, so never pass it raw to %s */
               HError(3250,"ProcessFile: Discrete data value [ %d ] out of range in stream [ %d ] in file %s",
                      obs.vq[s],s,(fn!=NULL)?fn:"(direct audio)");
         }
      }

      ProcessObservation(vri,&obs,-1,xfInfo.inXForm);

      if (trace & T_FRS) {
         /* follow links[0] for at most 30 steps looking for a word node */
         for (d=vri->genMaxNode,j=0;j<30;d=d->links[0].node,j++)
            if (d->type==n_word) break;
         if (d->type==n_word){
            if (d->info.pron==NULL) p=":bound:";
            else p=d->info.pron->word->wordName->name;
         }
         else p=":external:";
         m=FindMacroStruct(&hset,'h',vri->genMaxNode->info.hmm);
         printf("Optimum @%-4d HMM: %s (%s)  %d %5.3f\n",
                vri->frame,m->id->name,p,
                vri->nact,vri->genMaxTok.like/vri->frame);
         fflush(stdout);
      }
      nFrames++;
      tact+=vri->nact;
   }
   lat=CompleteRecognition(vri,pbinfo.tgtSampRate/10000000.0,&ansHeap);

   /* No lattice at all: report, replay if requested, and give up */
   if (lat==NULL) {
      if ((trace & T_TOP) && fn != NULL){
         if (restartable)
            printf("No tokens survived to final node of network at beam %.1f\n", currGenBeam);
         else
            printf("No tokens survived to final node of network\n");
         fflush(stdout);
      } else if (fn==NULL){
         printf("Sorry [%d frames]?\n",nFrames);fflush(stdout);
      }
      if (pbinfo.a != NULL && replay)  ReplayAudio(pbinfo);
      CloseBuffer(pbuf);
      return FALSE;
   }

   /* Restartable pass with no surviving token: produce no output.  Release
      the lattice and the input buffer first, in the same order as the
      normal exit path; previously this path returned without doing either. */
   if (vri->noTokenSurvived && restartable) {
      Dispose(&ansHeap,lat);
      CloseBuffer(pbuf);
      return FALSE;
   }

   if (vri->noTokenSurvived && (trace & T_TOP)) {
      printf("No tokens survived to final node of network\n");
      printf("  Output most likely partial hypothesis within network\n");
      fflush(stdout);
   }

   lat->utterance=thisFN;
   lat->net=wdNetFn;
   lat->vocab=dictFn;

   /* Print a single word sequence through the lattice with its scores */
   if ((trace & T_TOP) || fn==NULL) {
      /* start from the first node that has no predecessor arc */
      node=NULL;
      for (j=0;j<lat->nn;j++) {
         node=lat->lnodes+j;
         if (node->pred==NULL) break;
         node=NULL;
      }
      aclk=lmlk=0.0;
      while(node!=NULL) {
         /* take the last arc in this node's follower list */
         for (arc=NULL,cur=node->foll;cur!=NULL;cur=cur->farc) arc=cur;
         if (arc==NULL) break;
         if (arc->end->word!=NULL)
            printf("%s ",arc->end->word->wordName->name);
         aclk+=arc->aclike+arc->prlike*lat->prscale;
         lmlk+=arc->lmlike*lat->lmscale+lat->wdpenalty;
         node=arc->end;
      }
      printf(" ==  [%d frames] %.4f [Ac=%.1f LM=%.1f] (Act=%.1f)\n",nFrames,
             (aclk+lmlk)/nFrames, aclk,lmlk,(float)tact/nFrames);
      fflush(stdout);
   }
   if (pbinfo.a != NULL && replay)  ReplayAudio(pbinfo);

   /* accumulate stats for online unsupervised adaptation
      only if a token survived (lat is known to be non-NULL here) */
   if ((!vri->noTokenSurvived) && ((update > 0) || (xfInfo.useOutXForm)))
      DoOnlineAdaptation(lat, pbuf, nFrames);

   if (enableOutput){
      if (nToks>1 && latExt!=NULL) {
         MakeFN(thisFN,labDir,latExt,lfn);
         if ((file=FOpen(lfn,NetOFilter,&isPipe))==NULL)
            HError(3211,"ProcessFile: Could not open file %s for lattice output",lfn);
         if (latForm==NULL)
            form=HLAT_DEFAULT;
         else {
            /* each recognised letter in latForm switches on one lattice
               output flag; unrecognised letters are ignored */
            for (p=latForm,form=0;*p!=0;p++) {
               switch (*p) {
               case 'A': form|=HLAT_ALABS; break;
               case 'B': form|=HLAT_LBIN; break;
               case 't': form|=HLAT_TIMES; break;
               case 'v': form|=HLAT_PRON; break;
               case 'a': form|=HLAT_ACLIKE; break;
               case 'l': form|=HLAT_LMLIKE; break;
               case 'd': form|=HLAT_ALIGN; break;
               case 'm': form|=HLAT_ALDUR; break;
               case 'n': form|=HLAT_ALLIKE; break;
               case 'r': form|=HLAT_PRLIKE; break;
               }
            }
         }
         if(WriteLattice(lat,file,form)<SUCCESS)
            HError(3214,"ProcessFile: WriteLattice failed");

         FClose(file,isPipe);
      }

      /* only output 1-best transcription if generating lattices */
      if (nTrans > 1 && latExt != NULL)
         trans=TranscriptionFromLattice(&ansHeap,lat,1);
      /* output N-best transcriptions as usual */
      else
         trans=TranscriptionFromLattice(&ansHeap,lat,nTrans);

      if (labForm!=NULL)
         FormatTranscription(trans,pbinfo.tgtSampRate,states,models,
                             strchr(labForm,'X')!=NULL,
                             strchr(labForm,'N')!=NULL,strchr(labForm,'S')!=NULL,
                             strchr(labForm,'C')!=NULL,strchr(labForm,'T')!=NULL,
                             strchr(labForm,'W')!=NULL,strchr(labForm,'M')!=NULL);

      MakeFN(thisFN,labDir,labExt,lfn);
      /* NOTE(review): the LSave result is not checked; the check below was
         already commented out in the original, so this is left as is.
         if(LSave(lfn,trans,ofmt)<SUCCESS)
         HError(3214,"ProcessFile: Cannot save file %s", lfn); */
      LSave(lfn,trans,ofmt);
      Dispose(&ansHeap,trans);
   }
   Dispose(&ansHeap,lat);
   CloseBuffer(pbuf);
   if (trace & T_MMU){
      printf("Memory State after utter %d\n",utterNum);
      PrintAllHeapStats();
   }

   return !vri->noTokenSurvived;
}
コード例 #3
0
ファイル: HVite.c プロジェクト: 2hanson/voice_dialling
/* Initialise: set up global data structures.

   Loads the HMM set, creates the shared observation, optionally sets up the
   structures used for adaptation, creates the input/replay/lattice heaps,
   creates the recogniser and reads the dictionary.  Takes no arguments and
   returns nothing; all results are stored in file-level globals.  Failures
   are reported through HError. */
void Initialise(void)
{
   Boolean eSep;
   int s;

   /* Load hmms, convert to inverse DiagC */
   if(MakeHMMSet(&hset,hmmListFn)<SUCCESS) 
      HError(3228,"Initialise: MakeHMMSet failed");
   if(LoadHMMSet(&hset,hmmDir,hmmExt)<SUCCESS) 
      HError(3228,"Initialise: LoadHMMSet failed");
   ConvDiagC(&hset,TRUE);
   
   /* Create observation and storage for input buffer */
   SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep);
   obs=MakeObservation(&gstack,hset.swidth,hset.pkind,
                       hset.hsKind==DISCRETEHS,eSep);

   /* sort out masks just in case using adaptation: unset input and parent
      speaker patterns fall back to the output speaker pattern */
   if (xfInfo.inSpkrPat == NULL) xfInfo.inSpkrPat = xfInfo.outSpkrPat; 
   if (xfInfo.paSpkrPat == NULL) xfInfo.paSpkrPat = xfInfo.outSpkrPat; 

   /* Adaptation structures are only needed when an output transform is in
      use or update is positive */
   if (xfInfo.useOutXForm || (update>0)) {
      CreateHeap(&regHeap,   "regClassStore",  MSTAK, 1, 0.5, 1000, 8000 );
      /* This initialises things - temporary hack - THINK!! */
      CreateAdaptXForm(&hset, "tmp");
      /* initialise structures for the f-b frame-state alignment pass */
      utt = (UttInfo *) New(&regHeap, sizeof(UttInfo));
      fbInfo = (FBInfo *) New(&regHeap, sizeof(FBInfo));
      /* initialise a recogniser for frame/state alignment purposes */
      alignpsi=InitPSetInfo(&hset);
      alignvri=InitVRecInfo(alignpsi,1,TRUE,FALSE);
      SetPruningLevels(alignvri,0,genBeam,-LZERO,0.0,tmBeam);
      InitUttInfo(utt, FALSE);
      InitialiseForBack(fbInfo, &regHeap, &hset,
                        (UPDSet) (UPXFORM), genBeam*2.0, genBeam*2.0, 
                        genBeam*4.0+1.0, 10.0);
      utt->twoDataFiles = FALSE;
      utt->S = hset.swidth[0]; 
      AttachPreComps(&hset,hset.hmem);
   }
    
   CreateHeap(&bufHeap,"Input Buffer heap",MSTAK,1,0.0,50000,50000);
   CreateHeap(&repHeap,"Replay Buffer heap",MSTAK,1,0.0,50000,50000);
   
   /* Record the MaxMixInSet result for the whole set and the MaxMixInSetS
      result for each stream (streams are indexed from 1) */
   maxM = MaxMixInSet(&hset);
   for (s=1; s<=hset.swidth[0]; s++)
      maxMixInS[s] = MaxMixInSetS(&hset, s);
   if (trace&T_TOP) {
      printf("Read %d physical / %d logical HMMs\n",
             hset.numPhyHMM,hset.numLogHMM);  fflush(stdout);
   }
   
   /* Initialise recogniser; with more than one token the n-best beam is
      set equal to the general beam */
   if (nToks>1) nBeam=genBeam;
   psi=InitPSetInfo(&hset);
   vri=InitVRecInfo(psi,nToks,models,states);

   /* Read dictionary and create storage for lattice */
   InitVocab(&vocab);   
   if(ReadDict(dictFn,&vocab)<SUCCESS) 
      /* message names this function, like the other diagnostics above
         (it previously claimed to come from Main) */
      HError(3213, "Initialise: ReadDict failed");
   CreateHeap(&ansHeap,"Lattice heap",MSTAK,1,0.0,4000,4000);
   if (trace & T_MEM){
      printf("Memory State After Initialisation\n");
      PrintAllHeapStats();
   }
}