/*
 * DoOnlineAdaptation: unsupervised online adaptation which treats the
 * recognition hypothesis held in lat as the transcription.
 *
 *   lat     - lattice produced by recognition; its 1-best transcription
 *             is used to build the alignment network
 *   pbuf    - parameter buffer the observations are re-read from
 *   nFrames - number of frames to read from pbuf
 *
 * Returns nFrames when statistics were accumulated, or 0 when the forced
 * alignment left no surviving token or FBFile reported failure.
 */
int DoOnlineAdaptation(Lattice *lat, ParmBuf pbuf, int nFrames)
{
   Transcription *hypTrans, *alignedTrans;
   BufferInfo info;
   Lattice *alignedLat, *labelNet;
   Network *net;
   int t, framesUsed;

   GetBufferInfo(pbuf, &info);

   /* build an alignment network from the 1-best hypothesis */
   hypTrans = TranscriptionFromLattice(&netHeap, lat, 1);
   labelNet = LatticeFromLabels(GetLabelList(hypTrans, 1), bndId,
                                &vocab, &netHeap);
   net = ExpandWordNet(&netHeap, labelNet, &vocab, &hset);

   /* do forced alignment over the first nFrames observations */
   StartRecognition(alignvri, net, 0.0, 0.0, 0.0);
   t = 0;
   while (t < nFrames) {
      ReadAsTable(pbuf, t, &obs);
      ProcessObservation(alignvri, &obs, -1, xfInfo.inXForm);
      t++;
   }
   alignedLat = CompleteRecognition(alignvri,
                                    info.tgtSampRate/10000000.0,
                                    &netHeap);

   if (alignvri->noTokenSurvived) {
      /* alignment failed: release the hypothesis and report zero frames */
      /* NOTE(review): presumably Dispose also frees everything allocated */
      /* on netHeap after hypTrans - confirm against the heap type        */
      Dispose(&netHeap, hypTrans);
      return 0;
   }

   /* reduce the aligned transcription so that it contains just the models */
   alignedTrans = TranscriptionFromLattice(&netHeap, alignedLat, 1);
   FormatTranscription(alignedTrans, info.tgtSampRate,
                       FALSE, TRUE, FALSE, FALSE, TRUE,
                       FALSE, TRUE, TRUE, FALSE);

   /* fill in the utterance storage used for the frame/state alignment */
   utt->tr = alignedTrans;
   utt->pbuf = pbuf;
   utt->Q = CountLabs(utt->tr->head);
   utt->T = nFrames;
   utt->ot = obs;

   /* transforms used while accumulating the adaptation statistics */
   fbInfo->inXForm = xfInfo.inXForm;
   fbInfo->al_inXForm = xfInfo.inXForm;
   fbInfo->paXForm = xfInfo.paXForm;

   /* frame/state alignment; a failure is reported as zero frames */
   framesUsed = FBFile(fbInfo, utt, NULL) ? nFrames : 0;

   Dispose(&netHeap, hypTrans);

   if (trace & T_TOP) {
      printf("Accumulated statistics...\n");
      fflush(stdout);
   }
   return framesUsed;
}
/**
 * Handles the outcome of the define-grammar step for the given channel.
 * When the scenario has recognition enabled, recognition is started and its
 * result is returned; otherwise the session is terminated and the result of
 * Terminate() is returned.
 */
bool RecogSession::OnDefineGrammar(mrcp_channel_t* pMrcpChannel)
{
	if(!GetScenario()->IsRecognizeEnabled())
	{
		/* nothing further to run in this scenario */
		return Terminate();
	}

	return StartRecognition(pMrcpChannel);
}
/**
 * Called when a channel has been added to the session.
 *
 * Returns false when the base-class handler rejects the event. On a
 * non-success status the session is terminated. Otherwise, if the scenario
 * enables define-grammar, a define-grammar request is created and (when
 * creation succeeded) sent, and true is returned; if not, recognition is
 * started directly and its result is returned.
 */
bool RecogSession::OnChannelAdd(mrcp_channel_t* pMrcpChannel, mrcp_sig_status_code_e status)
{
	if(!UmcSession::OnChannelAdd(pMrcpChannel,status))
	{
		return false;
	}

	if(status != MRCP_SIG_STATUS_CODE_SUCCESS)
	{
		/* error case, just terminate the demo */
		return Terminate();
	}

	if(!GetScenario()->IsDefineGrammarEnabled())
	{
		/* no grammar to define first: go straight to recognition */
		return StartRecognition(pMrcpChannel);
	}

	mrcp_message_t* pRequest = CreateDefineGrammarRequest(pMrcpChannel);
	if(pRequest)
	{
		SendMrcpRequest(pMrcpChannel,pRequest);
	}
	/* true is returned even when no request could be created */
	return true;
}
// Prime the recogniser ready to process an utterance void ARec::PrimeRecogniser() { ResourceGroup *g; g = (grpName=="")?rmgr->MainGroup():rmgr->FindGroup(grpName); if (g == NULL){ if (grpName=="") HRError(0,"ARec: cant find main resource group\n"); else HRError(0,"ARec: cant find resource group %s\n",grpName.c_str()); throw ATK_Error(11001); } Network *net = g->MakeNetwork(); LModel *lm = g->MakeNGram(); opMap.clear(); // forget all previously output packets StartRecognition(pri,net,lmScale,wordPen,prScale,ngScale,lm); SetPruningLevels(pri,maxActive,genBeam,wordBeam,nBeam,10.0); frameCount = 0; tact = 0; if (showRD){ string gn = (grpName=="")?"main":grpName; string s = "Primed with "+ gn + "\n"; HPostMessage(HThreadSelf(),s.c_str()); } }
/**
 * Called when a channel has been added to the session.
 *
 * Returns false when the base-class handler rejects the event and
 * terminates the session on a non-success status. Otherwise, when both the
 * channel object and its audio stream are available, a DTMF generator is
 * created on that stream and stored in the channel object. Recognition is
 * then started and its result returned, whether or not a generator was
 * created.
 */
bool DtmfSession::OnChannelAdd(mrcp_channel_t* pMrcpChannel, mrcp_sig_status_code_e status)
{
	if(!UmcSession::OnChannelAdd(pMrcpChannel,status))
	{
		return false;
	}

	if(status != MRCP_SIG_STATUS_CODE_SUCCESS)
	{
		/* error case, just terminate the demo */
		return Terminate();
	}

	if(RecogChannel* pChannel = (RecogChannel*) mrcp_application_channel_object_get(pMrcpChannel))
	{
		if(const mpf_audio_stream_t* pAudioStream = mrcp_application_audio_stream_get(pMrcpChannel))
		{
			pChannel->m_pDtmfGenerator = mpf_dtmf_generator_create(pAudioStream,GetSessionPool());
		}
	}

	return StartRecognition(pMrcpChannel);
}
/*
 * ProcessFile: process given file. If fn=NULL then direct audio.
 *
 *   fn          - name of the file to recognise, or NULL for direct audio
 *   net         - network handed to StartRecognition
 *   utterNum    - utterance number, used only in the memory trace output
 *   currGenBeam - general beam passed to SetPruningLevels
 *   restartable - when TRUE, FALSE is returned as soon as no token
 *                 survived to the final node and no output is written
 *                 (presumably so the caller can retry - confirm)
 *
 * Returns FALSE when CompleteRecognition produced no lattice, or when
 * restartable is set and no token survived; otherwise returns
 * !vri->noTokenSurvived.
 *
 * Every exit path after the buffer has been opened closes pbuf, and every
 * exit path with a lattice disposes it from ansHeap.
 */
Boolean ProcessFile(char *fn, Network *net, int utterNum, LogDouble currGenBeam, Boolean restartable)
{
   FILE *file;
   ParmBuf pbuf;
   BufferInfo pbinfo;
   NetNode *d;
   Lattice *lat;
   LArc *arc,*cur;
   LNode *node;
   Transcription *trans;
   MLink m;
   LogFloat lmlk,aclk;
   int s,j,tact,nFrames;
   LatFormat form;
   char *p,lfn[255],buf1[80],buf2[80],thisFN[MAXSTRLEN];
   Boolean enableOutput = TRUE, isPipe;

   /* Work out the base name used for any output files */
   /* NOTE(review): strcpy is unbounded; fn longer than MAXSTRLEN-1 */
   /* would overflow thisFN - confirm callers bound the length      */
   if (fn!=NULL)
      strcpy(thisFN,fn);
   else if (fn==NULL && saveAudioOut)
      CounterFN(roPrefix,roSuffix,++roCounter,4,thisFN);
   else {
      enableOutput = FALSE;
      /* lat->utterance is pointed at thisFN below, so keep it defined */
      thisFN[0] = '\0';
   }

   if((pbuf = OpenBuffer(&bufHeap,fn,50,dfmt,TRI_UNDEF,TRI_UNDEF))==NULL)
      HError(3250,"ProcessFile: Config parameters invalid");

   /* Check pbuf same as hset */
   GetBufferInfo(pbuf,&pbinfo);
   if (pbinfo.tgtPK!=hset.pkind)
      HError(3231,"ProcessFile: Incompatible sample kind %s vs %s",
             ParmKind2Str(pbinfo.tgtPK,buf1),
             ParmKind2Str(hset.pkind,buf2));

   if (pbinfo.a != NULL && replay)
      AttachReplayBuf(pbinfo.a, (int) (3*(1.0E+07/pbinfo.srcSampRate)));

   StartRecognition(vri,net,lmScale,wordPen,prScale);
   SetPruningLevels(vri,maxActive,currGenBeam,wordBeam,nBeam,tmBeam);

   /* Feed every observation in the buffer to the recogniser */
   tact=0;nFrames=0;
   StartBuffer(pbuf);
   while(BufferStatus(pbuf)!=PB_CLEARED) {
      ReadAsBuffer(pbuf,&obs);
      if (trace&T_OBS) PrintObservation(nFrames,&obs,13);

      if (hset.hsKind==DISCRETEHS){
         for (s=1; s<=hset.swidth[0]; s++){
            if( (obs.vq[s] < 1) || (obs.vq[s] > maxMixInS[s]))
               HError(3250,"ProcessFile: Discrete data value [ %d ] out of range in stream [ %d ] in file %s",obs.vq[s],s,fn);
         }
      }

      ProcessObservation(vri,&obs,-1,xfInfo.inXForm);

      if (trace & T_FRS) {
         /* follow links[0] for at most 30 steps looking for a word node */
         for (d=vri->genMaxNode,j=0;j<30;d=d->links[0].node,j++)
            if (d->type==n_word) break;
         if (d->type==n_word){
            if (d->info.pron==NULL) p=":bound:";
            else p=d->info.pron->word->wordName->name;
         }
         else
            p=":external:";
         m=FindMacroStruct(&hset,'h',vri->genMaxNode->info.hmm);
         printf("Optimum @%-4d HMM: %s (%s) %d %5.3f\n",
                vri->frame,m->id->name,p,
                vri->nact,vri->genMaxTok.like/vri->frame);
         fflush(stdout);
      }
      nFrames++;
      tact+=vri->nact;
   }

   lat=CompleteRecognition(vri,pbinfo.tgtSampRate/10000000.0,&ansHeap);

   /* No lattice at all: report, clean up and give up on this utterance */
   if (lat==NULL) {
      if ((trace & T_TOP) && fn != NULL){
         if (restartable)
            printf("No tokens survived to final node of network at beam %.1f\n", currGenBeam);
         else
            printf("No tokens survived to final node of network\n");
         fflush(stdout);
      }
      else if (fn==NULL){
         printf("Sorry [%d frames]?\n",nFrames);fflush(stdout);
      }
      if (pbinfo.a != NULL && replay) ReplayAudio(pbinfo);
      CloseBuffer(pbuf);
      return FALSE;
   }

   if (vri->noTokenSurvived && restartable) {
      /* Release the lattice and the buffer before the early return, */
      /* exactly as the normal exit path at the end of this function */
      /* does; otherwise both are lost on every restart.             */
      Dispose(&ansHeap,lat);
      CloseBuffer(pbuf);
      return FALSE;
   }

   if (vri->noTokenSurvived && (trace & T_TOP)) {
      printf("No tokens survived to final node of network\n");
      printf(" Output most likely partial hypothesis within network\n");
      fflush(stdout);
   }

   lat->utterance=thisFN;
   lat->net=wdNetFn;
   lat->vocab=dictFn;

   if ((trace & T_TOP) || fn==NULL) {
      /* find the first lattice node with no predecessor */
      node=NULL;
      for (j=0;j<lat->nn;j++) {
         node=lat->lnodes+j;
         if (node->pred==NULL) break;
         node=NULL;
      }
      aclk=lmlk=0.0;
      /* walk the lattice, always taking the last arc in each follower */
      /* list, printing the words and summing the scaled likelihoods   */
      while(node!=NULL) {
         for (arc=NULL,cur=node->foll;cur!=NULL;cur=cur->farc) arc=cur;
         if (arc==NULL) break;
         if (arc->end->word!=NULL)
            printf("%s ",arc->end->word->wordName->name);
         aclk+=arc->aclike+arc->prlike*lat->prscale;
         lmlk+=arc->lmlike*lat->lmscale+lat->wdpenalty;
         node=arc->end;
      }
      printf(" == [%d frames] %.4f [Ac=%.1f LM=%.1f] (Act=%.1f)\n",nFrames,
             (aclk+lmlk)/nFrames, aclk,lmlk,(float)tact/nFrames);
      fflush(stdout);
   }

   if (pbinfo.a != NULL && replay) ReplayAudio(pbinfo);

   /* accumulate stats for online unsupervised adaptation
      only if a token survived */
   if ((lat != NULL) && (!vri->noTokenSurvived) &&
       ((update > 0) || (xfInfo.useOutXForm)))
      DoOnlineAdaptation(lat, pbuf, nFrames);

   if (enableOutput){
      if (nToks>1 && latExt!=NULL) {
         MakeFN(thisFN,labDir,latExt,lfn);
         if ((file=FOpen(lfn,NetOFilter,&isPipe))==NULL)
            HError(3211,"ProcessFile: Could not open file %s for lattice output",lfn);
         if (latForm==NULL)
            form=HLAT_DEFAULT;
         else {
            /* translate each format letter into its lattice format flag; */
            /* unrecognised letters are ignored                           */
            for (p=latForm,form=0;*p!=0;p++) {
               switch (*p) {
               case 'A': form|=HLAT_ALABS; break;
               case 'B': form|=HLAT_LBIN; break;
               case 't': form|=HLAT_TIMES; break;
               case 'v': form|=HLAT_PRON; break;
               case 'a': form|=HLAT_ACLIKE; break;
               case 'l': form|=HLAT_LMLIKE; break;
               case 'd': form|=HLAT_ALIGN; break;
               case 'm': form|=HLAT_ALDUR; break;
               case 'n': form|=HLAT_ALLIKE; break;
               case 'r': form|=HLAT_PRLIKE; break;
               }
            }
         }
         if(WriteLattice(lat,file,form)<SUCCESS)
            HError(3214,"ProcessFile: WriteLattice failed");

         FClose(file,isPipe);
      }

      /* only output 1-best transcription if generating lattices */
      if (nTrans > 1 && latExt != NULL)
         trans=TranscriptionFromLattice(&ansHeap,lat,1);
      /* output N-best transcriptions as usual */
      else
         trans=TranscriptionFromLattice(&ansHeap,lat,nTrans);

      if (labForm!=NULL)
         FormatTranscription(trans,pbinfo.tgtSampRate,states,models,
                             strchr(labForm,'X')!=NULL,
                             strchr(labForm,'N')!=NULL,strchr(labForm,'S')!=NULL,
                             strchr(labForm,'C')!=NULL,strchr(labForm,'T')!=NULL,
                             strchr(labForm,'W')!=NULL,strchr(labForm,'M')!=NULL);

      MakeFN(thisFN,labDir,labExt,lfn);
      /* if(LSave(lfn,trans,ofmt)<SUCCESS)
         HError(3214,"ProcessFile: Cannot save file %s", lfn); */
      LSave(lfn,trans,ofmt);
      Dispose(&ansHeap,trans);
   }

   Dispose(&ansHeap,lat);
   CloseBuffer(pbuf);

   if (trace & T_MMU){
      printf("Memory State after utter %d\n",utterNum);
      PrintAllHeapStats();
   }

   return !vri->noTokenSurvived;
}