/*
 * AccGenUtt: add the frames of one utterance file to a speaker accumulator.
 *
 *   SpkrPattern  mask handed to MaskMatch together with UttFileName
 *   UttFileName  utterance file opened through OpenBuffer
 *   sa           accumulator to update; replaced by a new one from
 *                InitSpkrAcc() when the file's target vector size differs
 *                from the global vSize
 *
 * Returns the accumulator that was updated.  When MaskMatch does not report
 * TRUE, HError(2039,...) is raised and NULL is returned.
 *
 * Side effects visible here: updates the globals obs, vSize and TargetPKStr,
 * and resets iStack before returning on the success path.
 */
SpkrAcc *AccGenUtt(char *SpkrPattern, char *UttFileName, SpkrAcc *sa)
{
   char matched[MAXSTRLEN];   /* filled in by MaskMatch */
   ParmBuf utt;
   BufferInfo binfo;
   short widths[SMAX];
   Boolean eSep;
   Vector frame;
   int d;

   /* bail out first if the file name does not match the speaker pattern */
   if (MaskMatch(SpkrPattern, matched, UttFileName) != TRUE) {
      HError(2039,"HCompV: AccGenUtt: speaker pattern matching failure on file: %s\n",UttFileName);
      return NULL;
   }

   /* open the utterance and query its target parameterisation */
   utt = OpenBuffer(&iStack, UttFileName, 0, dff, FALSE_dup, FALSE_dup);
   GetBufferInfo(utt, &binfo);
   if ((info_has_zerom_dummy: 0, (binfo.tgtPK & HASZEROM)) && strchr(oflags, 'm') != NULL) {
      HError(-2021,"HCompV: AccGenUtt: qualifier _Z not appropriate when calculating means!\n");
   }

   /* the data is handled as a single stream */
   ZeroStreamWidths(1, widths);
   SetStreamWidths(binfo.tgtPK, binfo.tgtVecSize, widths, &eSep);
   obs = MakeObservation(&gstack, widths, binfo.tgtPK, FALSE, eSep);

   /* a change of vector size means a fresh accumulator is created */
   if (vSize != binfo.tgtVecSize) {
      vSize = binfo.tgtVecSize;
      sa = InitSpkrAcc();
      printf("Target observation vector size set to %d ......\n", binfo.tgtVecSize);
      fflush(stdout);
   }
   ParmKind2Str(binfo.tgtPK, TargetPKStr);

   /* read every frame, summing values and squared values per component */
   StartBuffer(utt);
   while (BufferStatus(utt) != PB_CLEARED) {
      ReadAsBuffer(utt, &obs);
      frame = obs.fv[1];      /* vector of the first stream */
      d = 1;
      while (d <= vSize) {
         sa->meanSum[d] += frame[d];
         sa->squareSum[d] += frame[d]*frame[d];
         d++;
      }
      sa->NumFrame += 1;
   }
   CloseBuffer(utt);

   strcpy(sa->SpkrName, matched);
   if (trace & T_CMV) {
      printf("Utterance %s accumulate generated for speaker %s\n", UttFileName, sa->SpkrName);
      fflush(stdout);
   }
   ResetHeap(&iStack);
   return sa;
}
barwidth = itemWidth*(nItems+1); } /* PrintHeading: print the info in given HeadInfo record */ void PrintHeading(HeadInfo h) { char buf[MAXSTRLEN]; if (h.isSource){ if (h.isAudio) strcpy(buf,"Source: Direct Audio"); else sprintf(buf,"Source: %s", h.name); }else strcpy(buf,"Target"); PrBar(buf); printf(" Sample Bytes: %-7d", h.sampSize); if (barwidth < 60 ) printf("\n"); printf(" Sample Kind: %s\n", ParmKind2Str(h.kind,buf)); printf(" Num Comps: %-7d", h.numComps); if (barwidth < 60 ) printf("\n"); printf(" Sample Period: %.1f us\n", h.period/10.0); if (!h.isAudio) { printf(" Num Samples: %-7ld", h.nSamples); if (barwidth < 60 ) printf("\n"); printf(" File Format: %s\n", Format2Str(h.fmt)); } } /* PrintWaveLine: print line of waveform samples */ void PrintWaveLine(short *data, int nItems, long idx) { int i; if (!rawOut) printf("%5ld: ",idx); for (i=0; i<nItems; i++) printf("%7d",*data++); printf("\n"); } /* PrintRawVec: print vector components */ void PrintRawVec(Vector v) { int i; for (i=1; i<=VectorSize(v); i++) printf("%e ",v[i]); printf("\n");
/*
 * ProcessFile: process given file. If fn=NULL then direct audio.
 *
 *   fn           input file name, or NULL for direct audio
 *   net          network passed to StartRecognition
 *   utterNum     utterance number, used only in the T_MMU trace message
 *   currGenBeam  beam passed to SetPruningLevels (and echoed in the
 *                "no tokens survived" trace when restartable)
 *   restartable  when TRUE and no token survived, return FALSE without
 *                writing any output
 *
 * Returns FALSE when CompleteRecognition yields no lattice, or when no
 * token survived and restartable is set; otherwise !vri->noTokenSurvived.
 *
 * Every return path after OpenBuffer has succeeded closes pbuf; the lattice
 * is disposed from ansHeap on every path on which it was created.
 */
Boolean ProcessFile(char *fn, Network *net, int utterNum, LogDouble currGenBeam, Boolean restartable)
{
   FILE *file;
   ParmBuf pbuf;
   BufferInfo pbinfo;
   NetNode *d;
   Lattice *lat;
   LArc *arc,*cur;
   LNode *node;
   Transcription *trans;
   MLink m;
   LogFloat lmlk,aclk;
   int s,j,tact,nFrames;
   LatFormat form;
   char *p,lfn[255],buf1[80],buf2[80],thisFN[MAXSTRLEN];
   Boolean enableOutput = TRUE, isPipe;

   /* Choose the base name used for output files: the input name, a
      generated counter name for saved audio, or none (output disabled). */
   /* NOTE(review): strcpy is unbounded; assumes fn fits in MAXSTRLEN
      characters - confirm against callers. */
   if (fn!=NULL)
      strcpy(thisFN,fn);
   else if (fn==NULL && saveAudioOut)
      CounterFN(roPrefix,roSuffix,++roCounter,4,thisFN);
   else
      enableOutput = FALSE;

   if((pbuf = OpenBuffer(&bufHeap,fn,50,dfmt,TRI_UNDEF,TRI_UNDEF))==NULL)
      HError(3250,"ProcessFile: Config parameters invalid");

   /* Check pbuf same as hset */
   GetBufferInfo(pbuf,&pbinfo);
   if (pbinfo.tgtPK!=hset.pkind)
      HError(3231,"ProcessFile: Incompatible sample kind %s vs %s",
             ParmKind2Str(pbinfo.tgtPK,buf1),
             ParmKind2Str(hset.pkind,buf2));

   if (pbinfo.a != NULL && replay)
      AttachReplayBuf(pbinfo.a, (int) (3*(1.0E+07/pbinfo.srcSampRate)));

   StartRecognition(vri,net,lmScale,wordPen,prScale);
   SetPruningLevels(vri,maxActive,currGenBeam,wordBeam,nBeam,tmBeam);

   /* Feed observations to the recogniser until the buffer is cleared */
   tact=0;nFrames=0;
   StartBuffer(pbuf);
   while(BufferStatus(pbuf)!=PB_CLEARED) {
      ReadAsBuffer(pbuf,&obs);
      if (trace&T_OBS) PrintObservation(nFrames,&obs,13);

      if (hset.hsKind==DISCRETEHS){
         /* each VQ index must lie in 1..maxMixInS[s] for its stream */
         /* NOTE(review): fn may be NULL here (direct audio) and is passed
            to a %s conversion - confirm HError tolerates that. */
         for (s=1; s<=hset.swidth[0]; s++){
            if( (obs.vq[s] < 1) || (obs.vq[s] > maxMixInS[s]))
               HError(3250,"ProcessFile: Discrete data value [ %d ] out of range in stream [ %d ] in file %s",obs.vq[s],s,fn);
         }
      }

      ProcessObservation(vri,&obs,-1,xfInfo.inXForm);

      if (trace & T_FRS) {
         /* follow links[0] for at most 30 steps looking for a word node */
         for (d=vri->genMaxNode,j=0;j<30;d=d->links[0].node,j++)
            if (d->type==n_word) break;
         if (d->type==n_word){
            if (d->info.pron==NULL) p=":bound:";
            else p=d->info.pron->word->wordName->name;
         }
         else
            p=":external:";
         m=FindMacroStruct(&hset,'h',vri->genMaxNode->info.hmm);
         printf("Optimum @%-4d HMM: %s (%s) %d %5.3f\n",
                vri->frame,m->id->name,p,
                vri->nact,vri->genMaxTok.like/vri->frame);
         fflush(stdout);
      }
      nFrames++;
      tact+=vri->nact;
   }

   lat=CompleteRecognition(vri,pbinfo.tgtSampRate/10000000.0,&ansHeap);

   if (lat==NULL) {
      if ((trace & T_TOP) && fn != NULL){
         if (restartable)
            printf("No tokens survived to final node of network at beam %.1f\n", currGenBeam);
         else
            printf("No tokens survived to final node of network\n");
         fflush(stdout);
      } else if (fn==NULL){
         printf("Sorry [%d frames]?\n",nFrames);fflush(stdout);
      }
      if (pbinfo.a != NULL && replay)
         ReplayAudio(pbinfo);
      CloseBuffer(pbuf);
      return FALSE;
   }

   if (vri->noTokenSurvived && restartable) {
      /* Release the lattice and the input buffer before returning; the
         other exits of this function do the same, and skipping it here
         would leave both allocated. */
      Dispose(&ansHeap,lat);
      CloseBuffer(pbuf);
      return FALSE;
   }

   if (vri->noTokenSurvived && trace & T_TOP) {
      printf("No tokens survived to final node of network\n");
      printf(" Output most likely partial hypothesis within network\n");
      fflush(stdout);
   }

   /* thisFN is a local array: lat must not outlive this call */
   lat->utterance=thisFN;
   lat->net=wdNetFn;
   lat->vocab=dictFn;

   if (trace & T_TOP || fn==NULL) {
      /* find the first lattice node that has no predecessor */
      node=NULL;
      for (j=0;j<lat->nn;j++) {
         node=lat->lnodes+j;
         if (node->pred==NULL) break;
         node=NULL;
      }
      /* walk forward taking the last arc of each follower list, printing
         words and summing acoustic and language model scores */
      aclk=lmlk=0.0;
      while(node!=NULL) {
         for (arc=NULL,cur=node->foll;cur!=NULL;cur=cur->farc) arc=cur;
         if (arc==NULL) break;
         if (arc->end->word!=NULL)
            printf("%s ",arc->end->word->wordName->name);
         aclk+=arc->aclike+arc->prlike*lat->prscale;
         lmlk+=arc->lmlike*lat->lmscale+lat->wdpenalty;
         node=arc->end;
      }
      printf(" == [%d frames] %.4f [Ac=%.1f LM=%.1f] (Act=%.1f)\n",nFrames,
             (aclk+lmlk)/nFrames, aclk,lmlk,(float)tact/nFrames);
      fflush(stdout);
   }
   if (pbinfo.a != NULL && replay)
      ReplayAudio(pbinfo);

   /* accumulate stats for online unsupervised adaptation
      only if a token survived */
   if ((lat != NULL) && (!vri->noTokenSurvived) &&
       ((update > 0) || (xfInfo.useOutXForm)))
      DoOnlineAdaptation(lat, pbuf, nFrames);

   if (enableOutput){
      if (nToks>1 && latExt!=NULL) {
         MakeFN(thisFN,labDir,latExt,lfn);
         if ((file=FOpen(lfn,NetOFilter,&isPipe))==NULL)
            HError(3211,"ProcessFile: Could not open file %s for lattice output",lfn);
         if (latForm==NULL)
            form=HLAT_DEFAULT;
         else {
            /* each recognised letter in latForm switches on one flag;
               unrecognised letters are ignored */
            for (p=latForm,form=0;*p!=0;p++) {
               switch (*p) {
               case 'A': form|=HLAT_ALABS; break;
               case 'B': form|=HLAT_LBIN; break;
               case 't': form|=HLAT_TIMES; break;
               case 'v': form|=HLAT_PRON; break;
               case 'a': form|=HLAT_ACLIKE; break;
               case 'l': form|=HLAT_LMLIKE; break;
               case 'd': form|=HLAT_ALIGN; break;
               case 'm': form|=HLAT_ALDUR; break;
               case 'n': form|=HLAT_ALLIKE; break;
               case 'r': form|=HLAT_PRLIKE; break;
               default: break;
               }
            }
         }
         if(WriteLattice(lat,file,form)<SUCCESS)
            HError(3214,"ProcessFile: WriteLattice failed");
         FClose(file,isPipe);
      }

      /* only output 1-best transcription if generating lattices */
      if (nTrans > 1 && latExt != NULL)
         trans=TranscriptionFromLattice(&ansHeap,lat,1);
      /* output N-best transcriptions as usual */
      else
         trans=TranscriptionFromLattice(&ansHeap,lat,nTrans);

      if (labForm!=NULL)
         FormatTranscription(trans,pbinfo.tgtSampRate,states,models,
                             strchr(labForm,'X')!=NULL,
                             strchr(labForm,'N')!=NULL,strchr(labForm,'S')!=NULL,
                             strchr(labForm,'C')!=NULL,strchr(labForm,'T')!=NULL,
                             strchr(labForm,'W')!=NULL,strchr(labForm,'M')!=NULL);
      MakeFN(thisFN,labDir,labExt,lfn);
      /* The result of LSave is not checked; the check below was already
         disabled in the code as found. */
      /* if(LSave(lfn,trans,ofmt)<SUCCESS) HError(3214,"ProcessFile: Cannot save file %s", lfn); */
      LSave(lfn,trans,ofmt);
      Dispose(&ansHeap,trans);
   }
   Dispose(&ansHeap,lat);
   CloseBuffer(pbuf);
   if (trace & T_MMU){
      printf("Memory State after utter %d\n",utterNum);
      PrintAllHeapStats();
   }

   return !vri->noTokenSurvived;
}