Ejemplo n.º 1
0
/*
 * Apply automatic gain control to nfr frames of mfc, using the method
 * selected in fcb->agc.
 *
 * When the call does not cover a complete utterance (beginutt and endutt are
 * not both non-zero) and AGC is enabled at all, the configured method is
 * overridden with AGC_EMAX.  In AGC_EMAX mode, agc_emax_update() is invoked
 * once endutt is set.  The result is always passed to cep_dump_dbg().
 */
static void
feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
{
    agc_type_t mode = fcb->agc;
    int whole_utt = (beginutt && endutt);

    /* Block computation mode: only agc_emax is usable. */
    if (mode != AGC_NONE && !whole_utt)
        mode = AGC_EMAX;

    if (mode == AGC_MAX) {
        agc_max(fcb->agc_struct, mfc, nfr);
    }
    else if (mode == AGC_EMAX) {
        agc_emax(fcb->agc_struct, mfc, nfr);
        if (endutt)
            agc_emax_update(fcb->agc_struct);
    }
    else if (mode == AGC_NOISE) {
        agc_noise(fcb->agc_struct, mfc, nfr);
    }
    /* Any other value (e.g. AGC_NONE): leave the cepstra untouched. */

    cep_dump_dbg(fcb, mfc, nfr, "After AGC");
}
Ejemplo n.º 2
0
Archivo: agc.c Proyecto: 10v/cmusphinx
/*
 * Apply automatic gain control to n_frame cepstral vectors stored
 * contiguously in mfcc (frame i starts at mfcc[i*veclen]).
 *
 * The method is chosen by the "-agc" option:
 *   "noise" -> real_agc_noise()
 *   "max"   -> agc_max()
 *   "emax"  -> agc_emax_proc() applied in place to each frame
 *   "none"  -> no processing
 * If the option has no value a warning is logged and nothing is done; any
 * other value is reported through E_FATAL.
 */
void
agc(float32 *mfcc,
    uint32 n_frame)
{
    const char *agc_type = cmd_ln_access("-agc");
    uint32 i;

    /*
     * This test must come before any strcmp(): handing a null pointer to
     * strcmp() is undefined behaviour, so checking afterwards is too late.
     */
    if (agc_type == NULL) {
	E_WARN("no agc set\n");
	return ;
    }

    if (strcmp(agc_type, "noise") == 0) {
	real_agc_noise(mfcc, n_frame, veclen);
    }
    else if (strcmp(agc_type, "max") == 0) {
	agc_max(mfcc, n_frame, veclen);
    }
    else if (strcmp(agc_type, "emax") == 0) {
	/* Input and output pointers are the same: each frame is updated in place. */
	for (i = 0; i < n_frame; i++) {
	    agc_emax_proc(&mfcc[i*veclen], &mfcc[i*veclen],
			  veclen);
	}
    }
    else if (strcmp(agc_type, "none") == 0) {
	/* do nothing */
    }
    else {
	E_FATAL("unsupported agc type %s\n", agc_type);
    }
}
/*
 * Find Viterbi alignment.
 *
 * Builds a sentence HMM from sent, then for each of the nfr input frames
 * computes a feature vector, evaluates the active gaussian codebooks and
 * senones, and steps the alignment forward by one frame.  When the alignment
 * completes, the state/phone/word segmentations are written to whichever
 * output directories were configured (and to outsentfp if it is set).
 *
 * On the first call the working buffers are allocated and the output
 * directory options are read; they are kept in function-local statics and
 * reused by later calls (nothing in this function frees them).
 *
 * Returns early, after logging an error, if no sentence HMM can be built.
 *
 * Relies on state defined outside this function: g, sen, mdef, interp,
 * n_feat, featlen, cepsize, senscale, tm_utt, tmr_utt, tmr_senone,
 * tmr_gauden, tmr_align, outsentfp and tot_nfr.
 */
static void align_utt (char *sent,	/* In: Reference transcript */
		       float32 **mfc,	/* In: MFC cepstra for input utterance */
		       int32 nfr,	/* In: #frames of input */
		       char *ctlspec,	/* In: Utt specification from control file */
		       char *uttid)	/* In: Utterance id, for logging and other use */
{
    /* Persistent working state, set up once on the first call (feat == NULL). */
    static float32 **feat = NULL;
    static int32 w;
    static int32 topn;
    static gauden_dist_t ***dist;
    static int32 *senscr;
    static s3senid_t *sen_active;
    static int8 *mgau_active;
    static char *s2stsegdir;
    static char *stsegdir;
    static char *phsegdir;
    static char *wdsegdir;
    
    int32 i, s, sid, gid, n_sen_active, best;
    char *arg;
    align_stseg_t *stseg;
    align_phseg_t *phseg;
    align_wdseg_t *wdseg;

    if (! feat) {
	/* One-time allocation of necessary intermediate variables */

	/* Allocate space for a feature vector */
	feat = (float32 **) ckd_calloc (n_feat, sizeof(float32 *));
	for (i = 0; i < n_feat; i++)
	    feat[i] = (float32 *) ckd_calloc (featlen[i], sizeof(float32));
	
	/* Allocate space for top-N codeword density values in a codebook */
	w = feat_window_size ();	/* #MFC vectors needed on either side of current
					   frame to compute one feature vector */
	topn = *((int32 *) cmd_ln_access("-topn"));
	/* Clamp topn to the number of densities available in a codebook */
	if (topn > g->n_density) {
	    E_ERROR("-topn argument (%d) > #density codewords (%d); set to latter\n",
		   topn, g->n_density);
	    topn = g->n_density;
	}
	dist = (gauden_dist_t ***) ckd_calloc_3d (g->n_mgau, n_feat, topn,
						  sizeof(gauden_dist_t));
	
	/* Space for one frame of senone scores, and per frame active flags */
	senscr = (int32 *) ckd_calloc (sen->n_sen, sizeof(int32));
	sen_active = (s3senid_t *) ckd_calloc (sen->n_sen, sizeof(s3senid_t));
	mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8));

	/* Note various output directories */
	/* A directory left NULL disables the corresponding output further below. */
	s2stsegdir = NULL;
	stsegdir = NULL;
	phsegdir = NULL;
	wdsegdir = NULL;
	if ((arg = (char *) cmd_ln_access ("-s2stsegdir")) != NULL)
	    s2stsegdir = (char *) ckd_salloc (arg);
	if ((arg = (char *) cmd_ln_access ("-stsegdir")) != NULL)
	    stsegdir = (char *) ckd_salloc (arg);
	if ((arg = (char *) cmd_ln_access ("-phsegdir")) != NULL)
	    phsegdir = (char *) ckd_salloc (arg);
	if ((arg = (char *) cmd_ln_access ("-wdsegdir")) != NULL)
	    wdsegdir = (char *) ckd_salloc (arg);
    }
    
/* NOTE: the minimum-length check below is disabled (commented out). */
/* HACK HACKA HACK BHIKSHA 
    if (nfr <= (w<<1)) {
	E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr);
	return;
    }
 END HACK HACKA HACK */
    
    cyctimer_reset_all ();
    counter_reset_all ();
    
    timing_reset (tm_utt);
    timing_start (tm_utt);
    cyctimer_resume (tmr_utt);

    /* AGC and CMN */
    /*
     * NOTE(review): both option values are passed to strcmp() unguarded, so
     * this assumes "-cmn" and "-agc" are never NULL -- confirm they have
     * defaults.  Only "current" (CMN) and "max" (AGC) are acted on here.
     *
     * NOTE(review): norm_mean() is given nfr+8 frames starting 4 vectors
     * before mfc; presumably the caller pads the buffer with 4 frames on
     * each side -- confirm, otherwise this reads outside the array.
     */
    arg = (char *) cmd_ln_access ("-cmn");
    if (strcmp (arg, "current") == 0)
	norm_mean (mfc-4, nfr+8, cepsize); /* -4 HACKA HACK */
    arg = (char *) cmd_ln_access ("-agc");
    if (strcmp (arg, "max") == 0)
	agc_max (mfc, nfr);
    
    if (align_build_sent_hmm (sent) != 0) {
	/* NOTE: tm_utt was started above and is not stopped on this path. */
	align_destroy_sent_hmm ();
	cyctimer_pause (tmr_utt);

	E_ERROR("No sentence HMM; no alignment for %s\n", uttid);
	
	return;
    }
    
    align_start_utt (uttid);
    
    /*
     * A feature vector for frame f depends on input MFC vectors [f-w..f+w].  Hence
     * the feature vector corresponding to the first w and last w input frames is
     * undefined.  We define them by simply replicating the first and last true
     * feature vectors (presumably silence regions).
     *
     * NOTE: the replication described above is currently commented out inside
     * the loop; every frame i is computed directly from mfc+i.
     */
    for (i = 0; i < nfr; i++) {
	cyctimer_resume (tmr_utt);
	
	/* Compute feature vector for current frame from input speech cepstra */
/* HACK HACKA HACK BHIKSHA 
	if (i < w)
	    feat_cep2feat (mfc+w, feat);
	else if (i >= nfr-w)
	    feat_cep2feat (mfc+(nfr-w-1), feat);
	else
END HACK HACKA HACK */
	    feat_cep2feat (mfc+i, feat);

	/*
	 * Evaluate gaussian density codebooks and senone scores for input codeword.
	 * Evaluate only active codebooks and senones.
	 */
	/* Obtain active senone flags */
	cyctimer_resume (tmr_senone);
	align_sen_active (sen_active, sen->n_sen);
	/* Flag all CI senones to active if interpolating */
	if (interp) {
	    for (s = 0; s < mdef->n_ci_sen; s++)
		sen_active[s] = 1;
	}
	/* Turn active flags into list (for faster access) */
	/*
	 * The list is built in place over the flag array.  The write index
	 * n_sen_active never exceeds the read index s, so flags that have not
	 * been examined yet are never overwritten.
	 * NOTE(review): the loop bound is mdef->n_sen while the array was sized
	 * with sen->n_sen; presumably the two are equal -- confirm.
	 */
	n_sen_active = 0;
	for (s = 0; s < mdef->n_sen; s++) {
	    if (sen_active[s])
		sen_active[n_sen_active++] = s;
	}
	cyctimer_pause (tmr_senone);
	
	/* Flag all active mixture-gaussian codebooks */
	cyctimer_resume (tmr_gauden);
	for (gid = 0; gid < g->n_mgau; gid++)
	    mgau_active[gid] = 0;
	for (s = 0; s < n_sen_active; s++) {
	    sid = sen_active[s];
	    mgau_active[sen->mgau[sid]] = 1;
	}
	
	/* Compute topn gaussian density values (for active codebooks) */
	for (gid = 0; gid < g->n_mgau; gid++)
	    if (mgau_active[gid])
		gauden_dist (g, gid, topn, feat, dist[gid]);
	cyctimer_pause (tmr_gauden);
	
	/* Evaluate active senones */
	cyctimer_resume (tmr_senone);
	/* Seed the running maximum; intended as the most negative int32 value. */
	best = (int32) 0x80000000;
	for (s = 0; s < n_sen_active; s++) {
	    sid = sen_active[s];
	    senscr[sid] = senone_eval (sen, sid, dist[sen->mgau[sid]], topn);
	    if (best < senscr[sid])
		best = senscr[sid];
	}
	/* Interpolate only senones at or above n_ci_sen, each with mdef->cd2cisen[sid]. */
	if (interp) {
	    for (s = 0; s < n_sen_active; s++) {
		if ((sid = sen_active[s]) >= mdef->n_ci_sen)
		    interp_cd_ci (interp, senscr, sid, mdef->cd2cisen[sid]);
	    }
	}
	
	/* Normalize senone scores (interpolation above can only lower best score) */
	for (s = 0; s < n_sen_active; s++) {
	    sid = sen_active[s];
	    senscr[sid] -= best;
	}
	/* Remember the amount subtracted from this frame's scores. */
	senscale[i] = best;
	cyctimer_pause (tmr_senone);
	
	/* Step alignment one frame forward */
	cyctimer_resume (tmr_align);
	align_frame (senscr);
	cyctimer_pause (tmr_align);
	
	cyctimer_pause (tmr_utt);
    }
    timing_stop (tm_utt);

    printf ("\n");

    /* Wind up alignment for this utterance */
    if (align_end_utt (&stseg, &phseg, &wdseg) < 0)
	E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    else {
	/* Write only the outputs whose destination was configured. */
	if (s2stsegdir)
	    write_s2stseg (s2stsegdir, stseg, uttid, ctlspec);
	if (stsegdir)
	    write_stseg (stsegdir, stseg, uttid, ctlspec);
	if (phsegdir)
	    write_phseg (phsegdir, phseg, uttid, ctlspec);
	if (wdsegdir)
	    write_wdseg (wdsegdir, wdseg, uttid, ctlspec);
	if (outsentfp)
	    write_outsent (outsentfp, wdseg, uttid);
    }
    
    align_destroy_sent_hmm ();
    
    /*
     * NOTE(review): nfr*0.01 and the "* 100.0 / nfr" factors below presumably
     * assume 100 frames per second (10 ms per frame) -- confirm.  With
     * nfr == 0 the xRT figures divide by zero in floating point.
     */
    cyctimer_print_all_norm (stdout, nfr*0.01, tmr_utt);
    counter_print_all (stdout);

    printf("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
	   nfr,
	   tm_utt->t_cpu, tm_utt->t_cpu * 100.0 / nfr,
	   tm_utt->t_elapsed, tm_utt->t_elapsed * 100.0 / nfr);

    /* Accumulate the running total of processed frames. */
    tot_nfr += nfr;
}
Ejemplo n.º 4
0
Archivo: main.c Proyecto: 10v/cmusphinx
/*
 * Decode one utterance.
 *
 * Reads the cepstra for uttfile, applies CMN/AGC as configured, runs the
 * lextree Viterbi search frame by frame, prints the hypothesis, logs the
 * Viterbi word lattice to "<uttid>.lat" (falling back to stdout when that
 * file cannot be named or opened), prints timing and counter statistics, and
 * releases the per-utterance search state.
 *
 *   data     In: knowledge base, a kb_t * passed as an opaque pointer
 *   uttfile  In: utterance file spec, combined with -cepdir and -cepext
 *   sf, ef   In: passed through to s2mfc_read (presumably start/end frame)
 *   uttid    In: utterance id, used for logging and the lattice file name
 *
 * Returns early, after logging an error, if the cepstra cannot be read or the
 * utterance is shorter than 2*featwin+1 frames.
 */
static void decode_utt (void *data, char *uttfile, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    acoustic_t *am;
    int32 featwin, nfr, min_utt_frames, n_vithist;
    char cepfile[4096], latfile[4096];
    vithist_t *finalhist;
    int32 i, f;
    int latlen;
    glist_t hyplist;
    FILE *latfp;

    printf ("\n");
    fflush (stdout);
    E_INFO("Utterance %s\n", uttid);
    
    kb = (kb_t *)data;
    am = kb->am;
    featwin = feat_window_size(am->fcb);
    
    /* Build complete cepfile name and read cepstrum data; check for min length */
    ctl_infile (cepfile, cmd_ln_str("-cepdir"), cmd_ln_str("-cepext"), uttfile);

    if ((nfr = s2mfc_read (cepfile, sf, ef, featwin, am->mfc, S3_MAX_FRAMES)) < 0) {
	E_ERROR("%s: MFC read failed\n", uttid);
	return;
    }
    /* nfr includes featwin frames on each side; report the count without them */
    E_INFO("%s: %d frames\n", uttid, nfr-(featwin<<1));
    
    ptmr_reset (kb->tm);
    ptmr_reset (kb->tm_search);
    ptmr_start (kb->tm);
    
    min_utt_frames = (featwin<<1) + 1;
    if (nfr < min_utt_frames) {
	/*
	 * Report the actual frame count as well; the format previously had no
	 * conversion for the nfr argument.  kb->tm is left running on this
	 * path and is reset at the start of the next call.
	 */
	E_ERROR("%s: Utterance shorter than %d frames (%d); ignored\n",
		uttid, min_utt_frames, nfr);
	return;
    }
    
    /* CMN/AGC */
    if (strcmp (cmd_ln_str("-cmn"), "current") == 0)
	cmn (am->mfc, nfr, feat_cepsize(am->fcb));
    if (strcmp (cmd_ln_str("-agc"), "max") == 0)
	agc_max (am->mfc, nfr);
    
    /* Process utterance */
    /* i indexes the input cepstra (skipping featwin at each end); f counts output frames */
    lextree_vit_start (kb, uttid);
    for (i = featwin, f = 0; i < nfr-featwin; i++, f++) {
	am->senscale[f] = acoustic_eval (am, i);
	
	ptmr_start (kb->tm_search);
	
	lextree_vit_frame (kb, f, uttid);
	printf (" %d,%d,%d", f, glist_count (kb->vithist[f]), glist_count (kb->lextree_active));
	fflush (stdout);
	
	ptmr_stop (kb->tm_search);
    }
    printf ("\n");
    finalhist = lextree_vit_end (kb, f, uttid);
    
    hyplist = vithist_backtrace (finalhist, kb->am->senscale);
    hyp_log (stdout, hyplist, _dict_wordstr, (void *)kb->dict);
    hyp_myfree (hyplist);
    printf ("\n");
    
    /* Log the entire Viterbi word lattice */
    /* Bounded formatting: a long uttid must not overflow latfile */
    latlen = snprintf (latfile, sizeof(latfile), "%s.lat", uttid);
    if ((latlen < 0) || (latlen >= (int) sizeof(latfile))) {
	E_ERROR("%s: lattice file name too long; using stdout\n", uttid);
	latfp = stdout;
    }
    else if ((latfp = fopen(latfile, "w")) == NULL) {
	E_ERROR("fopen(%s,w) failed; using stdout\n", latfile);
	latfp = stdout;
    }
    n_vithist = vithist_log (latfp, kb->vithist, f, _dict_wordstr, (void *)kb->dict);
    if (latfp != stdout)
	fclose (latfp);
    else {
	printf ("\n");
	fflush (stdout);
    }
    
    ptmr_stop (kb->tm);
    /* Statistics are per frame, so skip them when no frame was processed */
    if (f > 0) {
	printf("TMR(%s): %5d frames; %.1fs CPU, %.2f xRT; %.1fs CPU(search), %.2f xRT; %.1fs Elapsed, %.2f xRT\n",
	       uttid, f,
	       kb->tm->t_cpu, kb->tm->t_cpu * 100.0 / f,
	       kb->tm_search->t_cpu, kb->tm_search->t_cpu * 100.0 / f,
	       kb->tm->t_elapsed, kb->tm->t_elapsed * 100.0 / f);
	printf("CTR(%s): %5d frames; %d Sen (%.1f/fr); %d HMM (%.1f/fr); %d Words (%.1f/fr)\n",
	       uttid, f,
	       kb->n_sen_eval, ((float64)kb->n_sen_eval) / f,
	       kb->n_hmm_eval, ((float64)kb->n_hmm_eval) / f,
	       n_vithist, ((float64) n_vithist) / f);
    }
    
    /* Cleanup */
    glist_free (kb->lextree_active);
    kb->lextree_active = NULL;
    for (; f >= -1; --f) {	/* I.e., including dummy START_WORD node at frame -1 */
	glist_myfree (kb->vithist[f], sizeof(vithist_t));
	kb->vithist[f] = NULL;
    }
    
    lm_cache_reset (kb->lm);
}