Ejemplo n.º 1
0
int
acmod_process_feat(acmod_t *acmod,
                   mfcc_t **feat)
{
    int i, inptr;

    if (acmod->n_feat_frame == acmod->n_feat_alloc) {
        if (acmod->grow_feat)
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
        else
            return 0;
    }

    if (acmod->grow_feat) {
        /* Grow to avoid wraparound if grow_feat == TRUE. */
        inptr = acmod->feat_outidx + acmod->n_feat_frame;
        while (inptr + 1 >= acmod->n_feat_alloc)
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
    }
    else {
        inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
    }
    for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
        memcpy(acmod->feat_buf[inptr][i],
               feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
    ++acmod->n_feat_frame;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);

    return 1;
}
Ejemplo n.º 2
0
void
feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
{
    uint32 i, j, k;

    for (i = 0; i < nfr; i++) {
        fprintf(fp, "%8d:\n", i);

        for (j = 0; j < feat_dimension1(fcb); j++) {
            fprintf(fp, "\t%2d:", j);

            for (k = 0; k < feat_dimension2(fcb, j); k++)
                fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
            fprintf(fp, "\n");
        }
    }

    fflush(fp);
}
Ejemplo n.º 3
0
void
feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
{
    int32 i, j, k;
    for (i = 0; i < nfr; i++) {
#ifndef POCKETSPHINX_NET
        fprintf(fp, "%8d:\n", i);
#else
		net_fprintf(fp, "{0:8}:\n", i);
#endif
        for (j = 0; j < feat_dimension1(fcb); j++) {
#ifndef POCKETSPHINX_NET
            fprintf(fp, "\t%2d:", j);
#else
			net_fprintf(fp, "\t{0:2}:\n", j);
#endif
            for (k = 0; k < (int32)feat_dimension2(fcb, j); k++)
			{
#ifndef POCKETSPHINX_NET
                fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
#else
				//need check
				net_fprintf(fp, " {0:8.4}", MFCC2FLOAT(feat[i][j][k]));
#endif
			}
#ifndef POCKETSPHINX_NET
            fprintf(fp, "\n");
#else
			net_fprintf(fp, "\n");
#endif

        }
    }


    fflush(fp);
}
Ejemplo n.º 4
0
int
agg_phn_seg(lexicon_t *lex,
	    acmod_set_t *acmod_set,
	    feat_t *fcb,
	    segdmp_type_t type)
{
    uint16 *seg;
    vector_t *mfcc;
    vector_t **feat;
    int32 n_frame;
    uint32 tick_cnt;

    acmod_id_t *phone;
    uint32 *start;
    uint32 *len;
    uint32 n_phone;
    uint32 s;
    char *btw_mark;

    char *trans;
    char **word;
    uint32 n_word;
    int32 mfc_veclen = cmd_ln_int32("-ceplen");

    uint32 n_stream;
    uint32 *veclen;

    tick_cnt = 0;

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    while (corpus_next_utt()) {
	if ((++tick_cnt % 500) == 0) {
	    E_INFOCONT("[%u] ", tick_cnt);
	}

	if (corpus_get_sent(&trans) != S3_SUCCESS) {
	    E_FATAL("Unable to read word transcript for %s\n", corpus_utt_brief_name());
	}

	if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
	    E_FATAL("Unable to read Viterbi state segmentation for %s\n", corpus_utt_brief_name());
	}
	    
	n_word = str2words(trans, NULL, 0);
	word = ckd_calloc(n_word, sizeof(char*));
	str2words(trans, word, n_word);

	phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
	start = ckd_calloc(n_phone, sizeof(uint32));
	len = ckd_calloc(n_phone, sizeof(uint32));

	/* check to see whether the word transcript and dictionary entries
	   agree with the state segmentation */
	if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
	    free(trans);	/* alloc'ed using strdup, not ckd_*() */
	    free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
	    ckd_free(word);
	    ckd_free(phone);
	    
	    E_ERROR("ck_seg failed");

	    continue;
	}

	if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
	    free(trans);	/* alloc'ed using strdup, not ckd_*() */
	    free(seg);		/* alloc'ed using malloc in areadshort(), not ckd_*() */
	    ckd_free(word);
	    ckd_free(phone);

	    E_ERROR("cvt2triphone failed");
	    
	    continue;
	}

	ckd_free(btw_mark);

	if (mk_seg(acmod_set,
		   seg,
		   n_frame,
		   phone,
		   start,
		   len,
		   n_phone) != S3_SUCCESS) {
	    free(trans);
	    free(seg);
	    ckd_free(word);
	    ckd_free(phone);

	    E_ERROR("mk_seg failed");
	    continue;
	}
	
	if (corpus_provides_mfcc()) {
    	        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
		      E_FATAL("Can't read input features from %s\n", corpus_utt());
		}
		
		if (n_frame < 9) {
		  E_WARN("utt %s too short\n", corpus_utt());
		  if (mfcc) {
		    ckd_free(mfcc[0]);
		    ckd_free(mfcc);
		    mfcc = NULL;
		  }
		  continue;
		}

		feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

		for (s = 0; s < n_phone; s++) {
		    segdmp_add_feat(phone[s],
				    &feat[start[s]],
				    len[s]);
		}

		feat_array_free(feat);
		free(&mfcc[0][0]);
		ckd_free(mfcc);
	}
	else {
	    E_FATAL("No data type specified\n");
	}

	free(trans);	/* alloc'ed using strdup, not ckd_*() */
	free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
	ckd_free(word);
	ckd_free(phone);
	ckd_free(start);
	ckd_free(len);
    }

    return 0;
}
Ejemplo n.º 5
0
ps_mgau_t *
ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef)
{
    /* Codebooks */
    ms_mgau_model_t *msg;
    ps_mgau_t *mg;
    gauden_t *g;
    senone_t *s;
    cmd_ln_t *config;
    int i;

	static ps_mgaufuncs_t ms_mgau_funcs = {
		"ms",
		ms_cont_mgau_frame_eval, /* frame_eval */
		ms_mgau_mllr_transform,  /* transform */
		ms_mgau_free             /* free */
	};


    config = acmod->config;

    msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
    msg->config = config;
    msg->g = 0;
    msg->s = 0;
    
    g = msg->g = gauden_init(cmd_ln_str_r(config, "-mean"),
                             cmd_ln_str_r(config, "-var"),
                             cmd_ln_float32_r(config, "-varfloor"),
                             lmath);

    /* Verify n_feat and veclen, against acmod. */
    if (g->n_feat != feat_dimension1(acmod->fcb)) {
        E_ERROR("Number of streams does not match: %d != %d\n",
                g->n_feat, feat_dimension1(acmod->fcb));
        goto error_out;
    }
    for (i = 0; i < g->n_feat; ++i) {
        if (g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
            E_ERROR("Dimension of stream %d does not match: %d != %d\n", i,
                    g->featlen[i], feat_dimension2(acmod->fcb, i));
            goto error_out;
        }
    }

    s = msg->s = senone_init(msg->g,
                             cmd_ln_str_r(config, "-mixw"),
                             cmd_ln_str_r(config, "-senmgau"),
                             cmd_ln_float32_r(config, "-mixwfloor"),
                             lmath, mdef);

    s->aw = cmd_ln_int32_r(config, "-aw");

    /* Verify senone parameters against gauden parameters */
    if (s->n_feat != g->n_feat)
        E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
                s->n_feat);
    if (s->n_cw != g->n_density)
        E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
                g->n_density, s->n_cw);
    if ((int)s->n_gauden > g->n_mgau)
        E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);
    if ((int)s->n_gauden < g->n_mgau)
        E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);

    msg->topn = cmd_ln_int32_r(config, "-topn");
    E_INFO("The value of topn: %d\n", msg->topn);
    if (msg->topn == 0 || msg->topn > msg->g->n_density) {
        E_WARN
            ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
             msg->topn, msg->g->n_density);
        msg->topn = msg->g->n_density;
    }

    msg->dist = (gauden_dist_t ***)
        ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
                      sizeof(gauden_dist_t));
    msg->mgau_active = (uint8*)ckd_calloc(g->n_mgau, sizeof(int8));

    mg = (ps_mgau_t *)msg;
    mg->vt = &ms_mgau_funcs;
    return mg;
error_out:
    ms_mgau_free(ps_mgau_base(msg));
    return 0;    
}
Ejemplo n.º 6
0
int
main(int argc, char *argv[])
{
    lexicon_t *lex;
    model_def_t *omdef;
    model_def_t *dmdef;
    feat_t *feat;
    uint32 n_stream, blksize;
    uint32 *veclen;
    uint32 ts_off;
    uint32 ts_cnt;
    FILE *fp;

    if (main_initialize(argc, argv, &lex, &omdef, &dmdef, &feat) != S3_SUCCESS) {
	return -1;
    }

    n_stream = feat_dimension1(feat);
    veclen = feat_stream_lengths(feat);
    blksize = feat_dimension(feat);

    if (strcmp(cmd_ln_str("-gthobj"), "state") == 0) {
	ts_off = cmd_ln_int32("-tsoff");

	if (cmd_ln_str("-tscnt") == NULL) {
	    ts_cnt = omdef->n_tied_state - ts_off;
 	}
	else {
	    ts_cnt = cmd_ln_int32("-tscnt");
	}

	if (ts_off + ts_cnt > omdef->n_tied_state) {
	    E_FATAL("Too many tied states specified\n");
	}

	n_tot_frame = 0;

	ptmr_reset(&all_timer);
	ptmr_reset(&km_timer);
	ptmr_reset(&var_timer);
	ptmr_reset(&em_timer);
	ptmr_start(&all_timer);

	if (init_state(cmd_ln_str("-segdmpfn"),
		       cmd_ln_str("-segidxfn"),
		       cmd_ln_int32("-ndensity"),
		       n_stream,
		       veclen,
		       blksize,
		       cmd_ln_int32("-reest"),
		       cmd_ln_str("-mixwfn"),
		       cmd_ln_str("-meanfn"),
		       cmd_ln_str("-varfn"),
		       ts_off,
		       ts_cnt,
		       omdef->n_tied_state,
		       (dmdef != NULL ? dmdef->n_tied_state : omdef->n_tied_state))
		       != S3_SUCCESS) {
	    E_ERROR("Unable to train [%u %u]\n", ts_off, ts_off+ts_cnt-1);
	}
	ptmr_stop(&all_timer);

	if (n_tot_frame > 0) {
	    E_INFO("TOTALS:");
    	    E_INFOCONT(" km %4.3fx %4.3e", 
	    	km_timer.t_cpu / (n_tot_frame * 0.01),
		(km_timer.t_cpu > 0 ?
		 km_timer.t_elapsed / km_timer.t_cpu : 0.0));
    	    E_INFOCONT(" var %4.3fx %4.3e", 
		var_timer.t_cpu / (n_tot_frame * 0.01),
		(var_timer.t_cpu > 0 ?
		 var_timer.t_elapsed / var_timer.t_cpu : 0.0));
	    E_INFOCONT(" em %4.3fx %4.3e", 
		em_timer.t_cpu / (n_tot_frame * 0.01),
		(em_timer.t_cpu > 0 ?
		 em_timer.t_elapsed / em_timer.t_cpu : 0.0));
    	    E_INFOCONT(" all %4.3fx %4.3e", 
	    	all_timer.t_cpu / (n_tot_frame * 0.01),
		(all_timer.t_cpu > 0 ?
		 all_timer.t_elapsed / all_timer.t_cpu : 0.0));
	    E_INFOCONT("\n");
	}
	
	if (cmd_ln_str("-tsrngfn") != NULL) {
	    fp = fopen(cmd_ln_str("-tsrngfn"),
		       "w");
	    if (fp == NULL) {
		E_FATAL_SYSTEM("Unable to open %s for reading",
			       cmd_ln_str("-tsrngfn"));
	    }
	    
	    fprintf(fp, "%d %d\n", ts_off, ts_cnt);
	}
	else if (ts_cnt != omdef->n_tied_state) {
	    E_WARN("Subset of tied states specified, but no -tsrngfn arg");
	}
    }
    else if (strcmp(cmd_ln_str("-gthobj"), "single") == 0) {
	n_tot_frame = 0;

	ptmr_reset(&all_timer);
	ptmr_reset(&km_timer);
	ptmr_reset(&var_timer);
	ptmr_reset(&em_timer);

	ptmr_start(&all_timer);
	
	if (init_state(cmd_ln_str("-segdmpfn"),
		       NULL,	/* No index -> single class dump file */
		       cmd_ln_int32("-ndensity"),
		       n_stream,
		       veclen,
		       blksize,
		       cmd_ln_int32("-reest"),
		       cmd_ln_str("-mixwfn"),
		       cmd_ln_str("-meanfn"),
		       cmd_ln_str("-varfn"),
		       0,
		       1,
		       1,
		       1) != S3_SUCCESS) {
	    E_ERROR("Unable to train\n");
	}
	ptmr_stop(&all_timer);

	if (n_tot_frame > 0) {
	    E_INFO("TOTALS:");
    	    E_INFOCONT(" km %4.3fx %4.3e", 
		km_timer.t_cpu / (n_tot_frame * 0.01),
		(km_timer.t_cpu > 0 ?
		 km_timer.t_elapsed / km_timer.t_cpu : 0.0));
	    E_INFOCONT(" var %4.3fx %4.3e", 
		var_timer.t_cpu / (n_tot_frame * 0.01),
		(var_timer.t_cpu > 0 ?
		 var_timer.t_elapsed / var_timer.t_cpu : 0.0));
	    E_INFOCONT(" em %4.3fx %4.3e", 
		em_timer.t_cpu / (n_tot_frame * 0.01),
		(em_timer.t_cpu > 0 ?
		 em_timer.t_elapsed / em_timer.t_cpu : 0.0));
	    E_INFOCONT(" all %4.3fx %4.3e", 
    		all_timer.t_cpu / (n_tot_frame * 0.01),
		(all_timer.t_cpu > 0 ?
		 all_timer.t_elapsed / all_timer.t_cpu : 0.0));
	    E_INFOCONT("\n");
	}
    }

    return 0;
}
Ejemplo n.º 7
0
s2_semi_mgau_t *
s2_semi_mgau_init(cmd_ln_t *config, logmath_t *lmath, feat_t *fcb, mdef_t *mdef)
{
    s2_semi_mgau_t *s;
    char const *sendump_path;
    float32 **fgau;
    int i;

    s = ckd_calloc(1, sizeof(*s));
    s->config = config;

    s->lmath = logmath_retain(lmath);
    /* Log-add table. */
    s->lmath_8b = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
    if (s->lmath_8b == NULL) {
        s2_semi_mgau_free(s);
        return NULL;
    }
    /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
    if (logmath_get_width(s->lmath_8b) != 1) {
        E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
                logmath_get_base(s->lmath_8b));
        s2_semi_mgau_free(s);
        return NULL;
    }

    /* Inherit stream dimensions from acmod, will be checked below. */
    s->n_feat = feat_dimension1(fcb);
    s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
    for (i = 0; i < s->n_feat; ++i)
        s->veclen[i] = feat_dimension2(fcb, i);

    /* Read means and variances. */
    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fgau) < 0) {
        s2_semi_mgau_free(s);
        return NULL;
    }
    s->means = (mfcc_t **)fgau;
    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fgau) < 0) {
        s2_semi_mgau_free(s);
        return NULL;
    }
    s->vars = (mfcc_t **)fgau;

    /* Precompute (and fixed-point-ize) means, variances, and determinants. */
    s->dets = (mfcc_t **)ckd_calloc_2d(s->n_feat, s->n_density, sizeof(**s->dets));
    s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));

    /* Read mixture weights */
    if ((sendump_path = cmd_ln_str_r(s->config, "-sendump")))
        read_sendump(s, mdef, sendump_path);
    else
        read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
                  cmd_ln_float32_r(s->config, "-mixwfloor"));
    s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");

    /* Determine top-N for each feature */
    s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
    s->max_topn = cmd_ln_int32_r(s->config, "-topn");
    split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);
    E_INFO("Maximum top-N: %d ", s->max_topn);
    E_INFOCONT("Top-N beams:");
    for (i = 0; i < s->n_feat; ++i) {
        E_INFOCONT(" %d", s->topn_beam[i]);
    }
    E_INFOCONT("\n");

    /* Top-N scores from recent frames */
    s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
    s->topn_hist = (vqFeature_t ***)
        ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
                      sizeof(***s->topn_hist));
    s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
                                   sizeof(**s->topn_hist_n));
    for (i = 0; i < s->n_topn_hist; ++i) {
        int j;
        for (j = 0; j < s->n_feat; ++j) {
            int k;
            for (k = 0; k < s->max_topn; ++k) {
                s->topn_hist[i][j][k].score = WORST_DIST;
                s->topn_hist[i][j][k].codeword = k;
            }
        }
    }

    return s;
}
int
agg_all_seg(feat_t *fcb,
	    segdmp_type_t type,
	    const char *fn,
	    uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 n_frame;
    uint32 n_out_frame;
    uint32 blksz=0;
    vector_t **feat = NULL;
    uint32 i, j;
    uint32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries=0;
    
    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);
    for (i = 0, blksz = 0; i < n_stream; i++)
        blksz += veclen[i];

    fp = open_dmp(fn);

    start = ftell(fp);

    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");

	return S3_ERROR;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
	 corpus_next_utt(); seq_no++) {
	    if (mfcc) {
		free(mfcc[0]);
		ckd_free(mfcc);

		mfcc = NULL;
	    }

	    /* get the MFCC data for the utterance */
	    if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
	      E_FATAL("Can't read input features from %s\n", corpus_utt());
	    }

	if ((seq_no % 1000) == 0) {
	    E_INFO("[%u]\n", seq_no);
	}

	    if (feat) {
		feat_array_free(feat);
		feat = NULL;
	    }
	    
	    if (n_frame < 9) {
	      E_WARN("utt %s too short\n", corpus_utt());
	      if (mfcc) {
		ckd_free(mfcc[0]);
		ckd_free(mfcc);
		mfcc = NULL;
	      }
	      continue;
	    }

	    feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	    feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

	    for (t = 0; t < n_frame; t++, j++) {
		if ((j % stride) == 0) {
		    while (s3write(&feat[t][0][0],
				   sizeof(float32),
				   blksz,
				   fp, &ignore) != blksz) {
			static int rpt = 0;

			if (!rpt) {
			    E_ERROR_SYSTEM("Unable to write to dmp file");
			    E_INFO("sleeping...\n");
			    no_retries++;
			}
			sleep(3);

			if(no_retries > 10){
			  E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
			}
		    }
		    ++n_out_frame;
		}
	    }
    }

    if (fseek(fp, start, SEEK_SET) < 0) {
	E_ERROR_SYSTEM("Unable to seek to begin of dmp");

	return S3_ERROR;
    }

    E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

    if (s3write((void *)&n_out_frame, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");
	
	return S3_ERROR;
    }

    return S3_SUCCESS;
}