コード例 #1
0
/*
 * Build the phone id sequence for a word transcript.
 *
 * The transcript is split into words with mk_wordlist(), mapped to a
 * phone list with mk_phone_list() and then passed through cvt2triphone().
 *
 *   out_phone   - receives the phone id array (NULL on failure)
 *   out_n_phone - receives the number of entries in *out_phone (0 on failure)
 *   trans       - transcript text handed to mk_wordlist()
 *   acmod_set   - acoustic model set handed to cvt2triphone()
 *   lex         - lexicon handed to mk_phone_list()
 *
 * Returns S3_SUCCESS, or S3_ERROR when mk_phone_list() yields no phone
 * list for the utterance.
 */
int
mk_phone_seq(acmod_id_t **out_phone,
	     uint32 *out_n_phone,
	     char *trans,
	     acmod_set_t *acmod_set,
	     lexicon_t *lex)
{
    char **word;
    uint32 n_word;
    uint32 n_phone;
    acmod_id_t *phone;
    char *btw_mark;

    word = mk_wordlist(trans, &n_word);
    phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
    if (phone == NULL) {
	E_WARN("Unable to produce phonetic transcription for the utterance '%s'.\n", corpus_utt());

	/* Without a phone list neither n_phone nor btw_mark can be
	 * trusted, so stop here rather than feeding them to
	 * cvt2triphone() and reporting success. */
	ckd_free(word);

	*out_phone = NULL;
	*out_n_phone = 0;

	return S3_ERROR;
    }

    /* NOTE(review): the status of cvt2triphone() is ignored here, as in
     * the original code; other callers in this code base do check it. */
    cvt2triphone(acmod_set, phone, btw_mark, n_phone);

    ckd_free(btw_mark);
    ckd_free(word);

    *out_phone = phone;
    *out_n_phone = n_phone;

    return S3_SUCCESS;
}
コード例 #2
0
ファイル: corpus.c プロジェクト: lliuguangbo/sphinxtrain
/*
 * Record the requested skip count and run length, and advance the corpus
 * past the first n_skip utterances.
 *
 * While skipping, `begin` counts the utterances actually consumed (the
 * corpus may run out first).  `n_run` is only updated when run_len is
 * not UNTIL_EOF.  `n_proc` is always reset to zero.
 *
 * Always returns S3_SUCCESS.
 */
int
corpus_set_interval(uint32 n_skip,
		    uint32 run_len)
{
    uint32 to_skip = n_skip;

    sv_n_skip = n_skip;
    sv_run_len = run_len;

    if (to_skip != 0) {
	E_INFO("skipping %d utts.\n", to_skip);

	begin = 0;
	while ((to_skip > 0) && corpus_next_utt()) {
	    --to_skip;
	    begin++;
	}

	E_INFO("Last utt skipped: %s\n", corpus_utt());
    }

    if (run_len != UNTIL_EOF) {
	n_run = run_len;
    }

    n_proc = 0;

    return S3_SUCCESS;
}
コード例 #3
0
ファイル: agg_phn_seg.c プロジェクト: Ankit77/cmusphinx
/*
 * For every remaining utterance of the corpus, derive per-phone segment
 * boundaries from the state segmentation and hand the matching feature
 * frames of each phone to segdmp_add_feat().
 *
 *   lex       - lexicon used by mk_phone_list()
 *   acmod_set - acoustic model set used by ck_seg(), cvt2triphone(), mk_seg()
 *   fcb       - feature control block used to compute the features
 *   type      - not referenced by this function
 *
 * Utterances for which phone list construction, ck_seg(), cvt2triphone()
 * or mk_seg() fail, or which have fewer than 9 frames, are skipped.
 * Every per-utterance buffer is released through the single cleanup
 * point at the bottom of the loop, so skipped utterances no longer leak.
 *
 * Always returns 0; unrecoverable read errors end in E_FATAL.
 */
int
agg_phn_seg(lexicon_t *lex,
	    acmod_set_t *acmod_set,
	    feat_t *fcb,
	    segdmp_type_t type)
{
    int32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 tick_cnt = 0;

    while (corpus_next_utt()) {
	/* Per-utterance storage; everything starts out NULL so that the
	 * shared cleanup code below can run from any point. */
	uint16 *seg = NULL;
	vector_t *mfcc = NULL;
	vector_t **feat = NULL;
	int32 n_frame = 0;
	acmod_id_t *phone = NULL;
	uint32 *start = NULL;
	uint32 *len = NULL;
	uint32 n_phone = 0;
	uint32 s;
	char *btw_mark = NULL;
	char *trans = NULL;
	char **word = NULL;
	uint32 n_word;

	if ((++tick_cnt % 500) == 0) {
	    E_INFOCONT("[%u] ", tick_cnt);
	}

	if (corpus_get_sent(&trans) != S3_SUCCESS) {
	    E_FATAL("Unable to read word transcript for %s\n", corpus_utt_brief_name());
	}

	if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
	    E_FATAL("Unable to read Viterbi state segmentation for %s\n", corpus_utt_brief_name());
	}

	/* First call counts the words, second call fills the array. */
	n_word = str2words(trans, NULL, 0);
	word = ckd_calloc(n_word, sizeof(char*));
	str2words(trans, word, n_word);

	phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
	if (phone == NULL) {
	    /* btw_mark cannot be trusted when no phone list came back,
	     * so make sure the cleanup code does not touch it. */
	    btw_mark = NULL;

	    E_ERROR("mk_phone_list failed for %s\n", corpus_utt());

	    goto next_utt;
	}

	start = ckd_calloc(n_phone, sizeof(uint32));
	len = ckd_calloc(n_phone, sizeof(uint32));

	/* check to see whether the word transcript and dictionary entries
	   agree with the state segmentation */
	if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
	    E_ERROR("ck_seg failed");

	    goto next_utt;
	}

	if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
	    E_ERROR("cvt2triphone failed");

	    goto next_utt;
	}

	/* The between-word markers are only needed by cvt2triphone(). */
	ckd_free(btw_mark);
	btw_mark = NULL;

	if (mk_seg(acmod_set,
		   seg,
		   n_frame,
		   phone,
		   start,
		   len,
		   n_phone) != S3_SUCCESS) {
	    E_ERROR("mk_seg failed");

	    goto next_utt;
	}

	if (corpus_provides_mfcc()) {
	    if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
		E_FATAL("Can't read input features from %s\n", corpus_utt());
	    }

	    if (n_frame < 9) {
		E_WARN("utt %s too short\n", corpus_utt());

		goto next_utt;
	    }

	    feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	    feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

	    for (s = 0; s < n_phone; s++) {
		segdmp_add_feat(phone[s],
				&feat[start[s]],
				len[s]);
	    }

	    feat_array_free(feat);
	}
	else {
	    E_FATAL("No data type specified\n");
	}

    next_utt:
	if (mfcc) {
	    /* mfcc[0] is the coefficient buffer, mfcc the row-pointer array */
	    free(&mfcc[0][0]);
	    ckd_free(mfcc);
	}
	free(trans);	/* alloc'ed using strdup, not ckd_*() */
	free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
	ckd_free(word);
	ckd_free(phone);
	ckd_free(btw_mark);
	ckd_free(start);
	ckd_free(len);
    }

    return 0;
}
コード例 #4
0
ファイル: corpus.c プロジェクト: lliuguangbo/sphinxtrain
/*
 * Produce a heap copy of the current transcription line with a trailing
 * "(utt_id)" removed.
 *
 * A ')' counts as the end of an utterance id only when nothing but
 * whitespace follows it.  In that case the id between the matching '('
 * and the ')' is compared (case-insensitively) with the current
 * utterance name, a warning is issued on mismatch, and the line is cut
 * after the last non-whitespace character preceding the '('.
 * transcription_line is modified in place.
 *
 *   trans - receives a strdup()'ed copy of the (possibly shortened) line;
 *           the caller releases it with free()
 *
 * Returns S3_SUCCESS, or S3_ERROR when a trailing ')' has no matching
 * '(' or the copy cannot be allocated.
 */
static int
corpus_read_next_transcription_line(char **trans)
{
    char *s;

    /* look for a close paren in the line */
    s = strrchr(transcription_line, ')');

    if (s != NULL) {
	/* found a close paren */
	size_t nspace = strspn(s + 1, " \t\r\n");

	if (s[nspace + 1] == '\0') {
	    /* it is at the end of the line */
	    const char *utt_id;
	    char *end;

	    *s = '\0';		/* terminate the string at the paren */

	    /* search for a matching open paren; s is left one past it,
	       and never moves in front of the start of the buffer */
	    while ((s > transcription_line) && (s[-1] != '('))
		--s;

	    if (s == transcription_line) {
		E_ERROR("Expected open paren after ending close paren in line: '%s'\n", transcription_line);
		return S3_ERROR;
	    }

	    /* The id is used in place: it is already terminated where the
	       close paren used to be, so no fixed-size copy is needed. */
	    utt_id = s;

	    if (strcmp_ci(utt_id, corpus_utt()) != 0) {
		char *uttfullname = corpus_utt_full_name();
		size_t full_len = strlen(uttfullname);
		size_t id_len = strlen(utt_id);

		/* Only warn when the id is not a suffix of the full name */
		if ((full_len >= id_len) && (id_len > 0) &&
		    (strcmp_ci(&uttfullname[full_len - id_len], utt_id) != 0)) {
		    E_WARN("Utterance id in transcription file, '%s', does not match filename in control path '%s'.\n",
			   utt_id, uttfullname);
		}
	    }

	    /* look for the first non-whitespace character before
	       the open paren; end is left one past it */
	    end = s - 1;	/* position of the open paren */
	    while ((end > transcription_line) && isspace((unsigned char)end[-1]))
		--end;

	    if (end == transcription_line) {
		E_FATAL("Utterance transcription is empty: %s\n", transcription_line);
	    }

	    *end = '\0';	/* terminate the string at the first whitespace character
				   following the last non-whitespace character found above */
	}
	else {
	    /* close paren not at end of line so assume it is not
	       the close paren associated with the utt id */
	}
    }
    else {
	/* No close paren, so no utt id */

	/* This is fine, but the user gets no explicit sanity check
	   for the ordering of the LSN file */
    }

    *trans = strdup(transcription_line);
    if (*trans == NULL) {
	E_ERROR("Unable to allocate a copy of the transcription line\n");
	return S3_ERROR;
    }

    return S3_SUCCESS;
}
コード例 #5
0
ファイル: corpus.c プロジェクト: lliuguangbo/sphinxtrain
/*
 * Read the MFCC data of the current utterance.
 *
 *   mfc     - when non-NULL, receives an array of n_f row pointers into
 *             the coefficient buffer read from file (row i starts at
 *             coefficient i * veclen); set to NULL when no frames were
 *             read.  When NULL, only the frame count is determined.
 *   n_frame - when non-NULL, receives the number of frames
 *   veclen  - number of coefficients per frame; must be non-zero
 *
 * The whole file is read when neither a start nor an end frame is set
 * for the current control entry, otherwise just the requested region.
 * A failed read is retried after a 3 second sleep and becomes fatal
 * after more than 100 failures.
 *
 * Returns S3_SUCCESS, or S3_ERROR when the corpus is not configured to
 * provide MFCC data or veclen is zero.
 */
int
corpus_get_generic_featurevec(vector_t **mfc,
			      int32 *n_frame,
			      uint32 veclen)
{
    vector_t *out;
    float32 *coeff = NULL;	/* stays NULL unless the reader fills it in */
    float32 **cptr;
    uint32 n_f;
    uint32 n_c = 0;
    uint32 i, j;
    int ret = S3_ERROR;
    uint32 no_retries = 0;

    if (!requires_mfcc) {
	/* asked for mfc data, but not set up to send it */
	return S3_ERROR;
    }

    if (veclen == 0) {
	/* would otherwise divide by zero when computing the frame count */
	E_ERROR("MFCC vector length must be non-zero\n");
	return S3_ERROR;
    }

    /* If mfc == NULL, just get the number of frames. */
    cptr = mfc ? &coeff : NULL;

    do {
	if ((cur_ctl_sf == NO_FRAME) && (cur_ctl_ef == NO_FRAME)) {
	    ret = areadfloat(mk_filename(DATA_TYPE_MFCC, cur_ctl_path),
			     cptr, (int *)&n_c);
	}
	else if ((cur_ctl_sf != NO_FRAME) && (cur_ctl_ef != NO_FRAME)) {
	    ret = areadfloat_part(mk_filename(DATA_TYPE_MFCC, cur_ctl_path),
				  cur_ctl_sf * veclen,
				  (cur_ctl_ef + 1) * veclen - 1,
				  cptr, (int *)&n_c);
	}
	else {
	    E_FATAL("Both start and end frame must be set in the ctl file\n");
	}

	if (ret == S3_ERROR) {
	    E_ERROR_SYSTEM("Failed to read MFC file '%s'. Retrying after sleep...\n",
		    mk_filename(DATA_TYPE_MFCC, cur_ctl_path));
	    no_retries++;
	    sleep(3);
	    if(no_retries>100){
	      E_FATAL("Failed to get the files after 100 retries (about 300 seconds)\n");
	    }
	}
    } while (ret == S3_ERROR);

    if ((ret == 0) && (cur_ctl_sf != NO_FRAME) && (cur_ctl_ef != NO_FRAME)) {
	E_ERROR("Region [%d %d] for %s extends beyond end of file\n",
		cur_ctl_sf, cur_ctl_ef, corpus_utt());
    }

    if ((n_c % veclen) != 0) {
	E_FATAL("Expected mfcc vector len of %d, got %d (%d)\n", veclen, n_c % veclen, n_c);
    }

    n_f = n_c / veclen;

    if (n_f == 0) {
	/* Nothing usable was read.  Return right away so that *mfc stays
	 * NULL instead of being replaced by a zero-length row array, and
	 * release whatever buffer the reader may have handed back. */
	if (coeff)
	    ckd_free(coeff);
	if (mfc)
	    *mfc = NULL;
	if (n_frame)
	    *n_frame = 0;

	return S3_SUCCESS;
    }

    if (mfc && coeff) {
	out = (vector_t *)ckd_calloc(n_f, sizeof(vector_t));

	/* one row pointer per frame into the flat coefficient buffer */
	for (i = 0, j = 0; i < n_f; i++, j += veclen) {
	    out[i] = &coeff[j];
	}

	*mfc = out;
    }
    if (n_frame)
	*n_frame = n_f;

    return S3_SUCCESS;
}
コード例 #6
0
ファイル: cnt_phn_seg.c プロジェクト: 10v/cmusphinx
/*
 * Count, for every acoustic model, how many segments occur in the
 * corpus and how many frames each of those segments spans.
 *
 *   mdef            - model definition; its acmod_set sizes the result
 *   lex             - lexicon passed to get_next_phnseq()
 *   out_n_seg       - receives an array with the segment count per model
 *   out_n_frame_per - receives, per model, an array holding the length of
 *                     each of its segments (entry left NULL for models
 *                     without segments)
 *
 * Segment lengths are first collected in per-model linked lists (hd/tl)
 * and flattened into arrays once the whole corpus has been read.
 *
 * Always returns S3_SUCCESS.
 */
int
cnt_phn_seg(model_def_t *mdef,
	    lexicon_t *lex,
	    uint32 **out_n_seg,
	    uint32 ***out_n_frame_per)
{
    uint32 seq_no = 0;
    uint16 *seg;
    uint32 n_frame;
    uint32 i, j;
    uint32 n_acmod;
    uint32 *phone;
    uint32 n_phone;
    uint32 *n_seg;
    uint32 **n_frame_per;
    uint32 *start;
    uint32 *len;
    seg_len_t *cur;
    seg_len_t *tmp;
    seg_len_t *phn_tl;

    n_acmod = acmod_set_n_acmod(mdef->acmod_set);

    E_INFO("Counting # occ. for %u models\n", n_acmod);
    
    n_seg = ckd_calloc(n_acmod, sizeof(uint32));
    hd = ckd_calloc(n_acmod, sizeof(seg_len_t *));
    tl = ckd_calloc(n_acmod, sizeof(seg_len_t *));

    for (seq_no = corpus_get_begin(); corpus_next_utt(); seq_no++) {
	if (!(seq_no % 250)) {
	    fprintf(stderr, " cnt[%u]", seq_no);
	    fflush(stderr);
	}

	/* NOTE(review): the status codes of corpus_get_seg(), ck_seg()
	 * and mk_seg() are not checked here, as in the original code. */
	corpus_get_seg(&seg, &n_frame);

	phone = get_next_phnseq(mdef, lex, &n_phone);
	
	ck_seg(mdef->acmod_set, phone, n_phone, seg, n_frame, corpus_utt());
	
	start = ckd_calloc(n_phone, sizeof(uint32));
	len = ckd_calloc(n_phone, sizeof(uint32));

	mk_seg(mdef->acmod_set, seg, n_frame, phone, start, len, n_phone);

	ckd_free(start);
	ckd_free(seg);

	for (i = 0; i < n_phone; i++) {
	    /* append the len to the list of phone[i] */

	    phn_tl = tl[phone[i]];

	    cur = (seg_len_t *)ckd_calloc(1, sizeof(seg_len_t));

	    cur->len = len[i];

	    if (phn_tl == NULL) {
		hd[phone[i]] = tl[phone[i]] = cur;
	    }
	    else {
		phn_tl->nxt = cur;
		tl[phone[i]] = cur;
	    }
	}

	/* phone is still indexed by the loop above, so it may only be
	 * released once that loop has finished */
	ckd_free(phone);
	ckd_free(len);
    }

    n_frame_per = (uint32 **)ckd_calloc(n_acmod, sizeof(uint32 *));

    for (i = 0; i < n_acmod; i++) {
	if (hd[i] == NULL) {
	    n_seg[i] = 0;
	}
	else {
	    /* first pass: count the list nodes */
	    for (cur = hd[i], j = 0; cur != NULL; j++, cur = cur->nxt);

	    n_seg[i] = j;

	    n_frame_per[i] = (uint32 *)ckd_calloc(n_seg[i], sizeof(uint32));

	    /* second pass: copy the lengths into the flat array */
	    for (cur = hd[i], j = 0; cur != NULL; j++, cur = cur->nxt)
		n_frame_per[i][j] = cur->len;

	    /* the list itself is no longer needed */
	    for (cur = hd[i]; cur != NULL; cur = tmp) {
		tmp = cur->nxt;
		ckd_free(cur);
	    }
	    
	    E_INFO("phn= %s n_seg= %u\n",
		   acmod_set_id2name(mdef->acmod_set, i),
		   n_seg[i]);
	}
    }

    ckd_free(hd);
    ckd_free(tl);

    *out_n_seg = n_seg;
    *out_n_frame_per = n_frame_per;

    return S3_SUCCESS;
}
コード例 #7
0
/*
 * Accumulate reestimation counts for one utterance along the single best
 * state path.
 *
 * forward() is run first; the back pointers it produces are then
 * followed from the final state at the last frame back to frame 0.  For
 * the emitting state visited at each frame the Gaussian densities are
 * evaluated and the mixing weight, transition and mean/variance
 * accumulators selected by the *_reest flags are updated.  On success
 * the utterance accumulators are added to the global ones through
 * accum_global() and the log probability of the observations is stored
 * in *log_forw_prob.
 *
 * If "-outphsegdir" is set, a phone segmentation file is written for the
 * utterance with write_phseg().
 *
 * spthresh is not referenced by this function.
 *
 * Returns S3_SUCCESS, or the failing status when forward() fails or the
 * final state is not active in the last frame; in that case the
 * utterance is ignored and *log_forw_prob is left untouched.
 */
int32
viterbi_update(float64 *log_forw_prob,
	       vector_t **feature,
	       uint32 n_obs,
	       state_t *state_seq,
	       uint32 n_state,
	       model_inventory_t *inv,
	       float64 a_beam,
	       float32 spthresh,
	       s3phseg_t *phseg,
	       int32 mixw_reest,
	       int32 tmat_reest,
	       int32 mean_reest,
	       int32 var_reest,
	       int32 pass2var,
	       int32 var_is_full,
	       FILE *pdumpfh,
	       feat_t *fcb)
{
    float64 *scale = NULL;
    float64 **dscale = NULL;
    float64 **active_alpha;
    uint32 **active_astate;
    uint32 **bp;
    uint32 *n_active_astate;
    gauden_t *g;		/* Gaussian density parameters and
				   reestimation sums */
    float32 ***mixw;		/* all mixing weights */
    float64 ***now_den = NULL;	/* Short for den[t] */
    uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */
    uint32 *active_cb;
    uint32 n_active_cb;
    float32 **tacc;		/* Transition matrix reestimation sum accumulators
				   for the utterance. */
    float32 ***wacc;		/* mixing weight reestimation sum accumulators
				   for the utterance. */
    float32 ***denacc = NULL;	/* mean/var reestimation accumulators for time t */
    size_t denacc_size;		/* Total size of data references in denacc.  Allows
				   for quick clears between time frames */
    uint32 n_lcl_cb;
    uint32 *cb_inv;
    uint32 i, j, q;
    int32 t;
    uint32 n_feat;
    uint32 n_density;
    uint32 n_top;
    int ret;
    timing_t *fwd_timer = NULL;
    timing_t *rstu_timer = NULL;
    timing_t *gau_timer = NULL;
    timing_t *rsts_timer = NULL;
    timing_t *rstf_timer = NULL;
    float64 log_fp;	/* accumulator for the log of the probability
			 * of observing the input given the model */
    uint32 max_n_next = 0;
    uint32 n_cb;

    /* Scratch buffers that are allocated on the first call and reused
     * by all later calls */
    static float64 *p_op = NULL;
    static float64 *p_ci_op = NULL;
    static float64 **d_term = NULL;
    static float64 **d_term_ci = NULL;

    /* caller must ensure that there is some non-zero amount
       of work to be done here */
    assert(n_obs > 0);
    assert(n_state > 0);

    /* Get the forward estimation CPU timer */
    fwd_timer = timing_get("fwd");
    /* Get the per utterance reestimation CPU timer */
    rstu_timer = timing_get("rstu");
    /* Get the Gaussian density evaluation CPU timer */
    gau_timer = timing_get("gau");
    /* Get the per state reestimation CPU timer */
    rsts_timer = timing_get("rsts");
    /* Get the per frame reestimation CPU timer */
    rstf_timer = timing_get("rstf");

    g = inv->gauden;
    n_feat = gauden_n_feat(g);
    n_density = gauden_n_density(g);
    n_top = gauden_n_top(g);
    n_cb = gauden_n_mgau(g);

    if (p_op == NULL) {
	p_op    = ckd_calloc(n_feat, sizeof(float64));
	p_ci_op = ckd_calloc(n_feat, sizeof(float64));
    }

    if (d_term == NULL) {
	d_term    = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
	d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
    }

    scale = (float64 *)ckd_calloc(n_obs, sizeof(float64));
    dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32));
    active_alpha  = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));
    active_cb = ckd_calloc(2*n_state, sizeof(uint32));
    bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));

    /* Run forward algorithm, which has embedded Viterbi. */
    if (fwd_timer)
	timing_start(fwd_timer);
    ret = forward(active_alpha, active_astate, n_active_astate, bp,
		  scale, dscale,
		  feature, n_obs, state_seq, n_state,
		  inv, a_beam, phseg, 0);
    /* Dump a phoneme segmentation if requested */
    if (cmd_ln_str("-outphsegdir")) {
	    const char *phsegdir;
	    char *segfn, *uttid;

	    phsegdir = cmd_ln_str("-outphsegdir");
	    uttid = (cmd_ln_int32("-outputfullpath")
		     ? corpus_utt_full_name() : corpus_utt());
	    /* room for "<dir>/<uttid>.phseg" plus the terminating NUL */
	    segfn = ckd_calloc(strlen(phsegdir) + 1
			       + strlen(uttid)
			       + strlen(".phseg") + 1, 1);
	    strcpy(segfn, phsegdir);
	    strcat(segfn, "/");
	    strcat(segfn, uttid);
	    strcat(segfn, ".phseg");
	    write_phseg(segfn, inv, state_seq, active_astate, n_active_astate,
			n_state, n_obs, active_alpha, scale, bp);
	    ckd_free(segfn);
    }
    if (fwd_timer)
	timing_stop(fwd_timer);


    if (ret != S3_SUCCESS) {

	/* Some problem with the utterance, release per utterance storage and
	 * forget about adding the utterance accumulators to the global accumulators */

	goto all_done;
    }

    mixw = inv->mixw;

    if (mixw_reest) {
	/* Need to reallocate mixing accumulators for utt */
	if (inv->l_mixw_acc) {
	    ckd_free_3d((void ***)inv->l_mixw_acc);
	    inv->l_mixw_acc = NULL;
	}
	inv->l_mixw_acc = (float32 ***)ckd_calloc_3d(inv->n_mixw_inverse,
						     n_feat,
						     n_density,
						     sizeof(float32));
    }
    wacc = inv->l_mixw_acc;
    n_lcl_cb = inv->n_cb_inverse;
    cb_inv = inv->cb_inverse;

    /* Allocate local accumulators for mean, variance reestimation
       sums if necessary */
    gauden_alloc_l_acc(g, n_lcl_cb,
		       mean_reest, var_reest,
		       var_is_full);

    if (tmat_reest) {
	if (inv->l_tmat_acc) {
	    ckd_free_2d((void **)inv->l_tmat_acc);
	    inv->l_tmat_acc = NULL;
	}
	/* size the second dimension for the state with the most successors */
	for (i = 0; i < n_state; i++) {
	    if (state_seq[i].n_next > max_n_next)
		max_n_next = state_seq[i].n_next;
	}
	inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state,
						    max_n_next,
						    sizeof(float32));
    }
    /* transition matrix reestimation sum accumulators
       for the utterance */
    tacc = inv->l_tmat_acc;

    n_active_cb = 0;
    now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb,
					 n_feat,
					 n_top,
					 sizeof(float64));
    now_den_idx =  (uint32 ***)ckd_calloc_3d(n_lcl_cb,
					     n_feat,
					     n_top,
					     sizeof(uint32));

    if (mean_reest || var_reest) {
	/* allocate space for the per frame density counts */
	denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb,
					    n_feat,
					    n_density,
					    sizeof(float32));

	/* # of bytes required to store all weighted vectors */
	denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32);
    }
    else {
	denacc = NULL;
	denacc_size = 0;
    }

    /* Okay now run through the backtrace and accumulate counts. */
    /* Find the non-emitting ending state */
    for (q = 0; q < n_active_astate[n_obs-1]; ++q) {
	if (active_astate[n_obs-1][q] == n_state-1)
	    break;
    }
    if (q == n_active_astate[n_obs-1]) {
	E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n");
	ret = S3_ERROR;
	goto all_done;
    }

    for (t = n_obs-1; t >= 0; --t) {
	uint32 l_cb;
	uint32 l_ci_cb;
	float64 op, p_reest_term;
	uint32 prev;

	j = active_astate[t][q];

	/* Follow any non-emitting states at time t first. */
	while (state_seq[j].mixw == TYING_NON_EMITTING) {
	    prev = active_astate[t][bp[t][q]];

#if VITERBI_DEBUG
	    printf("Following non-emitting state at time %d, %u => %u\n",
		   t, j, prev);
#endif
	    /* Backtrace and accumulate transition counts. */
	    if (tmat_reest) {
		assert(tacc != NULL);
		tacc[prev][j - prev] += 1.0;
	    }
	    q = bp[t][q];
	    j = prev;
	}

	/* Now accumulate statistics for the real state. */
	l_cb = state_seq[j].l_cb;
	l_ci_cb = state_seq[j].l_ci_cb;
	n_active_cb = 0;

	if (gau_timer)
	    timing_start(gau_timer);

	gauden_compute_log(now_den[l_cb],
			   now_den_idx[l_cb],
			   feature[t],
			   g,
			   state_seq[j].cb,
			   NULL);
	active_cb[n_active_cb++] = l_cb;

	if (l_cb != l_ci_cb) {
	    gauden_compute_log(now_den[l_ci_cb],
			       now_den_idx[l_ci_cb],
			       feature[t],
			       g,
			       state_seq[j].ci_cb,
			       NULL);
	    active_cb[n_active_cb++] = l_ci_cb;
	}
	gauden_scale_densities_bwd(now_den, now_den_idx,
				   &dscale[t],
				   active_cb, n_active_cb, g);

	assert(state_seq[j].mixw != TYING_NON_EMITTING);
	/* Now calculate mixture densities. */
	/* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */
	op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb],
			    mixw[state_seq[j].mixw], g);
	if (gau_timer)
	    timing_stop(gau_timer);

	if (rsts_timer)
	    timing_start(rsts_timer);
	/* Make up this bogus value to be consistent with backward.c */
	p_reest_term = 1.0 / op;

	/* Compute the output probability excluding the contribution
	 * of each feature stream.  i.e. p_op[0] is the output
	 * probability excluding feature stream 0 */
	partial_op(p_op,
		   op,
		   now_den[l_cb],
		   now_den_idx[l_cb],
		   mixw[state_seq[j].mixw],
		   n_feat,
		   n_top);

	/* compute the probability of each (of possibly topn) density */
	den_terms(d_term,
		  p_reest_term,
		  p_op,
		  now_den[l_cb],
		  now_den_idx[l_cb],
		  mixw[state_seq[j].mixw],
		  n_feat,
		  n_top);

	if (l_cb != l_ci_cb) {
	    /* For each feature stream f, compute:
	     *     sum_k(mixw[f][k] den[f][k])
	     * and store the results in p_ci_op */
	    partial_ci_op(p_ci_op,
			  now_den[l_ci_cb],
			  now_den_idx[l_ci_cb],
			  mixw[state_seq[j].ci_mixw],
			  n_feat,
			  n_top);

	    /* For each feature stream and density compute the terms:
	     *   w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j
	     * and store results in d_term_ci */
	    den_terms_ci(d_term_ci,
			 1.0, /* post_j = 1.0 */
			 p_ci_op,
			 now_den[l_ci_cb],
			 now_den_idx[l_ci_cb],
			 mixw[state_seq[j].ci_mixw],
			 n_feat,
			 n_top);
	}


	/* accumulate the probability for each density in the mixing
	 * weight reestimation accumulators */
	if (mixw_reest) {
	    accum_den_terms(wacc[state_seq[j].l_mixw], d_term,
			    now_den_idx[l_cb], n_feat, n_top);

	    /* check if mixw and ci_mixw are different to avoid
	     * doubling the EM counts in a CI run. */
	    if (state_seq[j].mixw != state_seq[j].ci_mixw) {
		if (n_cb < inv->n_mixw) {
		    /* semi-continuous, tied mixture, and discrete case */
		    accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term,
				    now_den_idx[l_cb], n_feat, n_top);
		}
		else {
		    /* continuous case */
		    accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term_ci,
				    now_den_idx[l_ci_cb], n_feat, n_top);
		}
	    }
	}

	/* accumulate the probability for each density in the
	 * density reestimation accumulators */
	if (mean_reest || var_reest) {
	    accum_den_terms(denacc[l_cb], d_term,
			    now_den_idx[l_cb], n_feat, n_top);
	    if (l_cb != l_ci_cb) {
		accum_den_terms(denacc[l_ci_cb], d_term_ci,
				now_den_idx[l_ci_cb], n_feat, n_top);
	    }
	}

	if (rsts_timer)
	    timing_stop(rsts_timer);
	/* Note that there is only one state/frame so this is kind of
	   redundant */
	if (rstf_timer)
	    timing_start(rstf_timer);
	if (mean_reest || var_reest) {
	    /* Update the mean and variance reestimation accumulators */
	    if (pdumpfh)
		fprintf(pdumpfh, "time %d:\n", t);
	    accum_gauden(denacc,
			 cb_inv,
			 n_lcl_cb,
			 feature[t],
			 now_den_idx,
			 g,
			 mean_reest,
			 var_reest,
			 pass2var,
			 inv->l_mixw_acc,
			 var_is_full,
			 pdumpfh,
			 fcb);
	    /* clear the per frame counts for the next frame */
	    memset(&denacc[0][0][0], 0, denacc_size);
	}
	if (rstf_timer)
	    timing_stop(rstf_timer);

	if (t > 0) {
	    prev = active_astate[t-1][bp[t][q]];
#if VITERBI_DEBUG
	    printf("Backtrace at time %d, %u => %u\n",
		   t, j, prev);
#endif
	    /* Backtrace and accumulate transition counts. */
	    if (tmat_reest) {
		assert(tacc != NULL);
		tacc[prev][j-prev] += 1.0;
	    }
	    q = bp[t][q];
	    j = prev;
	}
    }

    /* If no error was found, add the resulting utterance reestimation
     * accumulators to the global reestimation accumulators */
    if (rstu_timer)
	timing_start(rstu_timer);
    accum_global(inv, state_seq, n_state,
		 mixw_reest, tmat_reest, mean_reest, var_reest,
		 var_is_full);
    if (rstu_timer)
	timing_stop(rstu_timer);

    /* Find the final state */
    for (i = 0; i < n_active_astate[n_obs-1]; ++i) {
	if (active_astate[n_obs-1][i] == n_state-1)
	    break;
    }
    /* Calculate log[ p( O | \lambda ) ] */
    assert(active_alpha[n_obs-1][i] > 0);
    log_fp = log(active_alpha[n_obs-1][i]);
    for (t = 0; t < n_obs; t++) {
	assert(scale[t] > 0);
	log_fp -= log(scale[t]);
	for (j = 0; j < inv->gauden->n_feat; j++) {
	    log_fp += dscale[t][j];
	}
    }

    *log_forw_prob = log_fp;

 all_done:
    ckd_free((void *)scale);
    for (i = 0; i < n_obs; i++) {
	if (dscale[i])
	    ckd_free((void *)dscale[i]);
    }
    ckd_free((void **)dscale);

    ckd_free(n_active_astate);
    for (i = 0; i < n_obs; i++) {
	ckd_free((void *)active_alpha[i]);
	ckd_free((void *)active_astate[i]);
	ckd_free((void *)bp[i]);
    }
    ckd_free((void *)active_alpha);
    ckd_free((void *)active_astate);
    /* the array of back pointer rows itself was previously leaked */
    ckd_free((void *)bp);
    ckd_free((void *)active_cb);

    if (denacc)
	ckd_free_3d((void ***)denacc);

    if (now_den)
	ckd_free_3d((void ***)now_den);
    if (now_den_idx)
	ckd_free_3d((void ***)now_den_idx);

    if (ret != S3_SUCCESS)
	E_ERROR("%s ignored\n", corpus_utt_brief_name());

    return ret;
}
コード例 #8
0
ファイル: baum_welch.c プロジェクト: Jared-Prime/cmusphinx
/*
 * Run one forward/backward pass over an utterance and, when both passes
 * succeed, fold the utterance accumulators into the global ones.
 *
 * forward() fills the per-frame active state lists, alphas and scale
 * factors; backward_update() then updates the reestimation sums selected
 * by the *_reest flags.  On success the log probability of the
 * observations is stored in *log_forw_prob.
 *
 * If "-outphsegdir" is set, a phone segmentation file is written for the
 * utterance with write_phseg().
 *
 * Returns S3_SUCCESS, or S3_ERROR when either pass fails; in that case
 * the utterance is reported as ignored and *log_forw_prob is untouched.
 * All per-utterance storage is released on both paths.
 */
int32
baum_welch_update(float64 *log_forw_prob,
                  vector_t **feature,
                  uint32 n_obs,
                  state_t *state,
                  uint32 n_state,
                  model_inventory_t *inv,
                  float64 a_beam,
                  float64 b_beam,
                  float32 spthresh,
                  s3phseg_t *phseg,
                  int32 mixw_reest,
                  int32 tmat_reest,
                  int32 mean_reest,
                  int32 var_reest,
                  int32 pass2var,
                  int32 var_is_full,
                  FILE *pdumpfh,
                  bw_timers_t *timers,
                  feat_t *fcb)
{
    float64 *scale = NULL;
    float64 **dscale = NULL;
    float64 **active_alpha;
    uint32 **active_astate;
    uint32 **bp;
    uint32 *n_active_astate;
    float64 log_fp;     /* running value of log[ p( O | \lambda ) ] */
    uint32 t;           /* time */
    uint32 last;        /* index of the final frame */
    int ret;
    uint32 i,j;
    const char *phsegdir;

    /* the caller is responsible for handing over a non-empty problem */
    assert(n_obs > 0);
    assert(n_state > 0);

    last = n_obs - 1;

    scale = (float64 *)ckd_calloc(n_obs, sizeof(float64));
    dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32));
    active_alpha  = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));
    bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));

    /* Forward pass: scaled alphas and scale factors for every state and
     * frame that survives pruning */
    if (timers) {
        ptmr_start(&timers->fwd_timer);
    }

    ret = forward(active_alpha, active_astate, n_active_astate, bp,
                  scale, dscale,
                  feature, n_obs, state, n_state,
                  inv, a_beam, phseg, timers, 0);

#if BW_DEBUG
    for (i=0 ; i < n_obs; i++) {
        E_INFO("Number of active states %d at time %d\n",n_active_astate[i],i);
        E_INFO("Scale of time %d is %e \n",i,scale[i]);
        for(j=0 ; j < n_active_astate[i]; j++) {
            E_INFO("Active state: %d Active alpha: %e\n",active_astate[i][j], active_alpha[i][j]);
        }
    }
    i=0;
    j=0;
#endif

    /* Optional phone segmentation dump to "<dir>/<uttid>.phseg" */
    phsegdir = cmd_ln_str("-outphsegdir");
    if (phsegdir != NULL) {
        char *uttid;
        char *segfn;

        if (cmd_ln_int32("-outputfullpath")) {
            uttid = corpus_utt_full_name();
        }
        else {
            uttid = corpus_utt();
        }

        segfn = ckd_calloc(strlen(phsegdir) + 1
                           + strlen(uttid)
                           + strlen(".phseg") + 1, 1);
        strcpy(segfn, phsegdir);
        strcat(segfn, "/");
        strcat(segfn, uttid);
        strcat(segfn, ".phseg");

        write_phseg(segfn, inv, state, active_astate, n_active_astate,
                    n_state, n_obs, active_alpha, scale, bp);

        ckd_free(segfn);
    }

    if (timers) {
        ptmr_stop(&timers->fwd_timer);
    }

    if (ret != S3_SUCCESS) {
        /* forward pass failed: drop the utterance */
        goto error;
    }

    /* Backward pass: scaled betas plus reestimation sum updates */
    if (timers) {
        ptmr_start(&timers->bwd_timer);
    }

#if BW_DEBUG
    E_INFO("Before Backward search\n");
#endif

    ret = backward_update(active_alpha, active_astate, n_active_astate, scale, dscale,
                          feature, n_obs,
                          state, n_state,
                          inv, b_beam, spthresh,
                          mixw_reest, tmat_reest, mean_reest, var_reest, pass2var,
                          var_is_full, pdumpfh, timers, fcb);

    if (timers) {
        ptmr_stop(&timers->bwd_timer);
    }

    if (ret != S3_SUCCESS) {
        /* backward pass failed: drop the utterance */
        goto error;
    }

#if BW_DEBUG
    E_INFO("Before Global Accumulation\n");
#endif

    /* Both passes went through, so the utterance counts may be added
     * to the global reestimation accumulators */
    if (timers) {
        ptmr_start(&timers->rstu_timer);
    }
    accum_global(inv, state, n_state,
                 mixw_reest, tmat_reest, mean_reest, var_reest,
                 var_is_full);
    if (timers) {
        ptmr_stop(&timers->rstu_timer);
    }

    /* Locate the final state among the states active in the last frame */
    i = 0;
    while (i < n_active_astate[last] && active_astate[last][i] != (n_state-1)) {
        i++;
    }

    assert(i < n_active_astate[last]);

    /* Calculate log[ p( O | \lambda ) ] */
    assert(active_alpha[last][i] > 0);
    log_fp = log(active_alpha[last][i]);

    t = 0;
    while (t < n_obs) {
        assert(scale[t] > 0);
        log_fp -= log(scale[t]);
        for (j = 0; j < inv->gauden->n_feat; j++) {
            log_fp += dscale[t][j];
        }
        t++;
    }

    *log_forw_prob = log_fp;

    /* Release the per-utterance storage (success path) */
    ckd_free((void *)scale);
    ckd_free(n_active_astate);
    for (i = 0; i < n_obs; i++) {
        ckd_free((void *)active_alpha[i]);
        ckd_free((void *)active_astate[i]);
        ckd_free((void *)dscale[i]);
        ckd_free((void *)bp[i]);
    }
    ckd_free((void *)active_alpha);
    ckd_free((void *)active_astate);
    ckd_free((void **)dscale);
    ckd_free(bp);

    return S3_SUCCESS;

error:
    /* Release the per-utterance storage (failure path); rows of dscale
     * are only freed when they were actually allocated */
    ckd_free((void *)scale);
    for (i = 0; i < n_obs; i++) {
        if (dscale[i]) {
            ckd_free((void *)dscale[i]);
        }
    }
    ckd_free((void **)dscale);

    ckd_free(n_active_astate);
    for (i = 0; i < n_obs; i++) {
        ckd_free((void *)active_alpha[i]);
        ckd_free((void *)active_astate[i]);
        ckd_free((void *)bp[i]);
    }
    ckd_free((void *)active_alpha);
    ckd_free((void *)active_astate);
    ckd_free(bp);

    E_ERROR("%s ignored\n", corpus_utt_brief_name());

    return S3_ERROR;
}
コード例 #9
0
/*
 * Write every stride-th feature frame of the remaining corpus to a dump
 * file.
 *
 *   fcb    - feature control block used to compute the features
 *   type   - not referenced by this function
 *   fn     - name of the dump file, opened with open_dmp()
 *   stride - keep one frame out of every `stride`; must be non-zero
 *
 * A placeholder count is written first; once all utterances have been
 * processed the file is rewound to that position and the real number of
 * frames written is stored there.  Utterances with fewer than 9 frames
 * are skipped.  A failing frame write is retried after a 3 second sleep
 * and becomes fatal after more than 10 failures in total.
 *
 * Returns S3_SUCCESS, or S3_ERROR when stride is zero, the dump file
 * could not be opened, or the frame count could not be written.
 */
int
agg_all_seg(feat_t *fcb,
	    segdmp_type_t type,
	    const char *fn,
	    uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    int32 n_frame;	/* signed, as expected by the feature readers */
    uint32 n_out_frame;
    uint32 blksz=0;
    vector_t **feat = NULL;
    uint32 i, j;
    uint32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries=0;

    if (stride == 0) {
	/* would otherwise divide by zero in the frame selection below */
	E_ERROR("stride must be at least 1\n");

	return S3_ERROR;
    }

    /* one output block holds all streams of a single frame */
    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);
    for (i = 0, blksz = 0; i < n_stream; i++)
        blksz += veclen[i];

    fp = open_dmp(fn);
    if (fp == NULL) {
	E_ERROR_SYSTEM("Unable to open dmp file %s", fn);

	return S3_ERROR;
    }

    start = ftell(fp);

    /* placeholder for the frame count, overwritten at the end */
    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");

	return S3_ERROR;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
	 corpus_next_utt(); seq_no++) {
	/* release the data of the previous utterance */
	if (mfcc) {
	    free(mfcc[0]);
	    ckd_free(mfcc);

	    mfcc = NULL;
	}

	/* get the MFCC data for the utterance */
	if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
	    E_FATAL("Can't read input features from %s\n", corpus_utt());
	}

	if ((seq_no % 1000) == 0) {
	    E_INFO("[%u]\n", seq_no);
	}

	if (feat) {
	    feat_array_free(feat);
	    feat = NULL;
	}

	if (n_frame < 9) {
	    E_WARN("utt %s too short\n", corpus_utt());
	    if (mfcc) {
		ckd_free(mfcc[0]);
		ckd_free(mfcc);
		mfcc = NULL;
	    }
	    continue;
	}

	feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

	/* j counts frames across utterances so that the stride does not
	 * restart with every utterance */
	for (t = 0; t < (uint32)n_frame; t++, j++) {
	    if ((j % stride) == 0) {
		while (s3write(&feat[t][0][0],
			       sizeof(float32),
			       blksz,
			       fp, &ignore) != blksz) {
		    E_ERROR_SYSTEM("Unable to write to dmp file");
		    E_INFO("sleeping...\n");
		    no_retries++;

		    sleep(3);

		    if(no_retries > 10){
			E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
		    }
		}
		++n_out_frame;
	    }
	}
    }

    /* the data of the last utterance is still held at this point */
    if (mfcc) {
	free(mfcc[0]);
	ckd_free(mfcc);
	mfcc = NULL;
    }
    if (feat) {
	feat_array_free(feat);
	feat = NULL;
    }

    if (fseek(fp, start, SEEK_SET) < 0) {
	E_ERROR_SYSTEM("Unable to seek to begin of dmp");

	return S3_ERROR;
    }

    E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

    if (s3write((void *)&n_out_frame, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");
	
	return S3_ERROR;
    }

    /* NOTE(review): fp is never closed or flushed here, as in the
     * original code - confirm who owns the stream from open_dmp(). */

    return S3_SUCCESS;
}