/*
 * Release a decision tree: for every node free its mixw_occ, means, vars
 * and id arrays (when set), then free the node array and the tree itself.
 */
void
free_tree(dtree_t *tr)
{
    dtree_node_t *cur = tr->node;
    uint32 remaining;

    for (remaining = tr->n_node; remaining > 0; --remaining, ++cur) {
        if (cur->mixw_occ != NULL)
            ckd_free_3d((void ***)cur->mixw_occ);

        if (cur->means != NULL)
            ckd_free_3d((void ***)cur->means);

        if (cur->vars != NULL)
            ckd_free_3d((void ***)cur->vars);

        if (cur->id != NULL)
            ckd_free(cur->id);

        /*
         * cur->q is intentionally left alone: when tr is a simple tree it
         * points at a question in the master question list, which has to
         * stay alive after the tree is gone.
         */
    }

    ckd_free(tr->node);
    ckd_free(tr);
}
/*
 * Drop one reference to a dict2pid object.
 *
 * Returns the remaining reference count while it is still positive.  When
 * the count reaches zero the tables, the retained model definition and
 * dictionary, and the object itself are released and 0 is returned.
 * A NULL argument is accepted and returns 0.
 */
int
dict2pid_free(dict2pid_t * d2p)
{
    if (d2p == NULL)
        return 0;

    d2p->refcount -= 1;
    if (d2p->refcount > 0)
        return d2p->refcount;

    /* Last reference: tear everything down. */
    if (d2p->ldiph_lc != NULL)
        ckd_free_3d((void ***) d2p->ldiph_lc);

    if (d2p->lrdiph_rc != NULL)
        ckd_free_3d((void ***) d2p->lrdiph_rc);

    if (d2p->rssid != NULL)
        free_compress_map(d2p->rssid, bin_mdef_n_ciphone(d2p->mdef));

    if (d2p->lrssid != NULL)
        free_compress_map(d2p->lrssid, bin_mdef_n_ciphone(d2p->mdef));

    bin_mdef_free(d2p->mdef);
    dict_free(d2p->dict);
    ckd_free(d2p);

    return 0;
}
/*
 * Write the output transition matrices (otmat) to file fn.
 *
 * On a successful write both the output (otmat) and input (itmat) matrices
 * are freed and S3_SUCCESS is returned.  If the write fails nothing is
 * freed and S3_ERROR is returned.
 */
int
wr_tmat(const char *fn)
{
    if (s3tmat_write(fn, otmat, n_tmat_o, n_state_pm) == S3_SUCCESS) {
        ckd_free_3d((void ***)otmat);
        ckd_free_3d((void ***)itmat);
        return S3_SUCCESS;
    }

    return S3_ERROR;
}
/*
 * Write the output mixing weights (omixw) to file fn.
 *
 * On a successful write both the output (omixw) and input (imixw) arrays
 * are freed and S3_SUCCESS is returned.  If the write fails nothing is
 * freed and S3_ERROR is returned.
 */
int
wr_mixw(const char *fn)
{
    if (s3mixw_write(fn, omixw, n_mixw_o, n_stream, n_density) == S3_SUCCESS) {
        ckd_free_3d((void ***)omixw);
        ckd_free_3d((void ***)imixw);
        return S3_SUCCESS;
    }

    return S3_ERROR;
}
/*
 * Drop one reference to a feature-computation object.
 *
 * Returns the remaining reference count while it is still positive.  Once
 * the count reaches zero, every buffer hanging off the object, its
 * sub-objects (subvectors, CMN, AGC) and the object itself are released
 * and 0 is returned.  A NULL argument is accepted and returns 0.
 */
int
feat_free(feat_t * f)
{
    if (f == NULL)
        return 0;

    f->refcount -= 1;
    if (f->refcount > 0)
        return f->refcount;

    /* Cepstral buffers */
    if (f->cepbuf != NULL)
        ckd_free_2d((void **) f->cepbuf);
    ckd_free(f->tmpcepbuf);

    if (f->name != NULL)
        ckd_free((void *) f->name);

    /* LDA transform */
    if (f->lda != NULL)
        ckd_free_3d((void ***) f->lda);

    /* Stream / subvector bookkeeping */
    ckd_free(f->stream_len);
    ckd_free(f->sv_len);
    ckd_free(f->sv_buf);
    subvecs_free(f->subvecs);

    /* Normalisation state */
    cmn_free(f->cmn_struct);
    agc_free(f->agc_struct);

    ckd_free(f);
    return 0;
}
/*
 * RAH, release the memory allocated in tmat_init().
 * A NULL argument is a no-op.
 */
void
tmat_free(tmat_t *t)
{
    if (t == NULL)
        return;

    if (t->tp != NULL)
        ckd_free_3d((void ***) t->tp);

    ckd_free((void *) t);
}
/*
 * Release a semi-continuous acoustic model and everything it owns.
 *
 * A NULL argument is a no-op.
 *
 * Mixture weights are released in one of two ways, depending on whether a
 * sendump file mapping is attached (s->sendump_mmap): with a mapping, the
 * per-feature rows and the top-level array are freed individually and the
 * mapping is unmapped; otherwise the whole array is released with
 * ckd_free_3d().  Means and variances are always released row by row.
 */
void
s2_semi_mgau_free(s2_semi_mgau_t *ps)
{
    s2_semi_mgau_t *s = ps;
    uint32 i;

    if (s == NULL)
        return;

    logmath_free(s->lmath);
    logmath_free(s->lmath_8b);

    if (s->sendump_mmap) {
        for (i = 0; i < s->n_feat; ++i) {
            ckd_free(s->mixw[i]);
        }
        ckd_free(s->mixw);
        mmio_file_unmap(s->sendump_mmap);
    }
    else {
        ckd_free_3d((void ***)s->mixw);
    }

    if (s->means) {
        for (i = 0; i < s->n_feat; ++i) {
            ckd_free(s->means[i]);
        }
        ckd_free(s->means);
    }

    if (s->vars) {
        for (i = 0; i < s->n_feat; ++i) {
            ckd_free(s->vars[i]);
        }
        ckd_free(s->vars);
    }

    for (i = 0; i < s->n_kdtrees; ++i)
        free_kd_tree(s->kdtrees[i]);
    ckd_free(s->kdtrees);

    ckd_free(s->veclen);
    ckd_free(s->topn_beam);

    /* Casts match the dimensionality of the free routine being called. */
    ckd_free_2d((void **)s->topn_hist_n);
    ckd_free_3d((void ***)s->topn_hist);
    ckd_free_2d((void **)s->dets);

    ckd_free(s);
}
/*
 * Load an LDA transform from ldafile into the file-level `lda` state,
 * replacing (and freeing) any transform loaded previously.
 *
 * dim is recorded as lda.lda_dim.  Returns S3_SUCCESS when lda_read()
 * yields a transform and S3_ERROR when it returns NULL.  The number of
 * columns read is asserted to equal the block size reported by the
 * current feature configuration (feat_conf[fid].blksize()).
 */
int32
feat_read_lda(const char *ldafile, uint32 dim)
{
    /* Discard any previously loaded transform. */
    if (lda.lda != NULL)
        ckd_free_3d((void ***)lda.lda);

    lda.lda = lda_read(ldafile, &lda.n_lda, &lda.lda_rows, &lda.lda_cols);

    if (lda.lda != NULL) {
        lda.lda_dim = dim;
        assert(lda.lda_cols == feat_conf[fid].blksize());
        return S3_SUCCESS;
    }

    return S3_ERROR;
}
/*
 * Release a sub-vector quantisation object: the contents of each
 * per-subvector Gaussian table and feature-dimension list, then the map,
 * the table and list arrays, and finally the object itself.
 */
void
subvq_free(subvq_t *s)
{
    int32 sv = 0;

    while (sv < s->n_sv) {
        vector_gautbl_free(&(s->gautbl[sv]));
        ckd_free((void *) s->featdim[sv]);
        sv++;
    }

    ckd_free_3d((void ***) s->map);
    ckd_free((void *) s->gautbl);
    ckd_free((void *) s->featdim);

    ckd_free((void *) s);
}
/*
 * Release a Gaussian density set: mean and variance parameters, the
 * determinant array, the feature-length array, and the object itself.
 * A NULL argument is a no-op.
 */
void
gauden_free(gauden_t * g)
{
    if (g == NULL)
        return;

    if (g->mean != NULL)
        gauden_param_free(g->mean);

    if (g->var != NULL)
        gauden_param_free(g->var);

    if (g->det != NULL)
        ckd_free_3d(g->det);

    if (g->featlen != NULL)
        ckd_free(g->featlen);

    ckd_free(g);
}
/*
 * Release a Gaussian density set: mean and variance parameters, the
 * determinant array, the feature-length array, and the object itself.
 * A NULL argument is a no-op.
 *
 * The `dist` buffer (declared outside this function) is also released.
 * It is reset to NULL afterwards so that a later call to gauden_free()
 * does not free the same buffer a second time.
 */
void
gauden_free(gauden_t * g)
{
    if (g == NULL)
        return;

    if (g->mean)
        gauden_param_free(g->mean);
    if (g->var)
        gauden_param_free(g->var);
    if (g->det)
        ckd_free_3d((void *) g->det);
    if (g->featlen)
        ckd_free(g->featlen);

    if (dist) {
        ckd_free(dist);
        dist = NULL;    /* prevent a double free on the next call */
    }

    ckd_free(g);
}
/*
 * Release a multi-stream mixture Gaussian model: its Gaussian densities,
 * senones, distance scratch array, active-codebook list, and the model
 * object itself.  A NULL argument is a no-op.
 */
void
ms_mgau_free(ps_mgau_t * mg)
{
    ms_mgau_model_t *model = (ms_mgau_model_t *)mg;

    if (model == NULL)
        return;

    if (model->g != NULL)
        gauden_free(model->g);

    if (model->s != NULL)
        senone_free(model->s);

    if (model->dist != NULL)
        ckd_free_3d((void *) model->dist);

    if (model->mgau_active != NULL)
        ckd_free(model->mgau_active);

    ckd_free(model);
}
/*
 * Release a sub-vector quantisation object: the per-subvector mean and
 * variance tables and feature-dimension lists, then the top-level arrays,
 * the map, and the object itself.
 */
void
subvq_free(subvq_t *s)
{
    int32 sv = 0;

    /* Per-subvector storage */
    while (sv < s->n_sv) {
        ckd_free_2d((void **) s->mean[sv]);
        ckd_free_2d((void **) s->var[sv]);
        ckd_free((void *) s->featdim[sv]);
        sv++;
    }

    /* Top-level arrays */
    ckd_free((void *) s->svsize);
    ckd_free((void *) s->featdim);
    ckd_free((void *) s->mean);
    ckd_free((void *) s->var);
    ckd_free_3d((void ***) s->map);

    ckd_free((void *) s);
}
/*
 * Release MLLR regression accumulators.
 *
 * regl and regr are indexed [class][stream]; each regl[class][stream] is
 * released as a 3-D array and each regr[class][stream] as a 2-D array,
 * after which the two [class][stream] index tables themselves are freed.
 * Always returns S3_SUCCESS.
 */
int32
free_mllr_reg(float32 *****regl,
              float32 ****regr,
              uint32 n_class,
              uint32 n_stream)
{
    uint32 cls;
    uint32 strm;

    cls = 0;
    while (cls < n_class) {
        strm = 0;
        while (strm < n_stream) {
            ckd_free_3d((void ***)regl[cls][strm]);
            ckd_free_2d((void **)regr[cls][strm]);
            ++strm;
        }
        ++cls;
    }

    ckd_free_2d((void **)regl);
    ckd_free_2d((void **)regr);

    return S3_SUCCESS;
}
/*
 * RAH, free memory allocated by subvq_init().
 *
 * A NULL argument is a no-op.  Each member is released only when it is
 * non-NULL.  The per-subvector feature-dimension lists are walked only
 * when the featdim array itself exists, so a partially initialised object
 * (featdim == NULL with n_sv > 0) is handled without dereferencing NULL.
 *
 * NOTE(review): the contents of the individual s->gautbl[i] tables are not
 * released here (the vector_gautbl_free() call was disabled in the
 * original); only the gautbl array itself is freed.  Confirm whether that
 * is intentional.
 */
void
subvq_free(subvq_t *s)
{
    int i;

    if (s == NULL)
        return;

    if (s->featdim) {
        for (i = 0; i < s->n_sv; i++) {
            if (s->featdim[i])
                ckd_free((void *) s->featdim[i]);
        }
        ckd_free((void *) s->featdim);
    }

    /* Free gaussian table */
    if (s->gautbl)
        ckd_free((void *) s->gautbl);

    /* Free map */
    if (s->map)
        ckd_free_3d((void ***) s->map);

    if (s->subvec)
        ckd_free((void *) s->subvec);

    if (s->vqdist)
        ckd_free_2d((void **) s->vqdist);

    if (s->gauscore)
        ckd_free((void *) s->gauscore);

    if (s->mgau_sl)
        ckd_free((void *) s->mgau_sl);

    ckd_free((void *) s);
}
/*
 * Shut down a live decoder: release the command-line state, the knowledge
 * base and the feature buffer, then mark the decoder idle.
 * Always returns 0.
 */
int
ld_finish(live_decoder_t *decoder)
{
    cmd_ln_free();

    /*
     * decoder->kb.uttid is deliberately not freed here: per the original
     * note (lgalescu@ihmc) the string is allocated together with the
     * decoder.
     */
    kb_free(&decoder->kb);

    /*
     * The feature buffer is released as a single 3-D array (lgalescu@ihmc
     * replaced an earlier ckd_free() + ckd_free_2d() pair with this call;
     * see the implementation of feat_array_alloc()).  The pointer is
     * cleared afterwards so the decoder does not keep a dangling
     * reference.
     */
    if (decoder->features != NULL) {
        ckd_free_3d((void ***)decoder->features);
        decoder->features = NULL;
    }

    decoder->ld_state = LD_STATE_IDLE;

    return 0;
}
float64 best_q(float32 ****mixw, /* ADDITION FOR CONTINUOUS_TREES 21 May 98 */ float32 ****means, float32 ****vars, uint32 *veclen, /* END ADDITION FOR CONTINUOUS_TREES */ uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, uint32 **dfeat, uint32 n_dfeat, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 *id, uint32 n_id, float32 ***dist, /* ADDITION FOR CONTINUOUS_TREES 21 May 98 */ float64 node_wt_ent, /* Weighted entropy of node */ /* END ADDITION FOR CONTINUOUS_TREES */ quest_t **out_best_q) { float32 ***yes_dist; /* ADDITION FOR CONTINUOUS_TREES */ float32 ***yes_means=0; float32 ***yes_vars=0; float32 varfloor=0; float64 y_ent; /* END ADDITION FOR CONTINUOUS_TREES */ float64 yes_dnom, yes_norm; uint32 *yes_id; float32 ***no_dist; /* ADDITION FOR CONTINUOUS_TREES */ float32 ***no_means=0; float32 ***no_vars=0; float64 n_ent; /* END ADDITION FOR CONTINUOUS_TREES */ float64 no_dnom, no_norm; uint32 *no_id; uint32 n_yes, n_b_yes = 0; uint32 n_no, n_b_no = 0; uint32 i, j, k, q, b_q=0, s; uint32 ii; float64 einc, b_einc = -1.0e+50; /* ADDITION FOR CONTINUOUS_TREES; 20 May 98 */ char* type; uint32 continuous, sumveclen=0; type = (char *)cmd_ln_access("-ts2cbfn"); if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0) E_FATAL("Type %s unsupported; trees can only be built on types .semi. 
or .cont.\n",type); if (strcmp(type,".cont.") == 0) continuous = 1; else continuous = 0; if (continuous == 1) { varfloor = *(float32 *)cmd_ln_access("-varfloor"); /* Allocating for sumveclen is overallocation, but it eases coding */ for (ii=0,sumveclen=0; ii<n_stream; ii++) sumveclen += veclen[ii]; yes_means = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); yes_vars = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); no_means = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); no_vars = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); } /* END ADDITIONS FOR CONTINUOUS_TREES */ n_yes = n_no = 0; yes_dist = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32)); no_dist = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32)); for (q = 0; q < n_all_q; q++) { memset(&yes_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); memset(&no_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); /* ADDITION FOR CONTINUOUS_TREES; If continuous hmm initialize means and vars to zero */ if (continuous == 1) { memset(&yes_means[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); memset(&yes_vars[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); memset(&no_means[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); memset(&no_vars[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); } /* END ADDITION FOR CONTINUOUS_TREES */ n_yes = n_no = 0; for (ii = 0; ii < n_id; ii++) { i = id[ii]; if (eval_quest(&all_q[q], dfeat[i], n_dfeat)) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { yes_dist[s][j][k] += mixw[i][s][j][k]; } } } /* MODIFICATION FOR CONTINUOUS_TREES: ADDITIONS FOR CONTINUOUS CASE */ if (continuous == 1) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < veclen[j]; k++) { 
yes_means[s][j][k] += mixw[i][s][j][0] * means[i][s][j][k]; yes_vars[s][j][k] += mixw[i][s][j][0] * (vars[i][s][j][k] + means[i][s][j][k]*means[i][s][j][k]); } } } } /* END MODIFICATION FOR CONTINUOUS_TREES */ ++n_yes; } else { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { no_dist[s][j][k] += mixw[i][s][j][k]; } } } /* MODIFICATION FOR CONTINUOUS_TREES: ADDITIONS FOR CONTINUOUS CASE */ if (continuous == 1) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < veclen[j]; k++) { no_means[s][j][k] += mixw[i][s][j][0] * means[i][s][j][k]; no_vars[s][j][k] += mixw[i][s][j][0] * (vars[i][s][j][k] + means[i][s][j][k]*means[i][s][j][k]); } } } } /* END MODIFICATION FOR CONTINUOUS_TREES */ ++n_no; } } if ((n_yes == 0) || (n_no == 0)) { /* no split. All satisfy or all don't satisfy */ continue; } for (s = 0, einc = 0; s < n_state; s++) { for (k = 0, yes_dnom = 0; k < n_density; k++) { yes_dnom += yes_dist[s][0][k]; } if (yes_dnom == 0) break; yes_norm = 1.0 / yes_dnom; for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { yes_dist[s][j][k] *= yes_norm; } } for (k = 0, no_dnom = 0; k < n_density; k++) { no_dnom += no_dist[s][0][k]; } if (no_dnom == 0) break; no_norm = 1.0 / no_dnom; for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { no_dist[s][j][k] *= no_norm; } } /* MODIFICATION FOR CONTINUOUS_TREES: Do appropriate operations for discrete and continuous */ if (continuous == 1) { y_ent = 0; n_ent = 0; for (j = 0; j < n_stream; j++) { if (yes_dnom != 0) { for (k = 0; k < veclen[j]; k++) { yes_means[s][j][k] *= yes_norm; yes_vars[s][j][k] = yes_vars[s][j][k]*yes_norm - yes_means[s][j][k]*yes_means[s][j][k]; if (yes_vars[s][j][k] < varfloor) yes_vars[s][j][k] = varfloor; } } if (no_dnom != 0) { for (k = 0; k < veclen[j]; k++) { no_means[s][j][k] *= no_norm; no_vars[s][j][k] = no_vars[s][j][k]*no_norm - no_means[s][j][k]*no_means[s][j][k]; if (no_vars[s][j][k] < 
varfloor) no_vars[s][j][k] = varfloor; } } y_ent += yes_dnom * ent_cont(yes_means[s][j],yes_vars[s][j],veclen[j]); n_ent += no_dnom * ent_cont(no_means[s][j],no_vars[s][j],veclen[j]); } einc += (float64)stwt[s] * (y_ent + n_ent); } else { einc += (float64)stwt[s] * wt_ent_inc(yes_dist[s], yes_dnom, no_dist[s], no_dnom, dist[s], n_stream, n_density); } } /* END MODIFICATION FOR CONTINUOUS_TREES */ /* ADDITION FOR CONTINUOUS_TREES; In current code this is true only for continous HMM */ if (continuous == 1) { einc -= node_wt_ent; } /* END ADDITION FOR CONTINUOUS_TREES */ if (s < n_state) { /* Ended iteration over states prematurely; assume 'bad' question */ continue; } if (einc > b_einc) { b_einc = einc; b_q = q; n_b_yes = n_yes; n_b_no = n_no; } } if ((n_b_yes == 0) || (n_b_no == 0)) { /* No best question */ *out_best_q = NULL; return 0; } yes_id = (uint32 *)ckd_calloc(n_b_yes, sizeof(uint32)); no_id = (uint32 *)ckd_calloc(n_b_no, sizeof(uint32)); memset(&yes_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); memset(&no_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); n_yes = n_no = 0; for (ii = 0; ii < n_id; ii++) { i = id[ii]; if (eval_quest(&all_q[b_q], dfeat[i], n_dfeat)) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { yes_dist[s][j][k] += mixw[i][s][j][k]; } } } yes_id[n_yes] = i; ++n_yes; } else { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { no_dist[s][j][k] += mixw[i][s][j][k]; } } } no_id[n_no] = i; ++n_no; } } ckd_free_3d((void ***)yes_dist); ckd_free((void *)yes_id); ckd_free_3d((void ***)no_dist); ckd_free((void *)no_id); /* ADDITION FOR CONTINUOUS_TREES */ if (continuous == 1) { ckd_free_3d((void ***)yes_means); ckd_free_3d((void ***)yes_vars); ckd_free_3d((void ***)no_means); ckd_free_3d((void ***)no_vars); } /* END ADDITION FOR CONTINUOUS_TREES */ *out_best_q = &all_q[b_q]; return b_einc; }
/*
 * Load an LDA transform from ldafile into feat, replacing any transform
 * that was loaded before.
 *
 * Parameters:
 *   feat     feature object; must be single-stream (n_stream == 1)
 *   ldafile  path of the transform file
 *   dim      requested output dimensionality; when it is <= 0 or larger
 *            than the number of rows read from the file (m), m is used
 *
 * Returns 0 on success and -1 when the object is multi-stream, the file
 * cannot be opened, or its header or body cannot be read.  A mismatch
 * between the matrix column count and feat->stream_len[0] is fatal.
 *
 * If reading the matrix body fails after a previous transform was freed,
 * feat->lda is left NULL rather than pointing at freed memory.
 */
int32
feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
{
    FILE *fh;
    int32 byteswap, chksum_present;
    uint32 chksum, i, m, n;
    char **argname, **argval;

    assert(feat);
    if (feat->n_stream != 1) {
        E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n",
                feat->n_stream);
        return -1;
    }

    if ((fh = fopen(ldafile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open transform file '%s' for reading", ldafile);
        return -1;
    }

    if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) {
        E_ERROR("Failed to read header from transform file '%s'\n", ldafile);
        fclose(fh);
        return -1;
    }

    /*
     * Scan the header.  A version mismatch only produces a warning.  The
     * presence of a checksum entry is noted, but this function does not
     * compare the checksum accumulated below against anything.
     */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
                E_WARN("%s: Version mismatch: %s, expecting %s\n",
                       ldafile, argval[i], MATRIX_FILE_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }

    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Drop the old transform and clear the pointer before reading. */
    if (feat->lda) {
        ckd_free_3d((void ***)feat->lda);
        feat->lda = NULL;
    }

    {
        /* Use a temporary variable to avoid strict-aliasing problems. */
        void ***outlda;

        if (bio_fread_3d(&outlda, sizeof(float32),
                         &feat->n_lda, &m, &n,
                         fh, byteswap, &chksum) < 0) {
            E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile);
            fclose(fh);
            return -1;
        }
        feat->lda = (void *)outlda;
    }
    fclose(fh);

#ifdef FIXED_POINT
    /* FIXME: This is a fragile hack that depends on mfcc_t and
     * float32 being the same size (which they are, but...) */
    for (i = 0; i < feat->n_lda * m * n; ++i) {
        feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]);
    }
#endif

    /* Note that SphinxTrain stores the eigenvectors as row vectors. */
    if (n != feat->stream_len[0])
        E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n",
                n, feat->stream_len[0]);

    /* Override dim from file if it is 0 or greater than m. */
    if (dim <= 0 || (uint32)dim > m) {
        dim = m;
    }
    feat->out_dim = dim;

    return 0;
}
int make_tree(float32 **mixw, // Mixture weights for [phone][state] float32 ***means, // Means for [phone][state][dim] float32 ***vars, // Variances for [phone][state][dim] int32 nphones, // Total no. of phones char **phones, // Identities of the phones int32 nstates, // No. of states per phone int32 stt, // State we are building tree for int32 ndim // Dimensionality of feature set ) { float32 ***oldmeans, ***oldvars, ***newmeans, ***newvars; float32 **oldmixw, **newmixw, ***tmp3d, **tmp2d; char **phoneid, **newphoneid, **tmpstr; int32 i,j,l,a,b,set,nsets; oldmixw = (float32 **) ckd_calloc_2d(nphones,nstates,sizeof(float32)); oldmeans = (float32***) ckd_calloc_3d(nphones,nstates,ndim,sizeof(float32)); oldvars = (float32 ***) ckd_calloc_3d(nphones,nstates,ndim,sizeof(float32)); phoneid = (char **)ckd_calloc_2d(nphones,2048,sizeof(char)); newmixw = (float32 **) ckd_calloc_2d(nphones,nstates,sizeof(float32)); newmeans = (float32***) ckd_calloc_3d(nphones,nstates,ndim,sizeof(float32)); newvars = (float32 ***) ckd_calloc_3d(nphones,nstates,ndim,sizeof(float32)); newphoneid = (char **)ckd_calloc_2d(nphones,2048,sizeof(char)); for (i=0;i<nphones;i++){ sprintf(phoneid[i],"%s",phones[i]); //Phone ids for (j=0;j<nstates;j++){ oldmixw[i][j] = mixw[i][j]; for (l=0;l<ndim;l++){ oldmeans[i][j][l] = means[i][j][l]; oldvars[i][j][l] = vars[i][j][l]; } } } for (nsets = nphones; nsets > 2; nsets--) { // Find the closest distributions findclosestpair(oldmeans,oldvars,oldmixw,nsets,stt,ndim,&a,&b); printf("Merging %s %s\n",phoneid[a],phoneid[b]); fflush(stdout); // Copy and Merge distributions... 
// Copy unmerged distributions first for (i=0,set=0;i<nsets;i++){ if (i != a && i != b){ sprintf(newphoneid[set],"%s",phoneid[i]); //Phone ids newmixw[set][stt] = oldmixw[i][stt]; for (l=0;l<ndim;l++){ newmeans[set][stt][l] = oldmeans[i][stt][l]; newvars[set][stt][l] = oldvars[i][stt][l]; } set++; } } // Merge a and b sprintf(newphoneid[set],"%s_%s",phoneid[a],phoneid[b]); { float32 *nm = newmeans[set][stt]; float32 *nv = newvars[set][stt]; float32 *oma = oldmeans[a][stt]; float32 *ova = oldvars[a][stt]; float32 *omb = oldmeans[b][stt]; float32 *ovb = oldvars[b][stt]; float32 cnta, cntb; cnta = oldmixw[a][stt]; cntb = oldmixw[b][stt]; newmixw[set][stt] = cnta + cntb; for (l=0;l<ndim;l++){ nm[l] = (cnta*oma[l] + cntb*omb[l]) / (cnta + cntb); nv[l] = cnta*(ova[l]+oma[l]*oma[l])+cntb*(ovb[l]+omb[l]*omb[l]); nv[l] = nv[l]/(cnta+cntb) - nm[l]*nm[l]; if (nv[l] < MINVAR) nv[l] = MINVAR; } } // Switch old and new variables tmp3d = oldmeans; oldmeans = newmeans; newmeans = tmp3d; tmp3d = oldvars; oldvars = newvars; newvars = tmp3d; tmp2d = oldmixw; oldmixw = newmixw; newmixw = tmp2d; tmpstr = phoneid; phoneid = newphoneid; newphoneid = tmpstr; } ckd_free_3d((void ***)oldmeans); ckd_free_3d((void ***)oldvars); ckd_free_3d((void ***)newmeans); ckd_free_3d((void ***)newvars); ckd_free_2d((void **)oldmixw); ckd_free_2d((void **)newmixw); ckd_free_2d((void **)newphoneid); ckd_free_2d((void **)phoneid); return 0; }
int make_tree (float32 **means, float32 **vars, float32 *mixw, int32 *nodephoneids, int32 nphones, int32 ndim, node *root, int32 npermute) { float32 **oldmeans, **oldvars, **newmeans, **newvars; float32 *oldmixw, *newmixw, **tmp2d, *tmp1d; float32 *meana, *vara, *meanb, *varb; float32 cnt, counta, countb, bestdec, reduction; int32 **phoneid, **newphoneid, *numphones, *newnumphones, **it2d, *it1d; int32 i,j,k,l,a,b,set,nsets,ncombinations,bestclust=0; char **identifier, *tmpid; node *left, *right; oldmixw = (float32 *) ckd_calloc(nphones,sizeof(float32)); oldmeans = (float32 **) ckd_calloc_2d(nphones,ndim,sizeof(float32)); oldvars = (float32 **) ckd_calloc_2d(nphones,ndim,sizeof(float32)); phoneid = (int32 **)ckd_calloc_2d(nphones,nphones,sizeof(int32)); numphones = (int32 *) ckd_calloc(nphones,sizeof(int32)); newmixw = (float32 *) ckd_calloc(nphones,sizeof(float32)); newmeans = (float32 **) ckd_calloc_2d(nphones,ndim,sizeof(float32)); newvars = (float32 **) ckd_calloc_2d(nphones,ndim,sizeof(float32)); newphoneid = (int32 **)ckd_calloc_2d(nphones,nphones,sizeof(int32)); newnumphones = (int32 *) ckd_calloc(nphones,sizeof(int32)); for (i=0;i<nphones;i++){ numphones[i] = 1; phoneid[i][0] = nodephoneids[i]; //Phone ids oldmixw[i] = mixw[nodephoneids[i]]; for (l=0;l<ndim;l++){ oldmeans[i][l] = means[nodephoneids[i]][l]; oldvars[i][l] = vars[nodephoneids[i]][l]; } } if (nphones > npermute){ for (nsets = nphones; nsets > npermute; nsets--) { // Find the closest distributions findclosestpair(oldmeans,oldvars,oldmixw,nsets,ndim,&a,&b); // printf("Merging %s %s\n",phoneid[a],phoneid[b]); fflush(stdout); // Copy and Merge distributions... 
// Copy unmerged distributions first for (i=0,set=0;i<nsets;i++){ if (i != a && i != b){ newnumphones[set] = numphones[i]; for (l=0;l<numphones[i];l++) newphoneid[set][l] = phoneid[i][l]; newmixw[set] = oldmixw[i]; for (l=0;l<ndim;l++){ newmeans[set][l] = oldmeans[i][l]; newvars[set][l] = oldvars[i][l]; } set++; } } // Merge a and b newnumphones[set] = numphones[a]+numphones[b]; for (i=0;i<numphones[a];i++) newphoneid[set][i] = phoneid[a][i]; for (l=0;l<numphones[b];l++,i++) newphoneid[set][i] = phoneid[b][l]; { float32 *nm = newmeans[set]; float32 *nv = newvars[set]; float32 *oma = oldmeans[a]; float32 *ova = oldvars[a]; float32 *omb = oldmeans[b]; float32 *ovb = oldvars[b]; float32 cnta, cntb; cnta = oldmixw[a]; cntb = oldmixw[b]; newmixw[set] = cnta + cntb; for (l=0;l<ndim;l++){ nm[l] = (cnta*oma[l] + cntb*omb[l]) / (cnta + cntb); nv[l] = cnta*(ova[l]+oma[l]*oma[l]) + cntb*(ovb[l]+omb[l]*omb[l]); nv[l] = nv[l]/(cnta+cntb) - nm[l]*nm[l]; if (nv[l] < MINVAR) nv[l] = MINVAR; } } // Switch old and new variables tmp2d = oldmeans; oldmeans = newmeans; newmeans = tmp2d; tmp2d = oldvars; oldvars = newvars; newvars = tmp2d; tmp1d = oldmixw; oldmixw = newmixw; newmixw = tmp1d; it2d = phoneid; phoneid = newphoneid; newphoneid = it2d; it1d = numphones; numphones = newnumphones; newnumphones = it1d; } } else npermute = nphones; if (npermute <= 2){ root->left = root->right = NULL; /* Dont split further */ return 0; } // We have npermute clusters now; permute them to get two clusters // There are 2^(npermute-1)-1 clusters possible. Test them all out. 
// Create identifiers for 2^(npermute-1) clusters for (i=1,ncombinations=1;i<npermute;i++,ncombinations*=2); identifier = (char **)ckd_calloc_2d(ncombinations,npermute,sizeof(char)); tmpid = (char *)ckd_calloc(npermute,sizeof(char)); for (i=0;i<ncombinations-1;i++){ for(j=0,tmpid[0]=!tmpid[0];!tmpid[j];j++,tmpid[j]=!tmpid[j]); for(j=0;j<npermute;j++) identifier[i][j] = tmpid[j]; } ckd_free(tmpid); // Go through the list and find best pair for (i=0,bestdec=-1.0e+30;i<ncombinations-1;i++){ meana = (float32 *)ckd_calloc(ndim,sizeof(float32)); vara = (float32 *)ckd_calloc(ndim,sizeof(float32)); meanb = (float32 *)ckd_calloc(ndim,sizeof(float32)); varb = (float32 *)ckd_calloc(ndim,sizeof(float32)); counta = countb = 0; for (j=0;j<npermute;j++){ float32 *om = oldmeans[j]; float32 *ov = oldvars[j]; cnt = oldmixw[j]; if (identifier[i][j]){ counta += cnt; for (k=0;k<ndim;k++){ meana[k] += cnt * om[k]; vara[k] += cnt*(ov[k] + om[k]*om[k]); } } else{ countb += cnt; for (k=0;k<ndim;k++){ meanb[k] += cnt * om[k]; varb[k] += cnt*(ov[k] + om[k]*om[k]); } } } for (k=0;k<ndim;k++){ meana[k] /= counta; meanb[k] /= countb; vara[k] = vara[k]/counta - meana[k]*meana[k]; varb[k] = varb[k]/countb - meanb[k]*meanb[k]; } reduction = likelhddec(meana,vara, meanb,varb,counta,countb,ndim); if (reduction > bestdec) { bestdec = reduction; bestclust = i; } ckd_free(meana);ckd_free(vara);ckd_free(meanb);ckd_free(varb); } // Now we know what the best separation is, set the appropriate left // and right trees. 
left = (node *) ckd_calloc(1,sizeof(node)); right = (node *) ckd_calloc(1,sizeof(node)); root->left = left; root->right = right; left->phoneids = (int32 *) ckd_calloc(nphones,sizeof(int32)); //Overalloc right->phoneids = (int32 *) ckd_calloc(nphones,sizeof(int32)); for (j=0;j<npermute;j++){ if (identifier[bestclust][j]){ for (l=0;l<numphones[j];l++,left->nphones++) left->phoneids[left->nphones] = phoneid[j][l]; } else { for (l=0;l<numphones[j];l++,right->nphones++) right->phoneids[right->nphones] = phoneid[j][l]; } } ckd_free_2d((void **)identifier); ckd_free_3d((void ***)oldmeans); ckd_free_3d((void ***)oldvars); ckd_free_3d((void ***)newmeans); ckd_free_3d((void ***)newvars); ckd_free_2d((void **)oldmixw); ckd_free_2d((void **)newmixw); ckd_free_2d((void **)newphoneid); ckd_free_2d((void **)phoneid); // Recurse make_tree(means,vars,mixw,left->phoneids,left->nphones,ndim, left,npermute); make_tree(means,vars,mixw,right->phoneids,right->nphones,ndim, right,npermute); return 0; }
/*
 * Build the dictionary-to-phone-ID tables for a model definition and a
 * dictionary.
 *
 * The returned object starts with a reference count of 1 and holds its
 * own retained references to mdef and dict.  Three n_ciphone^3 tables are
 * allocated and initialised to BAD_S3SSID:
 *   ldiph_lc   word-initial entries, filled for words of length >= 2
 *   lrdiph_rc  entries for single-phone words, filled via
 *              populate_lrdiph()
 *   rdiph_rc   word-final entries; a temporary that is handed to
 *              compress_right_context_tree() and then freed
 * Bit vectors track which diphones / single phones have already been
 * handled so each is populated only once.
 *
 * Both arguments must be non-NULL (asserted).
 */
dict2pid_t *
dict2pid_build(bin_mdef_t * mdef, dict_t * dict)
{
    dict2pid_t *dict2pid;
    s3ssid_t ***rdiph_rc;
    bitvec_t *ldiph, *rdiph, *single;
    int32 pronlen;
    int32 b, l, r, w, p;

    E_INFO("Building PID tables for dictionary\n");
    assert(mdef);
    assert(dict);

    dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t));
    dict2pid->refcount = 1;
    dict2pid->mdef = bin_mdef_retain(mdef);
    dict2pid->dict = dict_retain(dict);

    /* sizeof yields size_t; cast so the arguments match the %d conversions. */
    E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n",
           mdef->n_ciphone, (int) sizeof(s3ssid_t),
           (int) (mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone
                  * sizeof(s3ssid_t) / 1024));

    dict2pid->ldiph_lc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));
    /* Only used internally to generate rssid */
    rdiph_rc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));
    dict2pid->lrdiph_rc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));

    /* Actually could use memset for this, if BAD_S3SSID is guaranteed
     * to be 65535... */
    for (b = 0; b < mdef->n_ciphone; ++b) {
        for (r = 0; r < mdef->n_ciphone; ++r) {
            for (l = 0; l < mdef->n_ciphone; ++l) {
                dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
                dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID;
                rdiph_rc[b][l][r] = BAD_S3SSID;
            }
        }
    }

    /* Track which diphones / ciphones have been seen. */
    ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    single = bitvec_alloc(mdef->n_ciphone);

    for (w = 0; w < dict_size(dict2pid->dict); w++) {
        pronlen = dict_pronlen(dict, w);

        if (pronlen >= 2) {
            b = dict_first_phone(dict, w);
            r = dict_second_phone(dict, w);
            /* Populate ldiph_lc */
            if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) {
                /* Mark this diphone as done */
                bitvec_set(ldiph, b * mdef->n_ciphone + r);

                /* Record all possible ssids for b(?,r) */
                for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                                  (s3cipid_t) l,
                                                  (s3cipid_t) r,
                                                  WORD_POSN_BEGIN);
                    dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p);
                }
            }

            /* Populate rdiph_rc */
            l = dict_second_last_phone(dict, w);
            b = dict_last_phone(dict, w);
            if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) {
                /* Mark this diphone as done */
                bitvec_set(rdiph, b * mdef->n_ciphone + l);

                for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                                  (s3cipid_t) l,
                                                  (s3cipid_t) r,
                                                  WORD_POSN_END);
                    rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p);
                }
            }
        }
        else if (pronlen == 1) {
            b = dict_pron(dict, w, 0);
            E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n",
                       dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b)));
            /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */
            if (bitvec_is_clear(single, b)) {
                populate_lrdiph(dict2pid, rdiph_rc, b);
                bitvec_set(single, b);
            }
        }
    }

    bitvec_free(ldiph);
    bitvec_free(rdiph);
    bitvec_free(single);

    /* Try to compress rdiph_rc into rdiph_rc_compressed */
    compress_right_context_tree(dict2pid, rdiph_rc);
    compress_left_right_context_tree(dict2pid);

    ckd_free_3d((void ***) rdiph_rc);

    dict2pid_report(dict2pid);
    return dict2pid;
}
int32 viterbi_update(float64 *log_forw_prob, vector_t **feature, uint32 n_obs, state_t *state_seq, uint32 n_state, model_inventory_t *inv, float64 a_beam, float32 spthresh, s3phseg_t *phseg, int32 mixw_reest, int32 tmat_reest, int32 mean_reest, int32 var_reest, int32 pass2var, int32 var_is_full, FILE *pdumpfh, feat_t *fcb) { float64 *scale = NULL; float64 **dscale = NULL; float64 **active_alpha; uint32 **active_astate; uint32 **bp; uint32 *n_active_astate; gauden_t *g; /* Gaussian density parameters and reestimation sums */ float32 ***mixw; /* all mixing weights */ float64 ***now_den = NULL; /* Short for den[t] */ uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */ uint32 *active_cb; uint32 n_active_cb; float32 **tacc; /* Transition matrix reestimation sum accumulators for the utterance. */ float32 ***wacc; /* mixing weight reestimation sum accumulators for the utterance. */ float32 ***denacc = NULL; /* mean/var reestimation accumulators for time t */ size_t denacc_size; /* Total size of data references in denacc. 
Allows for quick clears between time frames */ uint32 n_lcl_cb; uint32 *cb_inv; uint32 i, j, q; int32 t; uint32 n_feat; uint32 n_density; uint32 n_top; int ret; timing_t *fwd_timer = NULL; timing_t *rstu_timer = NULL; timing_t *gau_timer = NULL; timing_t *rsts_timer = NULL; timing_t *rstf_timer = NULL; float64 log_fp; /* accumulator for the log of the probability * of observing the input given the model */ uint32 max_n_next = 0; uint32 n_cb; static float64 *p_op = NULL; static float64 *p_ci_op = NULL; static float64 **d_term = NULL; static float64 **d_term_ci = NULL; /* caller must ensure that there is some non-zero amount of work to be done here */ assert(n_obs > 0); assert(n_state > 0); /* Get the forward estimation CPU timer */ fwd_timer = timing_get("fwd"); /* Get the per utterance reestimation CPU timer */ rstu_timer = timing_get("rstu"); /* Get the Gaussian density evaluation CPU timer */ gau_timer = timing_get("gau"); /* Get the per state reestimation CPU timer */ rsts_timer = timing_get("rsts"); /* Get the per frame reestimation CPU timer */ rstf_timer = timing_get("rstf"); g = inv->gauden; n_feat = gauden_n_feat(g); n_density = gauden_n_density(g); n_top = gauden_n_top(g); n_cb = gauden_n_mgau(g); if (p_op == NULL) { p_op = ckd_calloc(n_feat, sizeof(float64)); p_ci_op = ckd_calloc(n_feat, sizeof(float64)); } if (d_term == NULL) { d_term = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); } scale = (float64 *)ckd_calloc(n_obs, sizeof(float64)); dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32)); active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); active_cb = ckd_calloc(2*n_state, sizeof(uint32)); bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); /* Run forward algorithm, which has embedded Viterbi. 
*/ if (fwd_timer) timing_start(fwd_timer); ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state_seq, n_state, inv, a_beam, phseg, 0); /* Dump a phoneme segmentation if requested */ if (cmd_ln_str("-outphsegdir")) { const char *phsegdir; char *segfn, *uttid; phsegdir = cmd_ln_str("-outphsegdir"); uttid = (cmd_ln_int32("-outputfullpath") ? corpus_utt_full_name() : corpus_utt()); segfn = ckd_calloc(strlen(phsegdir) + 1 + strlen(uttid) + strlen(".phseg") + 1, 1); strcpy(segfn, phsegdir); strcat(segfn, "/"); strcat(segfn, uttid); strcat(segfn, ".phseg"); write_phseg(segfn, inv, state_seq, active_astate, n_active_astate, n_state, n_obs, active_alpha, scale, bp); ckd_free(segfn); } if (fwd_timer) timing_stop(fwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto all_done; } mixw = inv->mixw; if (mixw_reest) { /* Need to reallocate mixing accumulators for utt */ if (inv->l_mixw_acc) { ckd_free_3d((void ***)inv->l_mixw_acc); inv->l_mixw_acc = NULL; } inv->l_mixw_acc = (float32 ***)ckd_calloc_3d(inv->n_mixw_inverse, n_feat, n_density, sizeof(float32)); } wacc = inv->l_mixw_acc; n_lcl_cb = inv->n_cb_inverse; cb_inv = inv->cb_inverse; /* Allocate local accumulators for mean, variance reestimation sums if necessary */ gauden_alloc_l_acc(g, n_lcl_cb, mean_reest, var_reest, var_is_full); if (tmat_reest) { if (inv->l_tmat_acc) { ckd_free_2d((void **)inv->l_tmat_acc); inv->l_tmat_acc = NULL; } for (i = 0; i < n_state; i++) { if (state_seq[i].n_next > max_n_next) max_n_next = state_seq[i].n_next; } inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state, max_n_next, sizeof(float32)); } /* transition matrix reestimation sum accumulators for the utterance */ tacc = inv->l_tmat_acc; n_active_cb = 0; now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(float64)); now_den_idx = (uint32 
***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(uint32)); if (mean_reest || var_reest) { /* allocate space for the per frame density counts */ denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_density, sizeof(float32)); /* # of bytes required to store all weighted vectors */ denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32); } else { denacc = NULL; denacc_size = 0; } /* Okay now run through the backtrace and accumulate counts. */ /* Find the non-emitting ending state */ for (q = 0; q < n_active_astate[n_obs-1]; ++q) { if (active_astate[n_obs-1][q] == n_state-1) break; } if (q == n_active_astate[n_obs-1]) { E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n"); ret = S3_ERROR; goto all_done; } for (t = n_obs-1; t >= 0; --t) { uint32 l_cb; uint32 l_ci_cb; float64 op, p_reest_term; uint32 prev; j = active_astate[t][q]; /* Follow any non-emitting states at time t first. */ while (state_seq[j].mixw == TYING_NON_EMITTING) { prev = active_astate[t][bp[t][q]]; #if VITERBI_DEBUG printf("Following non-emitting state at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. */ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j - prev] += 1.0; } q = bp[t][q]; j = prev; } /* Now accumulate statistics for the real state. */ l_cb = state_seq[j].l_cb; l_ci_cb = state_seq[j].l_ci_cb; n_active_cb = 0; if (gau_timer) timing_start(gau_timer); gauden_compute_log(now_den[l_cb], now_den_idx[l_cb], feature[t], g, state_seq[j].cb, NULL); active_cb[n_active_cb++] = l_cb; if (l_cb != l_ci_cb) { gauden_compute_log(now_den[l_ci_cb], now_den_idx[l_ci_cb], feature[t], g, state_seq[j].ci_cb, NULL); active_cb[n_active_cb++] = l_ci_cb; } gauden_scale_densities_bwd(now_den, now_den_idx, &dscale[t], active_cb, n_active_cb, g); assert(state_seq[j].mixw != TYING_NON_EMITTING); /* Now calculate mixture densities. 
*/ /* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */ op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], g); if (gau_timer) timing_stop(gau_timer); if (rsts_timer) timing_start(rsts_timer); /* Make up this bogus value to be consistent with backward.c */ p_reest_term = 1.0 / op; /* Compute the output probability excluding the contribution * of each feature stream. i.e. p_op[0] is the output * probability excluding feature stream 0 */ partial_op(p_op, op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); /* compute the probability of each (of possibly topn) density */ den_terms(d_term, p_reest_term, p_op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); if (l_cb != l_ci_cb) { /* For each feature stream f, compute: * sum_k(mixw[f][k] den[f][k]) * and store the results in p_ci_op */ partial_ci_op(p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); /* For each feature stream and density compute the terms: * w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j * and store results in d_term_ci */ den_terms_ci(d_term_ci, 1.0, /* post_j = 1.0 */ p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); } /* accumulate the probability for each density in the mixing * weight reestimation accumulators */ if (mixw_reest) { accum_den_terms(wacc[state_seq[j].l_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); /* check if mixw and ci_mixw are different to avoid * doubling the EM counts in a CI run. 
*/ if (state_seq[j].mixw != state_seq[j].ci_mixw) { if (n_cb < inv->n_mixw) { /* semi-continuous, tied mixture, and discrete case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); } else { /* continuous case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } } /* accumulate the probability for each density in the * density reestimation accumulators */ if (mean_reest || var_reest) { accum_den_terms(denacc[l_cb], d_term, now_den_idx[l_cb], n_feat, n_top); if (l_cb != l_ci_cb) { accum_den_terms(denacc[l_ci_cb], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } if (rsts_timer) timing_stop(rsts_timer); /* Note that there is only one state/frame so this is kind of redundant */ if (rstf_timer) timing_start(rstf_timer); if (mean_reest || var_reest) { /* Update the mean and variance reestimation accumulators */ if (pdumpfh) fprintf(pdumpfh, "time %d:\n", t); accum_gauden(denacc, cb_inv, n_lcl_cb, feature[t], now_den_idx, g, mean_reest, var_reest, pass2var, inv->l_mixw_acc, var_is_full, pdumpfh, fcb); memset(&denacc[0][0][0], 0, denacc_size); } if (rstf_timer) timing_stop(rstf_timer); if (t > 0) { prev = active_astate[t-1][bp[t][q]]; #if VITERBI_DEBUG printf("Backtrace at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. 
*/ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j-prev] += 1.0; } q = bp[t][q]; j = prev; } } /* If no error was found, add the resulting utterance reestimation * accumulators to the global reestimation accumulators */ if (rstu_timer) timing_start(rstu_timer); accum_global(inv, state_seq, n_state, mixw_reest, tmat_reest, mean_reest, var_reest, var_is_full); if (rstu_timer) timing_stop(rstu_timer); /* Find the final state */ for (i = 0; i < n_active_astate[n_obs-1]; ++i) { if (active_astate[n_obs-1][i] == n_state-1) break; } /* Calculate log[ p( O | \lambda ) ] */ assert(active_alpha[n_obs-1][i] > 0); log_fp = log(active_alpha[n_obs-1][i]); for (t = 0; t < n_obs; t++) { assert(scale[t] > 0); log_fp -= log(scale[t]); for (j = 0; j < inv->gauden->n_feat; j++) { log_fp += dscale[t][j]; } } *log_forw_prob = log_fp; all_done: ckd_free((void *)scale); for (i = 0; i < n_obs; i++) { if (dscale[i]) ckd_free((void *)dscale[i]); } ckd_free((void **)dscale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free((void *)active_cb); if (denacc) ckd_free_3d((void ***)denacc); if (now_den) ckd_free_3d((void ***)now_den); if (now_den_idx) ckd_free_3d((void ***)now_den_idx); if (ret != S3_SUCCESS) E_ERROR("%s ignored\n", corpus_utt_brief_name()); return ret; }
/*
 * Accumulate mean/variance reestimation statistics for one utterance along
 * the best state path, for MMI training.
 *
 * The routine runs forward() to obtain, per frame, the active states, their
 * alphas and backpointers; locates the final state (n_state-1) in the last
 * frame; then walks the backpointers from the last frame to the first.  For
 * each frame it evaluates the Gaussian densities of the state on the path
 * (and of its CI codebook when that differs), derives the per-density terms
 * and, when mean_reest or var_reest is set, feeds them to mmi_accum_gauden().
 * Finally the per-utterance accumulators are merged via accum_global().
 *
 * Parameters:
 *   feature     per-frame feature vectors, indexed feature[t]
 *   n_obs       number of frames; must be > 0 (asserted)
 *   state_seq   sentence HMM state sequence
 *   n_state     number of states in state_seq; must be > 0 (asserted)
 *   inv         model inventory (gauden, mixw, cb_inverse, n_cb_inverse read)
 *   a_beam      beam passed through to forward()
 *   mean_reest  non-zero to accumulate mean statistics
 *   var_reest   non-zero to accumulate variance statistics
 *   arc_gamma   passed unchanged to mmi_accum_gauden() on every frame
 *               (NOTE(review): presumably the lattice arc posterior - confirm)
 *   fcb         feature control block, passed through to mmi_accum_gauden()
 *
 * Returns the status in ret: the result of forward(), S3_ERROR when the
 * final state is not among the active states of the last frame, or a
 * non-success result of gauden_scale_densities_bwd().  On any non-success
 * path accum_global() is skipped and an error is logged.
 *
 * Notes:
 *   - p_op, p_ci_op, d_term and d_term_ci are function-static scratch
 *     buffers allocated on the first call using that call's n_feat/n_top;
 *     they are not released by this function.
 *   - E_FATAL is raised if "-outphsegdir" is set.
 */
int32 mmi_viterbi_update(vector_t **feature, uint32 n_obs, state_t *state_seq, uint32 n_state, model_inventory_t *inv, float64 a_beam, int32 mean_reest, int32 var_reest, float64 arc_gamma, feat_t *fcb)
{
    float64 *scale = NULL;
    float64 **dscale = NULL;
    float64 **active_alpha;
    uint32 **active_astate;
    uint32 **bp;
    uint32 *n_active_astate;
    gauden_t *g;/* Gaussian density parameters and reestimation sums */
    float32 ***mixw;/* all mixing weights */
    float64 ***now_den = NULL;/* Short for den[t] */
    uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */
    uint32 *active_cb;
    uint32 n_active_cb;
    float32 ***denacc = NULL;/* mean/var reestimation accumulators for time t */
    size_t denacc_size;/* Total size of data references in denacc. Allows for quick clears between time frames */
    uint32 n_lcl_cb;
    uint32 *cb_inv;
    uint32 i, j, q;
    int32 t;
    uint32 n_feat;
    uint32 n_density;
    uint32 n_top;
    int ret;
    uint32 n_cb; /* assigned below but not otherwise read in this function */
    /* Scratch buffers shared across calls; allocated lazily further down. */
    static float64 *p_op = NULL;
    static float64 *p_ci_op = NULL;
    static float64 **d_term = NULL;
    static float64 **d_term_ci = NULL;

    /* caller must ensure that there is some non-zero amount of work to be done here */
    assert(n_obs > 0);
    assert(n_state > 0);

    g = inv->gauden;
    n_feat = gauden_n_feat(g);
    n_density = gauden_n_density(g);
    n_top = gauden_n_top(g);
    n_cb = gauden_n_mgau(g);

    /* First call only: size the static scratch buffers from this call's dimensions. */
    if (p_op == NULL) {
        p_op = ckd_calloc(n_feat, sizeof(float64));
        p_ci_op = ckd_calloc(n_feat, sizeof(float64));
    }
    if (d_term == NULL) {
        d_term = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
        d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
    }

    /* Per-utterance, per-frame tables; the per-frame rows are handed to forward(). */
    scale = (float64 *)ckd_calloc(n_obs, sizeof(float64));
    dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32));
    active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));
    active_cb = ckd_calloc(2*n_state, sizeof(uint32));
    bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));

    /* Run forward algorithm, which has embedded Viterbi. */
    ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state_seq, n_state, inv, a_beam, NULL, 1);

    /* Phone segmentation output is rejected outright in the MMI path. */
    if (cmd_ln_str("-outphsegdir")) {
        E_FATAL("current MMI implementation don't support -outphsegdir\n");
    }

    if (ret != S3_SUCCESS) {
        /* Some problem with the utterance, release per utterance storage and
         * forget about adding the utterance accumulators to the global accumulators */
        goto all_done;
    }

    mixw = inv->mixw;
    n_lcl_cb = inv->n_cb_inverse;
    cb_inv = inv->cb_inverse;

    /* Allocate local accumulators for mean, variance reestimation sums if necessary */
    gauden_alloc_l_acc(g, n_lcl_cb, mean_reest, var_reest, FALSE);

    n_active_cb = 0;
    now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(float64));
    now_den_idx = (uint32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(uint32));

    if (mean_reest || var_reest) {
        /* allocate space for the per frame density counts */
        denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_density, sizeof(float32));
        /* # of bytes required to store all weighted vectors */
        denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32);
    }
    else {
        denacc = NULL;
        denacc_size = 0;
    }

    /* Okay now run through the backtrace and accumulate counts. */
    /* Find the non-emitting ending state */
    for (q = 0; q < n_active_astate[n_obs-1]; ++q) {
        if (active_astate[n_obs-1][q] == n_state-1)
            break;
    }
    /* q ran off the end: state n_state-1 is not active in the last frame. */
    if (q == n_active_astate[n_obs-1]) {
        E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n");
        ret = S3_ERROR;
        goto all_done;
    }

    /* Walk the frames backwards; q indexes the active-state list of frame t,
     * j is the corresponding state id in state_seq. */
    for (t = n_obs-1; t >= 0; --t) {
        uint32 l_cb;
        uint32 l_ci_cb;
        float64 op, p_reest_term;
        uint32 prev;

        j = active_astate[t][q];

        /* Follow any non-emitting states at time t first. */
        while (state_seq[j].mixw == TYING_NON_EMITTING) {
            prev = active_astate[t][bp[t][q]];
            q = bp[t][q];
            j = prev;
        }

        /* Now accumulate statistics for the real state. */
        l_cb = state_seq[j].l_cb;
        l_ci_cb = state_seq[j].l_ci_cb;
        n_active_cb = 0;

        gauden_compute_log(now_den[l_cb], now_den_idx[l_cb], feature[t], g, state_seq[j].cb, NULL);
        active_cb[n_active_cb++] = l_cb;

        /* Evaluate the CI codebook too, but only when it is a different one. */
        if (l_cb != l_ci_cb) {
            gauden_compute_log(now_den[l_ci_cb], now_den_idx[l_ci_cb], feature[t], g, state_seq[j].ci_cb, NULL);
            active_cb[n_active_cb++] = l_ci_cb;
        }

        ret = gauden_scale_densities_bwd(now_den, now_den_idx, &dscale[t], active_cb, n_active_cb, g);
        /* Abandon the utterance on a scaling failure; cleanup still runs. */
        if (ret != S3_SUCCESS)
            goto all_done;

        assert(state_seq[j].mixw != TYING_NON_EMITTING);

        /* Now calculate mixture densities. */
        /* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */
        op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], g);

        /* Make up this bogus value to be consistent with backward.c */
        p_reest_term = 1.0 / op;

        /* Compute the output probability excluding the contribution
         * of each feature stream.  i.e. p_op[0] is the output
         * probability excluding feature stream 0 */
        partial_op(p_op, op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top);

        /* compute the probability of each (of possibly topn) density */
        den_terms(d_term, p_reest_term, p_op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top);

        if (l_cb != l_ci_cb) {
            /* For each feature stream f, compute:
             *     sum_k(mixw[f][k] den[f][k])
             * and store the results in p_ci_op */
            partial_ci_op(p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top);

            /* For each feature stream and density compute the terms:
             *   w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j
             * and store results in d_term_ci */
            den_terms_ci(d_term_ci, 1.0, /* post_j = 1.0 */ p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top);
        }

        /* accumulate the probability for each density in the
         * density reestimation accumulators */
        if (mean_reest || var_reest) {
            accum_den_terms(denacc[l_cb], d_term, now_den_idx[l_cb], n_feat, n_top);
            if (l_cb != l_ci_cb) {
                accum_den_terms(denacc[l_ci_cb], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top);
            }
        }

        /* Note that there is only one state/frame so this is kind of redundant */
        if (mean_reest || var_reest) {
            /* Update the mean and variance reestimation accumulators */
            mmi_accum_gauden(denacc, cb_inv, n_lcl_cb, feature[t], now_den_idx, g, mean_reest, var_reest, arc_gamma, fcb);
            /* Clear the per-frame counts so the next frame starts from zero. */
            memset(&denacc[0][0][0], 0, denacc_size);
        }

        /* Step to the predecessor in frame t-1 (nothing to follow at t == 0). */
        if (t > 0) {
            prev = active_astate[t-1][bp[t][q]];
            q = bp[t][q];
            j = prev;
        }
    }

    /* If no error was found, add the resulting utterance reestimation
     * accumulators to the global reestimation accumulators */
    accum_global(inv, state_seq, n_state, FALSE, FALSE, mean_reest, var_reest, FALSE);

    /* Common exit: release all per-utterance storage on success and failure alike. */
all_done:
    ckd_free((void *)scale);
    for (i = 0; i < n_obs; i++) {
        if (dscale[i])
            ckd_free((void *)dscale[i]);
    }
    ckd_free((void **)dscale);
    ckd_free(n_active_astate);
    for (i = 0; i < n_obs; i++) {
        ckd_free((void *)active_alpha[i]);
        ckd_free((void *)active_astate[i]);
        ckd_free((void *)bp[i]);
    }
    ckd_free((void *)active_alpha);
    ckd_free((void *)active_astate);
    ckd_free((void *)active_cb);
    ckd_free((void **)bp);

    /* These three stay NULL when we bailed out before they were allocated. */
    if (denacc)
        ckd_free_3d((void ***)denacc);
    if (now_den)
        ckd_free_3d((void ***)now_den);
    if (now_den_idx)
        ckd_free_3d((void ***)now_den_idx);

    if (ret != S3_SUCCESS)
        E_ERROR("viterbi update error in sentence %s\n", corpus_utt_brief_name());

    return ret;
}
/* * SPHINX-II doesn't automatically compute context independent * smoothing weights. We probably should, but wanted to get comparable * system going first. */ void interp_mixw(float32 ****out_mixw, float32 ***mixw_acc_a, float32 ***mixw_acc_b, float64 *dnom, float32 **lambda, float32 cilambda, uint32 **ci_mixw, uint32 **n_tied, uint32 n_cd_state, uint32 n_ci_state, uint32 n_mixw, uint32 n_feat, uint32 n_gau) { uint32 i, cd_i, ci_i, j, k, l; float32 uniform; float64 tt_uni, tt_ci, tt_cd; uint32 total_n_tied; E_INFO("Interpolating CD states\n"); uniform = 1.0 / (float32)n_gau; /* add b buf to a */ accum_3d(mixw_acc_a, mixw_acc_b, n_mixw, n_feat, n_gau); for (i = 0; i < n_cd_state; i++) { cd_i = i + n_ci_state; if (n_tied[i][0] != TYING_NO_ID) { /* n_tied[][] counts the number of times the CD distribution occurs with the corresponding CI distribution (in ci_mixw[][]). */ for (j = 0, total_n_tied = 0; n_tied[i][j] != TYING_NO_ID; j++) { assert(n_tied[i][j] > 0); total_n_tied += n_tied[i][j]; ci_i = ci_mixw[i][j]; assert(ci_i != TYING_NO_ID); for (k = 0; k < n_feat; k++) { for (l = 0; l < n_gau; l++) { if (mixw_acc_a[cd_i][k][l] > MIN_IEEE_NORM_POS_FLOAT32) tt_cd = lambda[i][DIST_CD] * mixw_acc_a[cd_i][k][l] * dnom[cd_i]; else tt_cd = 0; if (mixw_acc_a[ci_i][k][l] > MIN_IEEE_NORM_POS_FLOAT32) tt_ci = lambda[i][DIST_CI] * mixw_acc_a[ci_i][k][l] * dnom[ci_i]; else tt_ci = 0; tt_uni = lambda[i][DIST_UNIFORM] * uniform; if ( j == 0 ) mixw_acc_b[cd_i][k][l] = n_tied[i][j] * (tt_cd + tt_ci + tt_uni); else mixw_acc_b[cd_i][k][l] += n_tied[i][j] * (tt_cd + tt_ci + tt_uni); } } } } else { /* for unobserved tied states, make flat */ float32 uni = 1.0 / (float)n_gau; for (k = 0; k < n_feat; k++) for (l = 0; l < n_gau; l++) mixw_acc_b[cd_i][k][l] = uni; total_n_tied = 1; } /* avg the probs */ for (k = 0; k < n_feat; k++) for (l = 0; l < n_gau; l++) mixw_acc_b[cd_i][k][l] /= (float32)total_n_tied; } /* interpolate CI distributions with uniform distribution */ 
interp_counts_3d_uniform(mixw_acc_a, 0, /* start state */ n_ci_state, /* run length */ n_feat, n_gau, cilambda); /* move CI ones to the B buffer, since A will be freed */ for (i = 0; i < n_ci_state; i++) { for (j = 0; j < n_feat; j++) { for (k = 0; k < n_gau; k++) { mixw_acc_b[i][j][k] = mixw_acc_a[i][j][k]; } } } *out_mixw = mixw_acc_b; ckd_free_3d((void ***)mixw_acc_a); }
float64 set_best_quest(dtree_node_t *node, float32 ****mixw, float32 ****means, float32 ****vars, uint32 *veclen, uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 **dfeat, uint32 n_dfeat, float32 mwfloor) { float32 ***dist; float64 norm; uint32 s, j, k; dist = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32)); /* Convert occ. counts to probabilities. norm now has total occ. count */ for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { dist[s][j][k] = node->mixw_occ[s][j][k]; } } for (k = 0, norm = 0; k < n_density; k++) { norm += dist[s][0][k]; } norm = 1.0 / norm; for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { dist[s][j][k] *= norm; if (dist[s][j][k] < mwfloor) { dist[s][j][k] = mwfloor; } } } } node->wt_ent_dec = best_q(mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, dfeat, n_dfeat, all_q, n_all_q, pset, node->id, node->n_id, dist, node->wt_ent, (quest_t **)&node->q); ckd_free_3d((void ***)dist); return node->wt_ent_dec; }
/*
 * Release a Gaussian parameter array: first the buffer addressed by the very
 * first vector, then the three-level index built on top of it.
 * NOTE(review): presumably all vectors share that one buffer - confirm
 * against the allocator.  p must be non-NULL.
 */
static void
gauden_param_free(vector_t *** p)
{
    void *first_vec = p[0][0][0];

    ckd_free(first_vec);
    ckd_free_3d((void ***) p);
}
/*
 * Release a Gaussian parameter array: first the buffer addressed by
 * p[0][0][0], then the three-level index that points into it.
 * NOTE(review): presumably every density vector lives in that one buffer -
 * confirm against the reader that allocates it.  p must be non-NULL.
 */
static void
gauden_param_free(mfcc_t **** p)
{
    void *first_vec = p[0][0][0];

    ckd_free(first_vec);
    ckd_free_3d(p);
}
int main(int argc, char *argv[]) { model_def_t *mdef; uint32 n_tmat; uint32 n_tied_state; uint32 n_state_pm; uint32 n_stream; uint32 n_density; float32 ***tmat; float32 **proto_tmat; float32 ***mixw; uint32 i, j, k; float32 mixw_ini; int retval = 0; parse_cmd_ln(argc, argv); printf("%s(%d): Reading model definition file %s\n", __FILE__, __LINE__, cmd_ln_str("-moddeffn")); if (model_def_read(&mdef, cmd_ln_str("-moddeffn")) != S3_SUCCESS) { return 1; } printf("%s(%d): %d models defined\n", __FILE__, __LINE__, mdef->n_defn); if (!cmd_ln_str("-tmatfn") && ! cmd_ln_str("-mixwfn")){ E_FATAL("Both -tmatfn and -mixwfn were not specified, forced exit\n"); } if (cmd_ln_str("-tmatfn")) { if (topo_read(&proto_tmat, &n_state_pm, cmd_ln_str("-topo")) != S3_SUCCESS) return 1; /* proto_tmat is normalized */ n_tmat = mdef->n_tied_tmat; tmat = (float32 ***)ckd_calloc_3d(n_tmat, n_state_pm-1, n_state_pm, sizeof(float32)); for (k = 0; k < n_tmat; k++) { for (i = 0; i < n_state_pm-1; i++) { for (j = 0; j < n_state_pm; j++) { /* perhaps this could be replaced with a block copy per tmat */ tmat[k][i][j] = proto_tmat[i][j]; } } } if (s3tmat_write(cmd_ln_str("-tmatfn"), tmat, n_tmat, n_state_pm) != S3_SUCCESS) { retval = 1; } ckd_free_3d((void ***)tmat); } else { E_INFO("No tmat file given; none generated\n"); } n_tied_state = mdef->n_tied_state; n_stream = cmd_ln_int32("-nstream"); n_density = cmd_ln_int32("-ndensity"); mixw = (float32 ***)ckd_calloc_3d(n_tied_state, n_stream, n_density, sizeof(float32)); /* weight each density uniformly */ mixw_ini = 1.0 / (float)n_density; for (i = 0; i < n_tied_state; i++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { mixw[i][j][k] = mixw_ini; } } } if (cmd_ln_str("-mixwfn")) { if (s3mixw_write(cmd_ln_str("-mixwfn"), mixw, n_tied_state, n_stream, n_density) != S3_SUCCESS) { retval = 2; } } else { E_INFO("No mixw file given; none generated\n"); } ckd_free_3d((void ***)mixw); return retval; }
/*
 * Apply an MLLR transform to the Gaussian codebooks in g.
 *
 * Any means, variances, determinants and feature lengths already held by g
 * are released, and fresh means and variances are read from the files named
 * by the "-mean" and "-var" options.  The two parameter sets must agree in
 * every dimension (E_FATAL otherwise).  Each mean vector is replaced by
 * A*mean + b and each variance is scaled element-wise by h, using class 0
 * of the transform for every stream.  The codebooks are then precomputed
 * again with the "-varfloor" setting.
 *
 * Always returns 0.
 */
int32
gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config)
{
    int32 cb, strm, den, col, idx;
    int32 var_n_mgau, var_n_feat, var_n_density;
    int32 *var_featlen;
    float32 ****params;

    /* Free data if already here */
    if (g->mean) {
        gauden_param_free(g->mean);
    }
    if (g->var) {
        gauden_param_free(g->var);
    }
    if (g->det) {
        ckd_free_3d(g->det);
    }
    if (g->featlen) {
        ckd_free(g->featlen);
    }
    g->mean = 0;
    g->var = 0;
    g->det = 0;
    g->featlen = 0;

    /* Reload means and variances (un-precomputed). */
    params = 0;
    gauden_param_read(&params, &g->n_mgau, &g->n_feat, &g->n_density,
                      &g->featlen, cmd_ln_str_r(config, "-mean"));
    g->mean = (mfcc_t ****)params;

    params = 0;
    gauden_param_read(&params, &var_n_mgau, &var_n_feat, &var_n_density,
                      &var_featlen, cmd_ln_str_r(config, "-var"));
    g->var = (mfcc_t ****)params;

    /* Verify mean and variance parameter dimensions */
    if (!((var_n_mgau == g->n_mgau)
          && (var_n_feat == g->n_feat)
          && (var_n_density == g->n_density))) {
        E_FATAL
            ("Mixture-gaussians dimensions for means and variances differ\n");
    }
    for (idx = 0; idx < g->n_feat; idx++) {
        if (g->featlen[idx] != var_featlen[idx]) {
            E_FATAL("Feature lengths for means and variances differ\n");
        }
    }
    ckd_free(var_featlen);

    /* Transform codebook for each stream s */
    for (cb = 0; cb < g->n_mgau; ++cb) {
        for (strm = 0; strm < g->n_feat; ++strm) {
            /* Scratch for one transformed mean, so that the source vector is
             * not overwritten while it is still being read. */
            float64 *xformed =
                (float64 *) ckd_calloc(g->featlen[strm], sizeof(float64));

            /* Transform each density d in selected codebook */
            for (den = 0; den < g->n_density; den++) {
                int row;

                for (row = 0; row < g->featlen[strm]; row++) {
                    xformed[row] = 0.0;
                    for (col = 0; col < g->featlen[strm]; col++) {
                        /* FIXME: For now, only one class, hence the zeros below. */
                        xformed[row] += mllr->A[strm][0][row][col]
                            * g->mean[cb][strm][den][col];
                    }
                    xformed[row] += mllr->b[strm][0][row];
                }

                for (row = 0; row < g->featlen[strm]; row++) {
                    g->mean[cb][strm][den][row] = (float32) xformed[row];
                    g->var[cb][strm][den][row] *= mllr->h[strm][0][row];
                }
            }

            ckd_free(xformed);
        }
    }

    /* Re-precompute (if we aren't adapting variances this isn't
     * actually necessary...) */
    gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor"));
    return 0;
}
int main(int argc, char *argv[]) { feat_t *fcb; mfcc_t **in_feats, ***out_feats, ***out_feats2, ***optr; int32 i, j, ncep, nfr, nfr1, nfr2; in_feats = (mfcc_t **)ckd_alloc_2d_ptr(6, 13, data, sizeof(mfcc_t)); out_feats = (mfcc_t ***)ckd_calloc_3d(8, 1, 39, sizeof(mfcc_t)); /* Test 1s_c_d_dd features */ fcb = feat_init("1s_c_d_dd", CMN_NONE, 0, AGC_NONE, 1, 13); ncep = 6; nfr1 = feat_s2mfc2feat_live(fcb, in_feats, &ncep, 1, 1, out_feats); printf("Processed %d input %d output frames\n", ncep, nfr1); for (i = 0; i < nfr1; ++i) { printf("%d: ", i); for (j = 0; j < 39; ++j) { printf("%.3f ", MFCC2FLOAT(out_feats[i][0][j])); } printf("\n"); } feat_free(fcb); /* Test in "live" mode. */ fcb = feat_init("1s_c_d_dd", CMN_NONE, 0, AGC_NONE, 1, 13); optr = out_feats2 = (mfcc_t ***)ckd_calloc_3d(8, 1, 39, sizeof(mfcc_t)); nfr2 = 0; ncep = 2; nfr = feat_s2mfc2feat_live(fcb, in_feats, &ncep, TRUE, FALSE, optr); printf("Processed %d input %d output frames\n", ncep, nfr); nfr2 += nfr; for (i = 0; i < nfr; ++i) { printf("%d: ", i); for (j = 0; j < 39; ++j) { printf("%.3f ", MFCC2FLOAT(optr[i][0][j])); } printf("\n"); } optr += nfr; ncep = 2; nfr = feat_s2mfc2feat_live(fcb, in_feats + 2, &ncep, FALSE, FALSE, optr); nfr2 += nfr; printf("Processed %d input %d output frames\n", ncep, nfr); for (i = 0; i < nfr; ++i) { printf("%d: ", i); for (j = 0; j < 39; ++j) { printf("%.3f ", MFCC2FLOAT(optr[i][0][j])); } printf("\n"); } optr += nfr; ncep = 2; nfr = feat_s2mfc2feat_live(fcb, in_feats + 4, &ncep, FALSE, TRUE, optr); nfr2 += nfr; printf("Processed %d input %d output frames\n", ncep, nfr); for (i = 0; i < nfr; ++i) { printf("%d: ", i); for (j = 0; j < 39; ++j) { printf("%.3f ", MFCC2FLOAT(optr[i][0][j])); } printf("\n"); } optr += nfr; feat_free(fcb); TEST_EQUAL(nfr1, nfr2); for (i = 0; i < nfr1; ++i) { for (j = 0; j < 39; ++j) { TEST_EQUAL(out_feats[i][0][j], out_feats2[i][0][j]); } } ckd_free_3d(out_feats2); ckd_free_3d(out_feats); ckd_free(in_feats); return 0; }