/*
 * Rebuild the F0 channel (channel 0) of the utterance's "param_track" using
 * the SPAM F0 trees held in the voice's cg_db.
 *
 * A scratch one-channel track with "param_track_num_frames" frames is filled
 * in two passes:
 *   1. Segment pass: each segment gets a value from the phrase tree (0 for
 *      segments named "pau"), written to every frame whose time
 *      (frame index * frame_advance) is <= the segment's "end".
 *   2. Syllable pass: the accent tree selects a row of spamf0_accent_vectors;
 *      elements 0, 2 and 6 of that row are handed to cst_synthtilt() together
 *      with the syllable start time and duration.
 *      NOTE(review): cst_synthtilt presumably adds the accent shape onto the
 *      scratch track -- confirm against its definition.
 * Channel 0 of the scratch track is then copied over channel 0 of
 * "param_track" and the scratch track is deleted.
 *
 * Returns utt.
 */
cst_utterance *cst_spamf0(cst_utterance *utt)
{
    cst_track *spamf0_track;
    cst_track *param_track;
    cst_item *s;
    cst_cg_db *cg_db;
    const cst_cart *acc_tree, *phrase_tree;
    float end, f0val, syl_start, syldur;
    int total_frames, num_frames, f, i;

    cg_db = val_cg_db(utt_feat_val(utt, "cg_db"));

    /* Looked up once: the value is not modified anywhere in this function,
       and it was previously re-fetched by name on every frame iteration. */
    total_frames = utt_feat_int(utt, "param_track_num_frames");

    spamf0_track = new_track();
    cst_track_resize(spamf0_track, total_frames, 1);
    acc_tree = cg_db->spamf0_accent_tree;
    phrase_tree = cg_db->spamf0_phrase_tree;

    /* Pass 1: per-segment phrase value; num_frames carries over between
       segments so each frame is written at most once. */
    num_frames = 0;
    for (s = utt_rel_head(utt, "Segment"); s; s = item_next(s))
    {
        end = ffeature_float(s, "end");
        if (cst_streq("pau", ffeature_string(s, "name")))
        {
            f0val = 0;
        }
        else
        {
            f0val = val_float(cart_interpret(s, phrase_tree));
        }

        for (; ((num_frames * cg_db->frame_advance) <= end)
                 && (num_frames < total_frames);
             num_frames++)
        {
            spamf0_track->frames[num_frames][0] = f0val;
        }
    }

    /* Pass 2: per-syllable accent. */
    for (s = utt_rel_head(utt, "Syllable"); s; s = item_next(s))
    {
        f = val_int(cart_interpret(s, acc_tree));

        /* Syllable start = end of the segment preceding its first segment. */
        syl_start =
            ffeature_float(s, "R:SylStructure.daughter1.R:Segment.p.end");
        syldur =
            ffeature_float(s, "R:SylStructure.daughtern.R:Segment.end")
            - syl_start;

        cst_synthtilt(cg_db,
                      syl_start,
                      cg_db->spamf0_accent_vectors[f][0],
                      cg_db->spamf0_accent_vectors[f][2],
                      syldur,
                      cg_db->spamf0_accent_vectors[f][6],
                      spamf0_track);
    }

    /* Overwrite the F0 channel of the existing parameter track. */
    param_track = val_track(utt_feat_val(utt, "param_track"));
    for (i = 0; i < total_frames; i++)
    {
        param_track->frames[i][0] = spamf0_track->frames[i][0];
    }

    delete_track(spamf0_track);
    return utt;
}
/*
 * Generate an (mcep) track using Maximum Likelihood Parameter Generation.
 *
 * param_track: input track; channel 0 is F0, and for coefficient j the value
 *              at channel (j+1)*2 is used as the mean/static feature while
 *              the value at channel (j+1)*2+1 is squared to form the
 *              diagonal covariance.
 * cg_db:       supplies the dynamic window (dynwin, dynwinsize).
 *
 * Returns a newly created track with the same number of frames and
 * dim_st+1 channels: channel 0 is the input F0, channels 1..dim_st are the
 * generated static features.  Frame times are copied from param_track.
 *
 * Global variance and sequence likelihood are not computed here.
 */
cst_track *mlpg(const cst_track *param_track, cst_cg_db *cg_db)
{
    const int frame_count = param_track->num_frames;
    const int dim = (param_track->num_channels / 2) - 1;
    const int dim_st = dim / 2;     /* dim2 in original code */
    MLPGPARA param = NODATA;
    PStreamChol pst;
    cst_track *out;
    int t, k;

    out = new_track();
    cst_track_resize(out, frame_count, dim_st + 1);

    param = xmlpgpara_init(dim, dim_st, frame_count, frame_count);

    /* Mixture-index sequence: frame t uses class t. */
    param->clsidxv = xlvalloc(frame_count);
    for (t = 0; t < frame_count; t++)
    {
        param->clsidxv->data[t] = t;
    }

    /* Initial static feature sequence and cluster means are both taken
       from the same input channel, so they are filled together. */
    param->stm = xdmalloc(frame_count, dim_st);
    for (t = 0; t < frame_count; t++)
    {
        for (k = 0; k < dim_st; k++)
        {
            param->stm->data[t][k] = param_track->frames[t][(k + 1) * 2];
            param->mean->data[t][k] = param_track->frames[t][(k + 1) * 2];
        }
    }

    /* GMM parameters, diagonal covariance (squared input value). */
    InitPStreamChol(&pst, cg_db->dynwin, cg_db->dynwinsize,
                    dim_st - 1, frame_count);
    param->pdf = xdmalloc(frame_count, dim * 2);
    param->cov = xdmalloc(frame_count, dim);
    for (t = 0; t < frame_count; t++)
    {
        for (k = 0; k < dim; k++)
        {
            param->cov->data[t][k] =
                param_track->frames[t][(k + 1) * 2 + 1] *
                param_track->frames[t][(k + 1) * 2 + 1];
        }
    }
    param->detvec = xget_detvec_diamat2inv(param->cov);

    /* TBD: global variance parameters (get_gv_mlpgpara) */

    get_dltmat(param->stm, &pst.dw, 1, param->dltm);

    mlgparaChol(param->pdf, &pst, param->stm);

    /* Put the answer back into the output track. */
    for (t = 0; t < frame_count; t++)
    {
        out->times[t] = param_track->times[t];
        out->frames[t][0] = param_track->frames[t][0];   /* F0 */
        for (k = 0; k < dim_st; k++)
        {
            out->frames[t][k + 1] = param->stm->data[t][k];
        }
    }

    /* Release working storage. */
    xmlpgparafree(param);
    pst_free(&pst);

    return out;
}
/*
 * Predict the per-frame synthesis parameters for every item in the "mcep"
 * relation and store them on the utterance.
 *
 * For each mcep item the tree set is chosen by matching the item's "name"
 * against cg_db->types; if no name matches, the first tree set is used.
 * Channel 0 of the resulting track is F0 (average of the F0 tree value and
 * the model vector value(s)); channels 2.. are copied from the model
 * vectors (averaged over both models when cg_db->multimodel is set).
 * Channel 1 is not written here.  When cg_db->mixed_excitation is set, a
 * separate 5-channel strengths track is filled as well.
 *
 * Side effects on utt:
 *   - each mcep item gets "clustergen_param_frame" and "voicing" features;
 *   - feature "param_track" is set (after cg_smooth_F0 has been applied);
 *   - feature "str_track" is set when mixed excitation is enabled.
 *
 * Returns utt.
 */
static cst_utterance *cg_predict_params(cst_utterance *utt)
{
    cst_cg_db *cg_db;
    cst_track *param_track;
    cst_track *str_track = NULL;
    cst_item *mcep;
    const cst_cart *mcep_tree, *f0_tree;
    int i, j, f, p, fd, o;
    const char *mname;
    float f0_val;
    int fff;
    int extra_feats = 0;
    int num_frames;

    cg_db = val_cg_db(utt_feat_val(utt, "cg_db"));
    num_frames = utt_feat_int(utt, "param_track_num_frames");
    param_track = new_track();

    if (cg_db->do_mlpg)     /* which should be the default */
        fff = 1;            /* copy details with stddevs */
    else
        fff = 2;            /* copy details without stddevs */

    extra_feats = 1;        /* voicing */
    if (cg_db->mixed_excitation)
    {
        extra_feats += 5;
        str_track = new_track();
        cst_track_resize(str_track, num_frames, 5);
    }

    cst_track_resize(param_track,
                     num_frames,
                     (cg_db->num_channels0 / fff) -
                     (2 * extra_feats));    /* no voicing or str */

    for (i = 0, mcep = utt_rel_head(utt, "mcep");
         mcep;
         i++, mcep = item_next(mcep))
    {
        mname = item_feat_string(mcep, "name");
        for (p = 0; cg_db->types[p]; p++)
        {
            if (cst_streq(mname, cg_db->types[p]))
                break;
        }
        /* If there isn't a matching tree, use the first one.  The test must
           be on types[p]: after an unsuccessful search p sits on the NULL
           terminator, and indexing the tree arrays there is out of range.
           (Testing types[0] could never change p.) */
        if (cg_db->types[p] == NULL)
            p = 0;

        /* Predict F0 */
        f0_tree = cg_db->f0_trees[p];
        f0_val = val_float(cart_interpret(mcep, f0_tree));
        param_track->frames[i][0] = f0_val;
        /* what about stddev ? */

        if (cg_db->multimodel)
        {   /* MULTI model: average the two models' predictions */
            f = val_int(cart_interpret(mcep, cg_db->param_trees0[p]));
            fd = val_int(cart_interpret(mcep, cg_db->param_trees1[p]));
            item_set_int(mcep, "clustergen_param_frame", f);

            param_track->frames[i][0] =
                (param_track->frames[i][0] +
                 CG_MODEL_VECTOR(cg_db, model_vectors0, f, 0) +
                 CG_MODEL_VECTOR(cg_db, model_vectors1, fd, 0)) / 3.0;
            for (j = 2; j < param_track->num_channels; j++)
                param_track->frames[i][j] =
                    (CG_MODEL_VECTOR(cg_db, model_vectors0, f, (j) * fff) +
                     CG_MODEL_VECTOR(cg_db, model_vectors1, fd, (j) * fff)) / 2.0;
            if (cg_db->mixed_excitation)
            {
                /* Strengths follow the last spectral channel in the
                   model vector, two slots apart. */
                o = j;
                for (j = 0; j < 5; j++)
                {
                    str_track->frames[i][j] =
                        (CG_MODEL_VECTOR(cg_db, model_vectors0, f, (o + (2 * j)) * fff) +
                         CG_MODEL_VECTOR(cg_db, model_vectors1, fd, (o + (2 * j)) * fff)) / 2.0;
                }
            }
        }
        else
        {   /* SINGLE model */
            /* Predict Spectral */
            mcep_tree = cg_db->param_trees0[p];
            f = val_int(cart_interpret(mcep, mcep_tree));
            item_set_int(mcep, "clustergen_param_frame", f);

            param_track->frames[i][0] =
                (param_track->frames[i][0] +
                 CG_MODEL_VECTOR(cg_db, model_vectors0, f, 0)) / 2.0;
            for (j = 2; j < param_track->num_channels; j++)
                param_track->frames[i][j] =
                    CG_MODEL_VECTOR(cg_db, model_vectors0, f, (j) * fff);
            if (cg_db->mixed_excitation)
            {
                o = j;
                for (j = 0; j < 5; j++)
                {
                    str_track->frames[i][j] =
                        CG_MODEL_VECTOR(cg_db, model_vectors0, f, (o + (2 * j)) * fff);
                }
            }
        }

        /* last coefficient is average voicing for cluster */
        item_set_float(mcep, "voicing",
                       CG_MODEL_VECTOR(cg_db, model_vectors0, f,
                                       cg_db->num_channels0 - 2));

        param_track->times[i] = i * cg_db->frame_advance;
    }

    cg_smooth_F0(utt, cg_db, param_track);

    utt_set_feat(utt, "param_track", track_val(param_track));
    if (cg_db->mixed_excitation)
        utt_set_feat(utt, "str_track", track_val(str_track));

    return utt;
}
/*
 * Load an EST-format track file into t.
 *
 * The file must start with the tokens "EST_File" and "Track", followed by
 * header fields up to "EST_Header_End".  Recognised header fields are
 * DataType (ascii|binary), ByteOrder, NumFrames and NumChannels; the value
 * of any other field is read and ignored.  t is resized to
 * NumFrames x NumChannels and each frame is then read with the ascii or
 * binary frame loader.  Nothing but end-of-file may follow the last frame.
 *
 * Returns 0 on success.  On failure an error message is emitted and -1 is
 * returned, except that a frame-loader failure returns the loader's own
 * negative value.  The token stream is closed on every path after a
 * successful open.
 */
int cst_track_load_est(cst_track *t, const char *filename)
{
    cst_tokenstream *ts;
    const char *tok;
    int num_frames = 0;
    int num_channels = 0;
    int i, ascii = 1, swap = 0;
    int frame_rv;
    int rv = -1;    /* stays -1 until the whole file has been accepted */

    ts = ts_open(filename, NULL, NULL, NULL, NULL);
    if (ts == NULL)
    {
        cst_errmsg("cst_track_load: can't open file \"%s\"\n", filename);
        return -1;
    }

    if (!cst_streq(ts_get(ts), "EST_File"))
    {
        cst_errmsg("cst_track_load: not an EST file \"%s\"\n", filename);
        goto done;
    }
    if (!cst_streq(ts_get(ts), "Track"))
    {
        cst_errmsg("cst_track_load: not an track file \"%s\"\n", filename);
        goto done;
    }

    /* Header: "key value" pairs until EST_Header_End. */
    while (!cst_streq("EST_Header_End", (tok = ts_get(ts))))
    {
        if (cst_streq("DataType", tok))
        {
            tok = ts_get(ts);
            if (cst_streq("ascii", tok))
            {
                ascii = 1;
            }
            else if (cst_streq("binary", tok))
            {
                ascii = 0;
            }
            else
            {
                cst_errmsg("cst_track_load: don't know how to deal "
                           "with type \"%s\"\n", tok);
                goto done;
            }
        }
        else if (cst_streq("ByteOrder", tok))
        {
            tok = ts_get(ts);
            /* Swap only when the file's byte order differs from the host's. */
            swap = (cst_streq(tok, BYTE_ORDER_BIG) && CST_LITTLE_ENDIAN)
                || (cst_streq(tok, BYTE_ORDER_LITTLE) && CST_BIG_ENDIAN);
        }
        else if (cst_streq("NumFrames", tok))
        {
            num_frames = atoi(ts_get(ts));
        }
        else if (cst_streq("NumChannels", tok))
        {
            num_channels = atoi(ts_get(ts));
        }
        else
        {
            ts_get(ts);     /* skip the value of an unrecognised field */
        }

        if (ts_eof(ts))
        {
            cst_errmsg("cst_track_load: EOF in header \"%s\"\n", filename);
            goto done;
        }
    }

    /* The counts come straight from the file; refuse negative sizes rather
       than hand them to cst_track_resize.
       NOTE(review): assumes cst_track_resize does not itself tolerate
       negative dimensions -- confirm against its definition. */
    if (num_frames < 0 || num_channels < 0)
    {
        cst_errmsg("cst_track_load: invalid track dimensions in \"%s\"\n",
                   filename);
        goto done;
    }

    cst_track_resize(t, num_frames, num_channels);

    for (i = 0; i < t->num_frames; i++)
    {
        if (ascii)
            frame_rv = load_frame_ascii(t, i, ts);
        else
            frame_rv = load_frame_binary(t, i, ts, swap);
        if (frame_rv < 0)
        {
            cst_errmsg("cst_track_load: EOF in data \"%s\"\n", filename);
            rv = frame_rv;  /* propagate the loader's error code */
            goto done;
        }
    }

    /* One more read must hit end-of-file; anything else is trailing junk. */
    ts_get(ts);
    if (!ts_eof(ts))
    {
        cst_errmsg("cst_track_load: not EOF when expected \"%s\"\n",
                   filename);
        goto done;
    }

    rv = 0;

done:
    ts_close(ts);
    return rv;
}