/*
 * Build a one-channel F0 contour for the utterance and copy it into
 * channel 0 of the utterance's existing "param_track".
 *
 * Steps, as performed below:
 *   1. Every frame covered by a Segment receives a base value: 0 for
 *      segments named "pau", otherwise the phrase tree's prediction.
 *   2. For each Syllable the accent tree selects a row of
 *      cg_db->spamf0_accent_vectors, and cst_synthtilt() is called with
 *      entries 0, 2 and 6 of that row to modify the contour.
 *   3. The contour is written into channel 0 of "param_track".
 *
 * Reads the utterance features "cg_db", "param_track_num_frames" and
 * "param_track".  Returns utt.
 */
cst_utterance *cst_spamf0(cst_utterance *utt)
{
    cst_track *spamf0_track = NULL;
    cst_track *param_track = NULL;
    cst_item *s;
    cst_cg_db *cg_db;
    const cst_cart *acc_tree, *phrase_tree;
    float end, f0val;
    int num_frames, total_frames, f, i;

    cg_db = val_cg_db(utt_feat_val(utt,"cg_db"));

    /* Looked up once; the original re-read this feature on every loop test. */
    total_frames = utt_feat_int(utt,"param_track_num_frames");

    /* Only the scratch contour is allocated here.  param_track is fetched
     * from the utterance further down, so allocating a track for it as
     * well (as the code used to do) just leaked that allocation. */
    spamf0_track = new_track();
    cst_track_resize(spamf0_track, total_frames, 1);

    acc_tree = cg_db->spamf0_accent_tree;
    phrase_tree = cg_db->spamf0_phrase_tree;

    /* Phrase component: one value per segment, spread over its frames. */
    end = 0.0;
    num_frames = 0;
    for (s = utt_rel_head(utt,"Segment"); s; s = item_next(s))
    {
        end = end + ffeature_float(s,"segment_duration");

        if (!strcmp("pau", ffeature_string(s,"name")))
        {
            f0val = 0;
        }
        else
        {
            f0val = val_float(cart_interpret(s,phrase_tree));
        }

        /* Fill frames up to this segment's end time, never past the track. */
        for ( ; ((num_frames * cg_db->frame_advance) <= end) &&
                (num_frames < total_frames);
              num_frames++)
        {
            spamf0_track->frames[num_frames][0] = f0val;
        }
    }

    /* Accent component: one cst_synthtilt() call per syllable. */
    for (s = utt_rel_head(utt,"Syllable"); s; s = item_next(s))
    {
        f = val_int(cart_interpret(s,acc_tree));
        cst_synthtilt(cg_db,
                      ffeature_float(s,"R:SylStructure.daughter1.R:Segment.p.end"),
                      cg_db->spamf0_accent_vectors[f][0],
                      cg_db->spamf0_accent_vectors[f][2],
                      ffeature_float(s,"syllable_duration"),
                      cg_db->spamf0_accent_vectors[f][6],
                      spamf0_track);
    }

    /* Overwrite channel 0 of the utterance's parameter track. */
    param_track = val_track(utt_feat_val(utt,"param_track"));
    for (i = 0; i < total_frames; i++)
    {
        param_track->frames[i][0] = spamf0_track->frames[i][0];
    }

    delete_track(spamf0_track);
    return utt;
}
/*
 * Parse one track <dict> and, if it carries a location, append it to
 * p_input_node as a new input item with its metadata attached.
 *
 * p_track, psz_element and p_handlers are not consulted: a fresh track
 * record is created for every call and released before returning.
 *
 * Returns false when the track record or the input item cannot be
 * created, or when the track has no location; otherwise returns the
 * result of parse_dict().
 */
static bool parse_track_dict( demux_t *p_demux, input_item_node_t *p_input_node,
                              track_elem_t *p_track, xml_reader_t *p_xml_reader,
                              const char *psz_element,
                              xml_elem_hnd_t *p_handlers )
{
    VLC_UNUSED(psz_element); VLC_UNUSED(p_handlers);
    input_item_t *p_new_input = NULL;
    int i_ret;

    p_track = new_track();
    if( !p_track )
        return false;   /* nothing to parse into; avoid dereferencing NULL */

    xml_elem_hnd_t track_elements[] =
// sunqueen modify start
//        { {"array",    COMPLEX_CONTENT, {.cmplx = skip_element} },
//          {"key",      SIMPLE_CONTENT,  {.smpl = save_data} },
//          {"integer",  SIMPLE_CONTENT,  {.smpl = save_data} },
//          {"string",   SIMPLE_CONTENT,  {.smpl = save_data} },
//          {"date",     SIMPLE_CONTENT,  {.smpl = save_data} },
        { {"array",    COMPLEX_CONTENT, {(bool (__cdecl *)(track_elem_t *,const char *,char *))skip_element} },
          {"key",      SIMPLE_CONTENT,  {save_data} },
          {"integer",  SIMPLE_CONTENT,  {save_data} },
          {"string",   SIMPLE_CONTENT,  {save_data} },
          {"date",     SIMPLE_CONTENT,  {save_data} },
// sunqueen modify end
          {"true",     SIMPLE_CONTENT,  {NULL} },
          {"false",    SIMPLE_CONTENT,  {NULL} },
          {NULL,       UNKNOWN_CONTENT, {NULL} }
        };

    i_ret = parse_dict( p_demux, p_input_node, p_track,
                        p_xml_reader, "dict", track_elements );

    msg_Dbg( p_demux, "name: %s, artist: %s, album: %s, genre: %s, trackNum: %s, location: %s",
             p_track->name, p_track->artist, p_track->album, p_track->genre, p_track->trackNum, p_track->location );

    if( !p_track->location )
    {
        msg_Err( p_demux, "Track needs Location" );
        free_track( p_track );
        return false;
    }

    msg_Info( p_demux, "Adding '%s'", p_track->location );
    p_new_input = input_item_New( p_track->location, NULL );
    if( !p_new_input )
    {
        /* Item creation failed: do not append, decorate or release NULL. */
        free_track( p_track );
        return false;
    }
    input_item_node_AppendItem( p_input_node, p_new_input );

    /* add meta info */
    add_meta( p_new_input, p_track );
    /* Drop this function's reference to the item now that it is appended. */
    vlc_gc_decref( p_new_input );

    p_demux->p_sys->i_ntracks++;

    free_track( p_track );
    return i_ret;
}
/*
 * TOC enumeration callback: wrap one track descriptor in a Python object
 * and append it to the container passed through user_data.
 *
 * user_data is a PyObject*.  Real lists are appended to directly; any
 * other object is asked to do it via its "append" method.
 *
 * The callback returns void, so a failure cannot be reported to the
 * caller here; any Python error raised by the calls below is left
 * pending.
 */
static void _get_toc_callback(CDTOCDescriptor *track, void *user_data)
{
    PyObject *container = (PyObject *)user_data;
    PyObject *tobj = new_track(track);

    if (tobj == NULL)
    {
        /* No object was produced; do not hand NULL to the append calls. */
        return;
    }

    if (PyList_Check(container))
    {
        PyList_Append(container, tobj);
    }
    else
    {
        /* PyObject_CallMethod returns a new reference (or NULL on error);
         * it must be released or every call leaks one object. */
        PyObject *result = PyObject_CallMethod(container, "append", "O", tobj);
        Py_XDECREF(result);
    }

    /* Release this function's own reference to the track object. */
    Py_XDECREF(tobj);
}
int main(int argc, char **argv) { cst_track *lpc; cst_wave *sig, *sig2; cst_sts *sts; if (argc != 6) { fprintf(stderr,"usage: find_sts lpc_min lpc_range LPC WAVEFILE STS\n"); return 1; } lpc_min = atof(argv[1]); lpc_range = atof(argv[2]); lpc = new_track(); cst_track_load_est(lpc,argv[3]); sig = new_wave(); if (cst_wave_load_riff(sig,argv[4]) == CST_WRONG_FORMAT) { fprintf(stderr, "cannot load waveform, format unrecognized, from \"%s\"\n", argv[4]); exit(-1); } sts = find_sts(sig,lpc); /* See if it worked */ sig2 = reconstruct_wave(sig,sts,lpc); compare_waves(sig,sig2); cst_wave_save_riff(sig2,"sig2.wav"); save_sts(sts,lpc,sig,argv[5]); return 0; }
/*
 * Generate an (mcep) track using Maximum Likelihood Parameter Generation.
 *
 * param_track is read as interleaved pairs: channel 0 is copied through
 * unchanged as F0, and for coefficient k the value at channel 2k+2 is
 * used as the mean and the value at channel 2k+3 is squared to give the
 * (diagonal) covariance entry.
 *
 * Returns a newly allocated track with num_frames frames and dim_st+1
 * channels (F0 followed by the generated static coefficients).  The
 * global-variance step of the original algorithm is not applied (TBD).
 */
cst_track *mlpg(const cst_track *param_track, cst_cg_db *cg_db)
{
    MLPGPARA param = NODATA;
    PStreamChol pst;
    cst_track *out;
    int nframes, dim, dim_st;
    int t, k;

    nframes = param_track->num_frames;
    dim = (param_track->num_channels / 2) - 1;
    dim_st = dim / 2;              /* dim2 in original code */

    out = new_track();
    cst_track_resize(out, nframes, dim_st + 1);

    param = xmlpgpara_init(dim, dim_st, nframes, nframes);

    /* Mixture-index sequence: frame t uses class t. */
    param->clsidxv = xlvalloc(nframes);
    for (t = 0; t < nframes; t++)
    {
        param->clsidxv->data[t] = t;
    }

    /* Initial static feature sequence and cluster means both start from
     * the same values in param_track. */
    param->stm = xdmalloc(nframes, dim_st);
    for (t = 0; t < nframes; t++)
    {
        for (k = 0; k < dim_st; k++)
        {
            param->stm->data[t][k] = param_track->frames[t][2 * k + 2];
            param->mean->data[t][k] = param_track->frames[t][2 * k + 2];
        }
    }

    /* GMM parameters, diagonal covariance. */
    InitPStreamChol(&pst, cg_db->dynwin, cg_db->dynwinsize, dim_st - 1, nframes);
    param->pdf = xdmalloc(nframes, dim * 2);
    param->cov = xdmalloc(nframes, dim);
    for (t = 0; t < nframes; t++)
    {
        for (k = 0; k < dim; k++)
        {
            param->cov->data[t][k] =
                param_track->frames[t][2 * k + 3] *
                param_track->frames[t][2 * k + 3];
        }
    }
    param->detvec = xget_detvec_diamat2inv(param->cov);

    get_dltmat(param->stm, &pst.dw, 1, param->dltm);

    mlgparaChol(param->pdf, &pst, param->stm);

    /* Put the answer back into the output track. */
    for (t = 0; t < nframes; t++)
    {
        out->times[t] = param_track->times[t];
        out->frames[t][0] = param_track->frames[t][0];   /* F0 */
        for (k = 0; k < dim_st; k++)
        {
            out->frames[t][k + 1] = param->stm->data[t][k];
        }
    }

    /* Release the working storage; only `out` survives. */
    xmlpgparafree(param);
    pst_free(&pst);

    return out;
}
/*
 * Predict the per-frame parameter track for an utterance.
 *
 * For every item in the "mcep" relation the matching F0 tree and
 * parameter tree(s) are interpreted and the selected model vectors are
 * copied (single model) or averaged (multimodel) into a new track.
 * Channel 0 holds F0, averaged with the model vectors' own channel 0.
 * With mixed excitation, five further values per frame go into a
 * separate "str_track".
 *
 * Sets on each mcep item: "clustergen_param_frame" and "voicing".
 * Sets on the utterance: "param_track" and, with mixed excitation,
 * "str_track".  Returns utt.
 */
static cst_utterance *cg_predict_params(cst_utterance *utt)
{
    cst_cg_db *cg_db;
    cst_track *param_track;
    cst_track *str_track = NULL;
    cst_item *mcep;
    const cst_cart *mcep_tree, *f0_tree;
    int i,j,f,p,fd,o;
    const char *mname;
    float f0_val;
    int fff;
    int extra_feats = 0;

    cg_db = val_cg_db(utt_feat_val(utt,"cg_db"));
    param_track = new_track();
    if (cg_db->do_mlpg) /* which should be the default */
        fff = 1;  /* copy details with stddevs */
    else
        fff = 2;  /* copy details without stddevs */

    extra_feats = 1;  /* voicing */
    if (cg_db->mixed_excitation)
    {
        extra_feats += 5;
        str_track = new_track();
        cst_track_resize(str_track,
                         utt_feat_int(utt,"param_track_num_frames"),
                         5);
    }

    cst_track_resize(param_track,
                     utt_feat_int(utt,"param_track_num_frames"),
                     (cg_db->num_channels0/fff)-
                       (2 * extra_feats));/* no voicing or str */

    for (i=0,mcep=utt_rel_head(utt,"mcep"); mcep; i++,mcep=item_next(mcep))
    {
        mname = item_feat_string(mcep,"name");
        for (p=0; cg_db->types[p]; p++)
            if (cst_streq(mname,cg_db->types[p]))
                break;
        /* If the search ran off the end of the NULL-terminated list, p is
         * the terminator's index and must not be used to index the tree
         * arrays.  The previous test looked at types[0], which can only be
         * NULL when p is already 0, so the fallback never took effect. */
        if (cg_db->types[p] == NULL)
            p=0; /* if there isn't a matching tree, use the first one */

        /* Predict F0 */
        f0_tree = cg_db->f0_trees[p];
        f0_val = val_float(cart_interpret(mcep,f0_tree));
        param_track->frames[i][0] = f0_val;
        /* what about stddev ? */

        if (cg_db->multimodel)
        {   /* MULTI model */
            f = val_int(cart_interpret(mcep,cg_db->param_trees0[p]));
            fd = val_int(cart_interpret(mcep,cg_db->param_trees1[p]));
            item_set_int(mcep,"clustergen_param_frame",f);

            /* F0 is the mean of the tree prediction and both models. */
            param_track->frames[i][0] =
                (param_track->frames[i][0]+
                 CG_MODEL_VECTOR(cg_db,model_vectors0,f,0)+
                 CG_MODEL_VECTOR(cg_db,model_vectors1,fd,0))/3.0;
            for (j=2; j<param_track->num_channels; j++)
                param_track->frames[i][j] =
                    (CG_MODEL_VECTOR(cg_db,model_vectors0,f,(j)*fff)+
                     CG_MODEL_VECTOR(cg_db,model_vectors1,fd,(j)*fff))/2.0;
            if (cg_db->mixed_excitation)
            {
                /* j is where the loop above stopped; the strength values
                 * are read from every second model channel after it. */
                o = j;
                for (j=0; j<5; j++)
                {
                    str_track->frames[i][j] =
                        (CG_MODEL_VECTOR(cg_db,model_vectors0,f,(o+(2*j))*fff)+
                         CG_MODEL_VECTOR(cg_db,model_vectors1,fd,(o+(2*j))*fff))/2.0;
                }
            }
        }
        else
        {   /* SINGLE model */
            /* Predict Spectral */
            mcep_tree = cg_db->param_trees0[p];
            f = val_int(cart_interpret(mcep,mcep_tree));
            item_set_int(mcep,"clustergen_param_frame",f);

            /* F0 is the mean of the tree prediction and the model. */
            param_track->frames[i][0] =
                (param_track->frames[i][0]+
                 CG_MODEL_VECTOR(cg_db,model_vectors0,f,0))/2.0;

            for (j=2; j<param_track->num_channels; j++)
                param_track->frames[i][j] =
                    CG_MODEL_VECTOR(cg_db,model_vectors0,f,(j)*fff);

            if (cg_db->mixed_excitation)
            {
                /* Same layout as the multimodel branch, one model only. */
                o = j;
                for (j=0; j<5; j++)
                {
                    str_track->frames[i][j] =
                        CG_MODEL_VECTOR(cg_db,model_vectors0,f,(o+(2*j))*fff);
                }
            }
        }

        /* last coefficient is average voicing for cluster */
        item_set_float(mcep,"voicing",
                       CG_MODEL_VECTOR(cg_db,model_vectors0,f,
                                       cg_db->num_channels0-2));

        param_track->times[i] = i * cg_db->frame_advance;
    }

    cg_smooth_F0(utt,cg_db,param_track);

    utt_set_feat(utt,"param_track",track_val(param_track));
    if (cg_db->mixed_excitation)
        utt_set_feat(utt,"str_track",track_val(str_track));

    return utt;
}
int main(int argc, char **argv) { cst_track *t1; cst_track *me_filters = NULL; cst_wave *w1, *w2, *res = NULL; cst_val *files; cst_features *args; int i, j; int order, o, s; int frame_length; float *lpcs, *residual; float m; const char *f1, *f2; const char *resfn = NULL; int last_peak = 0, next_peak; int period; float power; int rfc = 0; int str = 0; int fn, fo, ss; float xpulse, xnoise; float fxpulse, fxnoise; float x, me; float *hpulse = NULL, *hnoise = NULL; float *xpulsesig = NULL, *xnoisesig = NULL; int q = 0; int position; int lpc_start = 0; args = new_features(); files = cst_args(argv, argc, "usage: lpc_resynth OPTIONS INTRACK OUTWAVE\n" "Resynth an lpc track\n" "-res <string> residual (as waveform)\n" "-save_res Save the generated residual\n" "-lpc_start <int> start of lpc params in lpc track {1}\n" "-order <int> LPC order {16}\n" "-str mixed excitation strengths\n" "-me_filters <string> mixed excitation filters\n" "-rfc Coefficents are reflection coefficients\n", args); f1 = val_string(val_car(files)); f2 = val_string(val_car(val_cdr(files))); t1 = new_track(); lpc_start = mimic_get_param_int(args, "-lpc_start", 1); if (feat_present(args, "-rfc")) rfc = 1; if (feat_present(args, "-str")) str = 1; if (feat_present(args, "-me_filters")) { me_filters = new_track(); if (cst_track_load_est (me_filters, mimic_get_param_string(args, "-me_filters", "me_filters.track")) != CST_OK_FORMAT) { fprintf(stderr, "lpc_resynth: can't read file or wrong format \"%s\"\n", f1); return -1; } hpulse = cst_alloc(float, me_filters->num_channels); hnoise = cst_alloc(float, me_filters->num_channels); xpulsesig = cst_alloc(float, me_filters->num_channels); xnoisesig = cst_alloc(float, me_filters->num_channels); }