/*
 * Build an F0 contour for the utterance and store it in channel 0 of the
 * track held in the utterance feature "param_track".
 *
 * Steps:
 *   1. Every Segment contributes a baseline value: 0 for segments named
 *      "pau", otherwise the value predicted by the phrase tree.  That value
 *      is written to all frames up to the segment's end time.
 *   2. For every Syllable the accent tree selects a row of
 *      spamf0_accent_vectors, and cst_synthtilt() is called with elements
 *      0, 2 and 6 of that row plus the syllable timing, operating on the
 *      temporary track.
 *   3. Channel 0 of the temporary track is copied into "param_track".
 *
 * utt   utterance carrying the "cg_db", "param_track" and
 *       "param_track_num_frames" features.
 * Returns utt.
 */
cst_utterance *cst_spamf0(cst_utterance *utt)
{
    cst_track *spamf0_track;
    cst_track *param_track;
    cst_item *s;
    cst_cg_db *cg_db;
    const cst_cart *acc_tree, *phrase_tree;
    float start,end,f0val;
    int num_frames,total_frames,f,i;

    cg_db = val_cg_db(utt_feat_val(utt,"cg_db"));
    /* Looked up once; nothing in this function modifies the feature. */
    total_frames = utt_feat_int(utt,"param_track_num_frames");

    /* Only the scratch track is allocated here.  The parameter track is
       fetched from the utterance further down, so allocating one for it
       as well would simply leak. */
    spamf0_track = new_track();
    cst_track_resize(spamf0_track, total_frames, 1);

    acc_tree = cg_db->spamf0_accent_tree;
    phrase_tree = cg_db->spamf0_phrase_tree;
    end = 0.0;
    num_frames = 0;
    for (s = utt_rel_head(utt,"Segment"); s; s=item_next(s))
    {
        start = end;
        end = start + ffeature_float(s,"segment_duration");
        if (!strcmp("pau",ffeature_string(s,"name")))
        {
            f0val = 0;
        }
        else
        {
            f0val = val_float(cart_interpret(s,phrase_tree));
        }

        /* Fill frames up to this segment's end time, never past the
           size the track was resized to. */
        for ( ; ((num_frames * cg_db->frame_advance) <= end) && (num_frames < total_frames); num_frames++)
        {
            spamf0_track->frames[num_frames][0] = f0val;
        }
    }

    for (s = utt_rel_head(utt,"Syllable"); s; s=item_next(s))
    {
        /* f indexes the accent vector table */
        f = val_int(cart_interpret(s,acc_tree));
        cst_synthtilt(cg_db,
                      ffeature_float(s,"R:SylStructure.daughter1.R:Segment.p.end"),
                      cg_db->spamf0_accent_vectors[f][0],
                      cg_db->spamf0_accent_vectors[f][2],
                      ffeature_float(s,"syllable_duration"),
                      cg_db->spamf0_accent_vectors[f][6],
                      spamf0_track);
    }

    param_track = val_track(utt_feat_val(utt,"param_track"));
    for (i = 0; i < total_frames; i++)
    {
        param_track->frames[i][0] = spamf0_track->frames[i][0];
    }
    delete_track(spamf0_track);
    return utt;
}
示例#2
0
/*
 * Parse one track <dict> and append the resulting item to p_input_node.
 *
 * A fresh track record is allocated, filled by parse_dict() using the
 * handler table below, turned into an input item (using the track's
 * location), decorated with metadata through add_meta(), and released.
 *
 * p_demux        demuxer; used for logging and for the i_ntracks counter
 * p_input_node   node that receives the new input item
 * p_track        ignored on entry; overwritten with a newly allocated track
 * p_xml_reader   XML reader forwarded to parse_dict()
 * psz_element    unused
 * p_handlers     unused
 *
 * Returns false when the track cannot be allocated, has no location, or
 * the input item cannot be created; otherwise the result of parse_dict().
 */
static bool parse_track_dict( demux_t *p_demux, input_item_node_t *p_input_node,
                              track_elem_t *p_track, xml_reader_t *p_xml_reader,
                              const char *psz_element,
                              xml_elem_hnd_t *p_handlers )
{
    VLC_UNUSED(psz_element); VLC_UNUSED(p_handlers);
    input_item_t *p_new_input = NULL;
    int i_ret;

    p_track = new_track();
    /* NOTE(review): assumes new_track() reports allocation failure by
       returning NULL -- confirm against its definition. */
    if( !p_track )
        return false;

    /* Positional initializers stand in for the designated ones
       (.cmplx = skip_element, .smpl = save_data) used by the upstream
       code; the cast makes skip_element fit the type of the first
       handler member. */
    xml_elem_hnd_t track_elements[] =
        { {"array",   COMPLEX_CONTENT, {(bool (__cdecl *)(track_elem_t *,const char *,char *))skip_element} },
          {"key",     SIMPLE_CONTENT,  {save_data} },
          {"integer", SIMPLE_CONTENT,  {save_data} },
          {"string",  SIMPLE_CONTENT,  {save_data} },
          {"date",    SIMPLE_CONTENT,  {save_data} },
          {"true",    SIMPLE_CONTENT,  {NULL} },
          {"false",   SIMPLE_CONTENT,  {NULL} },
          {NULL,      UNKNOWN_CONTENT, {NULL} }
        };

    i_ret = parse_dict( p_demux, p_input_node, p_track,
                        p_xml_reader, "dict", track_elements );

    msg_Dbg( p_demux, "name: %s, artist: %s, album: %s, genre: %s, trackNum: %s, location: %s",
             p_track->name, p_track->artist, p_track->album, p_track->genre, p_track->trackNum, p_track->location );

    if( !p_track->location )
    {
        msg_Err( p_demux, "Track needs Location" );
        free_track( p_track );
        return false;
    }

    msg_Info( p_demux, "Adding '%s'", p_track->location );
    p_new_input = input_item_New( p_track->location, NULL );
    if( !p_new_input )
    {
        /* Nothing to append; release the track instead of handing a
           NULL item to the node and metadata helpers. */
        free_track( p_track );
        return false;
    }
    input_item_node_AppendItem( p_input_node, p_new_input );

    /* add meta info */
    add_meta( p_new_input, p_track );
    vlc_gc_decref( p_new_input );

    p_demux->p_sys->i_ntracks++;

    free_track( p_track );
    return i_ret;
}
示例#3
0
/*
 * TOC enumeration callback: wraps one track descriptor in a Python object
 * and appends it to the collection passed through user_data.
 *
 * track      descriptor handed to new_track()
 * user_data  a PyObject*; a list is appended to directly, any other object
 *            has its "append" method called with the new track object
 *
 * The reference obtained from new_track() is released before returning, as
 * is the result object of the "append" call.
 */
static void
_get_toc_callback(CDTOCDescriptor *track, void *user_data)
{
    PyObject *target = (PyObject*)user_data;
    PyObject *tobj = new_track(track);
    PyObject *result;

    /* Nothing to append if the track object could not be built; returning
       here also avoids handing NULL to the append calls below. */
    if(tobj == NULL)
    {
        return;
    }

    if(PyList_Check(target))
    {
        PyList_Append(target, tobj);
    }
    else
    {
        /* PyObject_CallMethod returns a new reference (or NULL on error);
           it must be released or it leaks on every call. */
        result = PyObject_CallMethod(target, "append", "O", tobj);
        Py_XDECREF(result);
    }
    Py_XDECREF(tobj);
}
示例#4
0
/*
 * Parse arg as a complete floating-point number.
 * Stores the value in *out and returns 0 on success; returns -1 when arg
 * is empty, not numeric, or followed by trailing characters.
 */
static int parse_double_arg(const char *arg, double *out)
{
    char *end = NULL;
    double value = strtod(arg, &end);

    if (end == arg || *end != '\0')
    {
        return -1;
    }
    *out = value;
    return 0;
}

/*
 * find_sts lpc_min lpc_range LPC WAVEFILE STS
 *
 * Loads an LPC track and a RIFF waveform, computes the STS data with
 * find_sts(), reconstructs a waveform from it for comparison (also saved
 * as "sig2.wav"), and saves the STS data to the file named by the last
 * argument.
 *
 * Returns 0 on success and 1 on usage or argument errors; load failures
 * terminate through exit(-1).
 */
int main(int argc, char **argv)
{
    cst_track *lpc;
    cst_wave *sig, *sig2;
    cst_sts *sts;
    double parsed;

    if (argc != 6)
    {
        fprintf(stderr,"usage: find_sts lpc_min lpc_range LPC WAVEFILE STS\n");
        return 1;
    }

    /* strtod-based parsing rejects malformed numbers instead of silently
       turning them into 0 the way atof does. */
    if (parse_double_arg(argv[1], &parsed) != 0)
    {
        fprintf(stderr, "find_sts: lpc_min is not a number: \"%s\"\n", argv[1]);
        return 1;
    }
    lpc_min = parsed;

    if (parse_double_arg(argv[2], &parsed) != 0)
    {
        fprintf(stderr, "find_sts: lpc_range is not a number: \"%s\"\n", argv[2]);
        return 1;
    }
    lpc_range = parsed;

    lpc = new_track();
    /* NOTE(review): only CST_WRONG_FORMAT is treated as a failure here,
       mirroring the waveform check below; any other failure status the
       loader may return is still not detected -- confirm its contract. */
    if (cst_track_load_est(lpc,argv[3]) == CST_WRONG_FORMAT)
    {
        fprintf(stderr,
                "cannot load lpc track, format unrecognized, from \"%s\"\n",
                argv[3]);
        exit(-1);
    }

    sig = new_wave();
    if (cst_wave_load_riff(sig,argv[4]) == CST_WRONG_FORMAT)
    {
        fprintf(stderr,
                "cannot load waveform, format unrecognized, from \"%s\"\n",
                argv[4]);
        exit(-1);
    }

    sts = find_sts(sig,lpc);

    /* See if it worked */
    sig2 = reconstruct_wave(sig,sts,lpc);

    compare_waves(sig,sig2);
    cst_wave_save_riff(sig2,"sig2.wav");

    save_sts(sts,lpc,sig,argv[5]);

    return 0;
}
示例#5
0
文件: cst_mlpg.c 项目: Seb-Leb/mimic
/*
 * Produce an (mcep) track from param_track using Maximum Likelihood
 * Parameter Generation.
 *
 * param_track  input track; channel 0 is copied through as F0, and the
 *              remaining channels are read at indices (k+1)*2 and
 *              (k+1)*2+1 for the means and the values that get squared
 *              into the covariance
 * cg_db        supplies dynwin / dynwinsize for the stream setup
 *
 * Returns a newly allocated track with the same number of frames and
 * dim_st+1 channels.
 */
cst_track *mlpg(const cst_track *param_track, cst_cg_db *cg_db)
{
    MLPGPARA param = NODATA;
    PStreamChol pst;
    cst_track *result;
    const int nframes = param_track->num_frames;
    const int dim = (param_track->num_channels/2)-1;
    const int dim_st = dim/2;   /* dim2 in original code */
    int t, k;

    result = new_track();
    cst_track_resize(result, nframes, dim_st+1);

    param = xmlpgpara_init(dim, dim_st, nframes, nframes);

    /* mixture-index sequence: frame t maps to index t */
    param->clsidxv = xlvalloc(nframes);
    for (t = 0; t < nframes; t++)
    {
        param->clsidxv->data[t] = t;
    }

    /* The initial static feature sequence and the cluster means are both
       seeded from the same input channels, so fill them in one pass. */
    param->stm = xdmalloc(nframes, dim_st);
    for (t = 0; t < nframes; t++)
    {
        for (k = 0; k < dim_st; k++)
        {
            const int mean_col = (k+1)*2;

            param->stm->data[t][k] = param_track->frames[t][mean_col];
            param->mean->data[t][k] = param_track->frames[t][mean_col];
        }
    }

    /* GMM parameters diagonal covariance */
    InitPStreamChol(&pst, cg_db->dynwin, cg_db->dynwinsize, dim_st-1, nframes);
    param->pdf = xdmalloc(nframes, dim*2);
    param->cov = xdmalloc(nframes, dim);
    for (t = 0; t < nframes; t++)
    {
        for (k = 0; k < dim; k++)
        {
            const int dev_col = (k+1)*2+1;

            /* the input value is squared to form the covariance entry */
            param->cov->data[t][k] =
                param_track->frames[t][dev_col] *
                param_track->frames[t][dev_col];
        }
    }
    param->detvec = xget_detvec_diamat2inv(param->cov);

    /* global variance parameters */
    /* TBD get_gv_mlpgpara(param, vmfile, vvfile, dim2, msg_flag); */

    get_dltmat(param->stm, &pst.dw, 1, param->dltm);

    /* the likelihood returned here is not used */
    get_like_pdfseq_vit(dim, dim_st, nframes, nframes, param,
                        param_track->frames, XTRUE);

    /* vlike = get_like_gv(dim2, dnum, param); */

    mlgparaChol(param->pdf, &pst, param->stm);

    /* Copy times, F0 and the generated static features to the output */
    for (t = 0; t < nframes; t++)
    {
        result->times[t] = param_track->times[t];
        result->frames[t][0] = param_track->frames[t][0]; /* F0 */
        for (k = 0; k < dim_st; k++)
        {
            result->frames[t][k+1] = param->stm->data[t][k];
        }
    }

    /* release working storage */
    xmlpgparafree(param);
    pst_free(&pst);

    return result;
}
示例#6
0
/*
 * Predict the per-frame parameters for an utterance.
 *
 * For every item of the "mcep" relation the function:
 *   - selects the tree set whose type name matches the item's "name"
 *     (falling back to the first set when none matches),
 *   - predicts F0 with the F0 tree and averages it with channel 0 of the
 *     selected model vector(s),
 *   - copies (single model) or averages (multi model) the remaining
 *     channels from the model vectors,
 *   - when mixed excitation is enabled, fills 5 strength values in a
 *     separate track,
 *   - records "clustergen_param_frame" and "voicing" on the item.
 *
 * After cg_smooth_F0() the result is stored as utterance feature
 * "param_track" (and "str_track" when mixed excitation is enabled).
 *
 * NOTE(review): frames are indexed by the item's position in "mcep";
 * this assumes the relation holds no more items than
 * "param_track_num_frames" -- confirm against the caller.
 *
 * Returns utt.
 */
static cst_utterance *cg_predict_params(cst_utterance *utt)
{
    cst_cg_db *cg_db;
    cst_track *param_track;
    cst_track *str_track = NULL;
    cst_item *mcep;
    const cst_cart *mcep_tree, *f0_tree;
    int i,j,f,p,fd,o;
    const char *mname;
    float f0_val;
    int fff;
    int extra_feats = 0;
    int num_frames;

    cg_db = val_cg_db(utt_feat_val(utt,"cg_db"));
    num_frames = utt_feat_int(utt,"param_track_num_frames");
    param_track = new_track();
    if (cg_db->do_mlpg) /* which should be the default */
        fff = 1;  /* copy details with stddevs */
    else
        fff = 2;  /* copy details without stddevs */

    extra_feats = 1;  /* voicing */
    if (cg_db->mixed_excitation)
    {
        extra_feats += 5;
        str_track = new_track();
        cst_track_resize(str_track, num_frames, 5);
    }

    cst_track_resize(param_track,
                     num_frames,
                     (cg_db->num_channels0/fff)-
                       (2 * extra_feats));/* no voicing or str */
    for (i=0,mcep=utt_rel_head(utt,"mcep"); mcep; i++,mcep=item_next(mcep))
    {
        mname = item_feat_string(mcep,"name");
        for (p=0; cg_db->types[p]; p++)
            if (cst_streq(mname,cg_db->types[p]))
                break;
        /* When the search ran off the end, p sits on the NULL terminator;
           testing types[p] (not types[0]) is what detects "no match" and
           keeps the tree lookups below inside their arrays. */
        if (cg_db->types[p] == NULL)
            p=0; /* if there isn't a matching tree, use the first one */

        /* Predict F0 */
        f0_tree = cg_db->f0_trees[p];
        f0_val = val_float(cart_interpret(mcep,f0_tree));
        param_track->frames[i][0] = f0_val;
        /* what about stddev ? */

        if (cg_db->multimodel)
        {   /* MULTI model */
            f = val_int(cart_interpret(mcep,cg_db->param_trees0[p]));
            fd = val_int(cart_interpret(mcep,cg_db->param_trees1[p]));
            item_set_int(mcep,"clustergen_param_frame",f);

            /* F0 is the mean of the tree prediction and both models */
            param_track->frames[i][0] =
                (param_track->frames[i][0]+
                 CG_MODEL_VECTOR(cg_db,model_vectors0,f,0)+
                 CG_MODEL_VECTOR(cg_db,model_vectors1,fd,0))/3.0;
            for (j=2; j<param_track->num_channels; j++)
                param_track->frames[i][j] =
                    (CG_MODEL_VECTOR(cg_db,model_vectors0,f,(j)*fff)+
                     CG_MODEL_VECTOR(cg_db,model_vectors1,fd,(j)*fff))/2.0;
            if (cg_db->mixed_excitation)
            {
                /* strengths follow the last copied channel, every other
                   model-vector entry */
                o = j;
                for (j=0; j<5; j++)
                {
                    str_track->frames[i][j] =
                        (CG_MODEL_VECTOR(cg_db,model_vectors0,f,(o+(2*j))*fff)+
                         CG_MODEL_VECTOR(cg_db,model_vectors1,fd,(o+(2*j))*fff))/2.0;
                }
            }
        }
        else
        {   /* SINGLE model */
            /* Predict Spectral */
            mcep_tree = cg_db->param_trees0[p];
            f = val_int(cart_interpret(mcep,mcep_tree));
            item_set_int(mcep,"clustergen_param_frame",f);

            /* F0 is the mean of the tree prediction and the model value */
            param_track->frames[i][0] =
                (param_track->frames[i][0]+
                 CG_MODEL_VECTOR(cg_db,model_vectors0,f,0))/2.0;

            for (j=2; j<param_track->num_channels; j++)
                param_track->frames[i][j] =
                    CG_MODEL_VECTOR(cg_db,model_vectors0,f,(j)*fff);

            if (cg_db->mixed_excitation)
            {
                o = j;
                for (j=0; j<5; j++)
                {
                    str_track->frames[i][j] =
                        CG_MODEL_VECTOR(cg_db,model_vectors0,f,(o+(2*j))*fff);
                }
            }
        }

        /* last coefficient is average voicing for cluster */
        item_set_float(mcep,"voicing",
                       CG_MODEL_VECTOR(cg_db,model_vectors0,f,
                                       cg_db->num_channels0-2));

        param_track->times[i] = i * cg_db->frame_advance;
    }

    cg_smooth_F0(utt,cg_db,param_track);

    utt_set_feat(utt,"param_track",track_val(param_track));
    if (cg_db->mixed_excitation)
        utt_set_feat(utt,"str_track",track_val(str_track));

    return utt;
}
示例#7
0
int main(int argc, char **argv)
{
    cst_track *t1;
    cst_track *me_filters = NULL;
    cst_wave *w1, *w2, *res = NULL;
    cst_val *files;
    cst_features *args;
    int i, j;
    int order, o, s;
    int frame_length;
    float *lpcs, *residual;
    float m;
    const char *f1, *f2;
    const char *resfn = NULL;
    int last_peak = 0, next_peak;
    int period;
    float power;
    int rfc = 0;
    int str = 0;
    int fn, fo, ss;
    float xpulse, xnoise;
    float fxpulse, fxnoise;
    float x, me;
    float *hpulse = NULL, *hnoise = NULL;
    float *xpulsesig = NULL, *xnoisesig = NULL;
    int q = 0;
    int position;
    int lpc_start = 0;

    args = new_features();
    files =
        cst_args(argv, argc,
                 "usage: lpc_resynth OPTIONS INTRACK OUTWAVE\n"
                 "Resynth an lpc track\n"
                 "-res <string> residual (as waveform)\n"
                 "-save_res Save the generated residual\n"
                 "-lpc_start <int> start of lpc params in lpc track {1}\n"
                 "-order <int> LPC order {16}\n"
                 "-str mixed excitation strengths\n"
                 "-me_filters <string> mixed excitation filters\n"
                 "-rfc Coefficents are reflection coefficients\n", args);

    f1 = val_string(val_car(files));
    f2 = val_string(val_car(val_cdr(files)));
    t1 = new_track();

    lpc_start = mimic_get_param_int(args, "-lpc_start", 1);
    if (feat_present(args, "-rfc"))
        rfc = 1;
    if (feat_present(args, "-str"))
        str = 1;
    if (feat_present(args, "-me_filters"))
    {
        me_filters = new_track();
        if (cst_track_load_est
            (me_filters,
             mimic_get_param_string(args, "-me_filters",
                                    "me_filters.track")) != CST_OK_FORMAT)
        {
            fprintf(stderr,
                    "lpc_resynth: can't read file or wrong format \"%s\"\n",
                    f1);
            return -1;
        }
        hpulse = cst_alloc(float, me_filters->num_channels);
        hnoise = cst_alloc(float, me_filters->num_channels);
        xpulsesig = cst_alloc(float, me_filters->num_channels);
        xnoisesig = cst_alloc(float, me_filters->num_channels);
    }