Beispiel #1
0
static int save_dataset_comments (const dataset *dset)
{
    int i, j;
    cst_voice *v;
    cst_wave *w, *fullw = NULL;

    flite_init();
    v = register_cmu_us_kal();

    j = 0;
    for (i=0; i<N_COMMENTS; i++) {
	if (dset->comments[i] != NULL) {
	    if (j == 0) {
		fullw = flite_text_to_wave(dset->comments[i], v);
	    } else {
		w = flite_text_to_wave(dset->comments[i], v);
		concat_wave(fullw, w);
		delete_wave(w);
	    }
	    j++;
	}
    }

    cst_wave_save_riff(fullw, "gretl_flite.wav");
    delete_wave(fullw);

    return 0;
}
Beispiel #2
0
float flite_process_output(cst_utterance *u, const char *outtype,
                           int append)
{
    /* Play or save (append) output to output file */
    cst_wave *w;
    float dur;

    if (!u) return 0.0;

    w = utt_wave(u);

    dur = (float)w->num_samples/(float)w->sample_rate;
	     
    if (cst_streq(outtype,"play"))
	play_wave(w);
    else if (cst_streq(outtype,"stream"))
    {
        /* It's already been played so do nothing */
        
    }
    else if (!cst_streq(outtype,"none"))
    {
        if (append)
            cst_wave_append_riff(w,outtype);
        else
            cst_wave_save_riff(w,outtype);
    }

    return dur;
}
Beispiel #3
0
int main(int argc, char **argv)
{
    cst_wave *nw, *all;
    cst_val *files;
    const cst_val *w;
    cst_val *wavelist;
    cst_features *args;
    int i,j;
    float ntime;
    int stime;
    const char *nwfile;

    args = new_features();
    files =
        cst_args(argv,argc,
                 "usage: combine_waves OPTIONS\n"
                 "Combine waves into single waveform\n"
		 "-o <string>  Output waveform\n"
		 "-f <int>     Input sample rate (for raw input)\n"
		 "-itype <string>  Input type, raw or headered\n"
		 "-wavelist <string>  File containing times and wave filenames\n",
                 args);

    wavelist = get_wavelist(get_param_string(args,"-wavelist","-"));

    if (wavelist == 0)
	return -1;

    all = new_wave();
    for (w = wavelist; w; w = val_cdr(w))
    {
	ntime = decode_time(val_string(val_car(w)));
	nwfile = val_string(val_car(val_cdr(w)));

	nw = new_wave();
	if (cst_wave_load_riff(nw,nwfile) != CST_OK_FORMAT)
	{
	    fprintf(stderr,
		    "combine_waves: can't read file or wrong format \"%s\"\n",
		    nwfile);
	    continue;
	}

	stime = ntime * nw->sample_rate;

	cst_wave_resize(all,stime+nw->num_samples,1);
	
	for (i=0,j=stime; i<nw->num_samples; i++,j++)
	{
	    /* this will cause overflows */
	    all->samples[j] += nw->samples[i];
	}
	delete_wave(nw);
    }

    cst_wave_save_riff(all,get_param_string(args,"-o","-"));

    return 0;
}
Beispiel #4
0
int flowm_save_wave(TCHAR *filename)
{
    /* Save the Last synthesized waveform file to filename */
    char *sfilename;
    int rc;

    if (!previous_wave)
        return -1;

    sfilename = cst_wstr2cstr(filename);
    rc = cst_wave_save_riff(previous_wave,sfilename);
    cst_free(sfilename);

    return rc;
}
Beispiel #5
0
float flite_ssml_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_tokenstream *ts;
    int fp;
    cst_wave *w;
    float d;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for ssml reading\n",
		   filename);
	return 1;
    }
    fp = get_param_int(voice->features,"file_start_position",0);
    if (fp > 0)
        ts_set_stream_pos(ts,fp);

    /* If its a file to write to, create and save an empty wave file */
    /* as we are going to incrementally append to it                 */
    if (!cst_streq(outtype,"play") && 
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    d = flite_ssml_to_speech_ts(ts,voice,outtype);

    ts_close(ts);
    
    return d;

}
int main(int argc, char **argv)
{
    cst_wave *in, *out;
    cst_val *files;
    cst_features *args;
    int i,j;
    int w, a, t;

    args = new_features();
    files =
        cst_args(argv,argc,
                 "usage: dcoffset_wave OPTIONS\n"
                 "Subtract window average from waveform\n"
		 "-i <string>  Input waveform\n"
		 "-o <string>  Output waveform\n"
		 "-w <int>     Window size (in samples)\n",
                 args);

    w = flite_get_param_int(args,"-w",20);
    in = new_wave();
    cst_wave_load_riff(in,flite_get_param_string(args,"-i","-"));

    out = copy_wave(in);

    for (i=0; i<=out->num_samples; i++)
    {
        for (t=a=0,j=i-w/2; j < i+w/2; j++)
        {
            if ((j > 0) && (j < out->num_samples))
            {
                t += 1;
                a+=in->samples[j];
            }
        }
        /*        printf("%d %d %d %d %d\n",i,out->samples[i],a/t,t,out->samples[i]-a/t); */
        out->samples[i] -= a/t;
    }

    cst_wave_save_riff(out,flite_get_param_string(args,"-o","-"));

    return 0;
}
int main(int argc, char **argv)
{
    cst_track *lpc;
    cst_wave *sig, *sig2;
    cst_sts *sts;

    if (argc != 6)
    {
	fprintf(stderr,"usage: find_sts lpc_min lpc_range LPC WAVEFILE STS\n");
	return 1;
    }

    lpc_min = atof(argv[1]);
    lpc_range = atof(argv[2]);

    lpc = new_track();
    cst_track_load_est(lpc,argv[3]);
    sig = new_wave();
    if (cst_wave_load_riff(sig,argv[4]) == CST_WRONG_FORMAT)
    {
	fprintf(stderr,
		"cannot load waveform, format unrecognized, from \"%s\"\n",
		argv[4]);
	exit(-1);
    }

    sts = find_sts(sig,lpc);

    /* See if it worked */
    sig2 = reconstruct_wave(sig,sts,lpc);

    compare_waves(sig,sig2);
    cst_wave_save_riff(sig2,"sig2.wav");

    save_sts(sts,lpc,sig,argv[5]);

    return 0;
}
float flite_phones_to_speech(const char *text,
			     cst_voice *voice,
			     const char *outtype)
{
    cst_utterance *u;
    cst_wave *w;
    float durs;

    u = flite_synth_phones(text,voice);
    if (u == NULL)
	    return -1;
    w = utt_wave(u);

    durs = (float)w->num_samples/(float)w->sample_rate;
	     
    if (cst_streq(outtype,"play"))
	play_wave(w);
    else if (!cst_streq(outtype,"none"))
	cst_wave_save_riff(w,outtype);
    delete_utterance(u);

    return durs;
}
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float d, durs = 0;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for reading\n",
		   filename);
	return 1;
    }

    if (feat_present(voice->features,"utt_break"))
	breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* If its a file to write to delete it as we're going to */
    /* incrementally append to it                            */
    if (!cst_streq(outtype,"play") && !cst_streq(outtype,"none"))
    {
	cst_wave *w;
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
	token = ts_get(ts);
	if ((strlen(token) == 0) ||
	    (num_tokens > 500) ||  /* need an upper bound */
	    (relation_head(tokrel) && 
	     breakfunc(ts,token,tokrel)))
	{
	    /* An end of utt */
	    d = flite_tokens_to_speech(utt,voice,outtype);
	    utt = NULL;
	    if (d < 0)
		goto out;
	    durs += d;

	    if (ts_eof(ts))
		goto out;

	    utt = new_utterance();
	    tokrel = utt_relation_create(utt, "Token");
	    num_tokens = 0;
	}
	num_tokens++;

	t = relation_append(tokrel, NULL);
	item_set_string(t,"name",token);
	item_set_string(t,"whitespace",ts->whitespace);
	item_set_string(t,"prepunctuation",ts->prepunctuation);
	item_set_string(t,"punc",ts->postpunctuation);
	item_set_int(t,"file_pos",ts->file_pos);
	item_set_int(t,"line_number",ts->line_number);
    }

out:
    delete_utterance(utt);
    ts_close(ts);
    return durs;
}
Beispiel #10
0
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float durs = 0;
    int num_tokens;
    cst_wave *w;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    int fp;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for reading\n",
		   filename);
	return 1;
    }
    fp = get_param_int(voice->features,"file_start_position",0);
    if (fp > 0)
        ts_set_stream_pos(ts,fp);

    if (feat_present(voice->features,"utt_break"))
	breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
	utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* If its a file to write to, create and save an empty wave file */
    /* as we are going to incrementally append to it                 */
    if (!cst_streq(outtype,"play") && 
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
	token = ts_get(ts);
	if ((cst_strlen(token) == 0) ||
	    (num_tokens > 500) ||  /* need an upper bound */
	    (relation_head(tokrel) && 
	     breakfunc(ts,token,tokrel)))
	{
	    /* An end of utt, so synthesize it */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            if (utt)
            {
                utt = flite_do_synth(utt,voice,utt_synth_tokens);
                durs += flite_process_output(utt,outtype,TRUE);
                delete_utterance(utt); utt = NULL;
            }
            else 
                break;

	    if (ts_eof(ts)) break;

	    utt = new_utterance();
	    tokrel = utt_relation_create(utt, "Token");
	    num_tokens = 0;
	}
	num_tokens++;

	t = relation_append(tokrel, NULL);
	item_set_string(t,"name",token);
	item_set_string(t,"whitespace",ts->whitespace);
	item_set_string(t,"prepunctuation",ts->prepunctuation);
	item_set_string(t,"punc",ts->postpunctuation);
        /* Mark it at the beginning of the token */
	item_set_int(t,"file_pos",
                     ts->file_pos-(1+ /* as we are already on the next char */
                                   cst_strlen(token)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
	item_set_int(t,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);
    return durs;
}