/*
 * Synthesize every non-NULL entry of dset->comments with the voice returned
 * by register_cmu_us_kal(), join the results into one waveform and save it
 * to "gretl_flite.wav".
 *
 * Returns 0 on success, 1 if no waveform was produced (no comments, or
 * synthesis failed for all of them) or if saving reported a non-zero status.
 */
static int save_dataset_comments (const dataset *dset)
{
    cst_voice *v;
    cst_wave *w, *fullw = NULL;
    int i;
    int err = 0;

    flite_init();
    v = register_cmu_us_kal();

    for (i=0; i<N_COMMENTS; i++) {
        if (dset->comments[i] == NULL) {
            continue;
        }

        w = flite_text_to_wave(dset->comments[i], v);
        if (w == NULL) {
            /* nothing synthesized for this comment: skip it */
            continue;
        }

        if (fullw == NULL) {
            /* the first successful wave becomes the accumulator */
            fullw = w;
        } else {
            concat_wave(fullw, w);
            delete_wave(w);
        }
    }

    if (fullw == NULL) {
        /* never hand a NULL wave to the save routine */
        return 1;
    }

    /* NOTE(review): assumes a zero return means success, as with
       CST_OK_FORMAT in flite -- confirm against cst_wave.h */
    if (cst_wave_save_riff(fullw, "gretl_flite.wav") != 0) {
        err = 1;
    }

    delete_wave(fullw);

    return err;
}
/*
 * Deliver the waveform of a synthesized utterance according to outtype:
 *   "play"   - play it with play_wave()
 *   "stream" - do nothing (the audio has already been played)
 *   "none"   - do nothing
 *   other    - treat outtype as a file name; append to it when `append`
 *              is non-zero, otherwise save over it
 *
 * Returns num_samples / sample_rate of the utterance's wave, or 0.0 when
 * u is NULL.
 */
float flite_process_output(cst_utterance *u, const char *outtype,
                           int append)
{
    cst_wave *wave;
    float seconds;

    if (u == NULL)
        return 0.0;

    wave = utt_wave(u);
    seconds = (float)wave->num_samples / (float)wave->sample_rate;

    if (cst_streq(outtype,"play"))
    {
        play_wave(wave);
        return seconds;
    }

    /* "stream" output was played during synthesis; "none" discards it */
    if (cst_streq(outtype,"stream") || cst_streq(outtype,"none"))
        return seconds;

    /* Anything else names an output file */
    if (append)
        cst_wave_append_riff(wave,outtype);
    else
        cst_wave_save_riff(wave,outtype);

    return seconds;
}
int main(int argc, char **argv) { cst_wave *nw, *all; cst_val *files; const cst_val *w; cst_val *wavelist; cst_features *args; int i,j; float ntime; int stime; const char *nwfile; args = new_features(); files = cst_args(argv,argc, "usage: combine_waves OPTIONS\n" "Combine waves into single waveform\n" "-o <string> Output waveform\n" "-f <int> Input sample rate (for raw input)\n" "-itype <string> Input type, raw or headered\n" "-wavelist <string> File containing times and wave filenames\n", args); wavelist = get_wavelist(get_param_string(args,"-wavelist","-")); if (wavelist == 0) return -1; all = new_wave(); for (w = wavelist; w; w = val_cdr(w)) { ntime = decode_time(val_string(val_car(w))); nwfile = val_string(val_car(val_cdr(w))); nw = new_wave(); if (cst_wave_load_riff(nw,nwfile) != CST_OK_FORMAT) { fprintf(stderr, "combine_waves: can't read file or wrong format \"%s\"\n", nwfile); continue; } stime = ntime * nw->sample_rate; cst_wave_resize(all,stime+nw->num_samples,1); for (i=0,j=stime; i<nw->num_samples; i++,j++) { /* this will cause overflows */ all->samples[j] += nw->samples[i]; } delete_wave(nw); } cst_wave_save_riff(all,get_param_string(args,"-o","-")); return 0; }
/*
 * Write the most recently synthesized waveform (previous_wave) to the
 * file named by `filename`.
 *
 * Returns -1 when there is no previous waveform; otherwise returns the
 * status reported by cst_wave_save_riff().
 */
int flowm_save_wave(TCHAR *filename)
{
    int status = -1;

    if (previous_wave)
    {
        /* cst_wave_save_riff() takes a char string, so convert first */
        char *narrow_name = cst_wstr2cstr(filename);

        status = cst_wave_save_riff(previous_wave,narrow_name);
        cst_free(narrow_name);
    }

    return status;
}
/*
 * Open `filename` as a token stream, using the voice's whitespace and
 * punctuation settings, and pass it to flite_ssml_to_speech_ts().
 *
 * If the voice has a positive "file_start_position" feature the stream is
 * positioned there first.  When outtype is not "play", "none" or "stream"
 * it is taken as an output file name and an empty wave is written to it
 * before synthesis, since output is appended incrementally.
 *
 * Returns the value of flite_ssml_to_speech_ts(), or 1 if the file could
 * not be opened.
 */
float flite_ssml_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_tokenstream *ts;
    cst_wave *empty;
    float result;
    int start_pos;
    int writes_file;

    ts = ts_open(filename,
                 get_param_string(voice->features,"text_whitespace",NULL),
                 get_param_string(voice->features,"text_singlecharsymbols",NULL),
                 get_param_string(voice->features,"text_prepunctuation",NULL),
                 get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for ssml reading\n",
                   filename);
        return 1;
    }

    start_pos = get_param_int(voice->features,"file_start_position",0);
    if (start_pos > 0)
        ts_set_stream_pos(ts,start_pos);

    /* Any outtype other than the three keywords is an output file name */
    writes_file = !cst_streq(outtype,"play") &&
                  !cst_streq(outtype,"none") &&
                  !cst_streq(outtype,"stream");
    if (writes_file)
    {
        /* Start the file off as an empty wave; later output is appended */
        empty = new_wave();
        cst_wave_resize(empty,0,1);
        cst_wave_set_sample_rate(empty,16000);
        cst_wave_save_riff(empty,outtype);
        delete_wave(empty);
    }

    result = flite_ssml_to_speech_ts(ts,voice,outtype);

    ts_close(ts);

    return result;
}
int main(int argc, char **argv) { cst_wave *in, *out; cst_val *files; cst_features *args; int i,j; int w, a, t; args = new_features(); files = cst_args(argv,argc, "usage: dcoffset_wave OPTIONS\n" "Subtract window average from waveform\n" "-i <string> Input waveform\n" "-o <string> Output waveform\n" "-w <int> Window size (in samples)\n", args); w = flite_get_param_int(args,"-w",20); in = new_wave(); cst_wave_load_riff(in,flite_get_param_string(args,"-i","-")); out = copy_wave(in); for (i=0; i<=out->num_samples; i++) { for (t=a=0,j=i-w/2; j < i+w/2; j++) { if ((j > 0) && (j < out->num_samples)) { t += 1; a+=in->samples[j]; } } /* printf("%d %d %d %d %d\n",i,out->samples[i],a/t,t,out->samples[i]-a/t); */ out->samples[i] -= a/t; } cst_wave_save_riff(out,flite_get_param_string(args,"-o","-")); return 0; }
int main(int argc, char **argv) { cst_track *lpc; cst_wave *sig, *sig2; cst_sts *sts; if (argc != 6) { fprintf(stderr,"usage: find_sts lpc_min lpc_range LPC WAVEFILE STS\n"); return 1; } lpc_min = atof(argv[1]); lpc_range = atof(argv[2]); lpc = new_track(); cst_track_load_est(lpc,argv[3]); sig = new_wave(); if (cst_wave_load_riff(sig,argv[4]) == CST_WRONG_FORMAT) { fprintf(stderr, "cannot load waveform, format unrecognized, from \"%s\"\n", argv[4]); exit(-1); } sts = find_sts(sig,lpc); /* See if it worked */ sig2 = reconstruct_wave(sig,sts,lpc); compare_waves(sig,sig2); cst_wave_save_riff(sig2,"sig2.wav"); save_sts(sts,lpc,sig,argv[5]); return 0; }
/*
 * Synthesize `text` with flite_synth_phones() and deliver the waveform:
 * "play" plays it, "none" discards it, and any other outtype is used as
 * the file name to save it to.
 *
 * Returns num_samples / sample_rate of the synthesized wave, or -1 if
 * synthesis returned no utterance.
 */
float flite_phones_to_speech(const char *text,
                             cst_voice *voice,
                             const char *outtype)
{
    cst_utterance *utt;
    cst_wave *wave;
    float seconds;

    utt = flite_synth_phones(text,voice);
    if (utt == NULL)
        return -1;

    wave = utt_wave(utt);
    seconds = (float)wave->num_samples / (float)wave->sample_rate;

    if (cst_streq(outtype,"play"))
    {
        play_wave(wave);
    }
    else if (!cst_streq(outtype,"none"))
    {
        /* outtype names the output file */
        cst_wave_save_riff(wave,outtype);
    }

    delete_utterance(utt);

    return seconds;
}
/*
 * Read `filename` token by token, group the tokens into utterances and
 * hand each one to flite_tokens_to_speech().
 *
 * An utterance ends on an empty token, after more than 500 tokens, or when
 * the break function (the voice's "utt_break" feature if present, else
 * default_utt_break) says so.  When outtype is neither "play" nor "none"
 * it is used as an output file name and an empty wave is written to it
 * first, since output is appended incrementally.
 *
 * Returns the sum of the non-negative values returned by
 * flite_tokens_to_speech(); processing stops at the first negative value.
 * Returns 1 if the file could not be opened.
 */
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *item;
    cst_relation *tokrel;
    float piece;
    float total = 0;
    int pending;            /* tokens collected for the current utterance */
    int end_of_utt;
    cst_breakfunc breakfunc = default_utt_break;

    ts = ts_open(filename,
                 get_param_string(voice->features,"text_whitespace",NULL),
                 get_param_string(voice->features,"text_singlecharsymbols",NULL),
                 get_param_string(voice->features,"text_prepunctuation",NULL),
                 get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* For file output, start from an empty wave file and append to it */
    if (!cst_streq(outtype,"play") && !cst_streq(outtype,"none"))
    {
        cst_wave *empty = new_wave();

        cst_wave_resize(empty,0,1);
        cst_wave_set_sample_rate(empty,16000);
        cst_wave_save_riff(empty,outtype);
        delete_wave(empty);
    }

    pending = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || pending > 0)
    {
        token = ts_get(ts);

        /* Same short-circuit order as a single || / && expression: the
           break function is only consulted when the cheaper tests fail
           and the relation already has a head */
        end_of_utt = (strlen(token) == 0) ||
                     (pending > 500);       /* need an upper bound */
        if (!end_of_utt &&
            relation_head(tokrel) && breakfunc(ts,token,tokrel))
            end_of_utt = 1;

        if (end_of_utt)
        {
            piece = flite_tokens_to_speech(utt,voice,outtype);
            utt = NULL;     /* nothing left for this function to delete */
            if (piece < 0)
                break;
            total += piece;

            if (ts_eof(ts))
                break;

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            pending = 0;
        }

        pending++;

        item = relation_append(tokrel, NULL);
        item_set_string(item,"name",token);
        item_set_string(item,"whitespace",ts->whitespace);
        item_set_string(item,"prepunctuation",ts->prepunctuation);
        item_set_string(item,"punc",ts->postpunctuation);
        item_set_int(item,"file_pos",ts->file_pos);
        item_set_int(item,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);

    return total;
}
/*
 * Read `filename` token by token, group the tokens into utterances,
 * synthesize each with flite_do_synth() and deliver it through
 * flite_process_output() in append mode.
 *
 * Voice features consulted:
 *   "file_start_position" - if positive, stream position to start from
 *   "utt_break"           - replaces default_utt_break when present
 *   "utt_user_callback"   - if present, called on each utterance before
 *                           synthesis; a NULL result stops processing
 *
 * An utterance ends on an empty token, after more than 500 tokens, or when
 * the break function says so.  When outtype is not "play", "none" or
 * "stream" it is used as an output file name and an empty wave is written
 * to it first, since output is appended incrementally.
 *
 * Returns the sum of the values returned by flite_process_output(), or 1
 * if the file could not be opened.
 */
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *item;
    cst_relation *tokrel;
    float total = 0;
    int pending;            /* tokens collected for the current utterance */
    int end_of_utt;
    int start_pos;
    cst_wave *empty;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;

    ts = ts_open(filename,
                 get_param_string(voice->features,"text_whitespace",NULL),
                 get_param_string(voice->features,"text_singlecharsymbols",NULL),
                 get_param_string(voice->features,"text_prepunctuation",NULL),
                 get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    start_pos = get_param_int(voice->features,"file_start_position",0);
    if (start_pos > 0)
        ts_set_stream_pos(ts,start_pos);

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback =
            val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* For file output, start from an empty wave file and append to it */
    if (!cst_streq(outtype,"play") &&
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
        empty = new_wave();
        cst_wave_resize(empty,0,1);
        cst_wave_set_sample_rate(empty,16000);
        cst_wave_save_riff(empty,outtype);
        delete_wave(empty);
    }

    pending = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || pending > 0)
    {
        token = ts_get(ts);

        /* Same short-circuit order as a single || / && expression: the
           break function is only consulted when the cheaper tests fail
           and the relation already has a head */
        end_of_utt = (cst_strlen(token) == 0) ||
                     (pending > 500);       /* need an upper bound */
        if (!end_of_utt &&
            relation_head(tokrel) && breakfunc(ts,token,tokrel))
            end_of_utt = 1;

        if (end_of_utt)
        {
            /* An end of utt, so synthesize it */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            if (!utt)
                break;

            utt = flite_do_synth(utt,voice,utt_synth_tokens);
            total += flite_process_output(utt,outtype,TRUE);
            delete_utterance(utt);
            utt = NULL;

            if (ts_eof(ts))
                break;

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            pending = 0;
        }

        pending++;

        item = relation_append(tokrel, NULL);
        item_set_string(item,"name",token);
        item_set_string(item,"whitespace",ts->whitespace);
        item_set_string(item,"prepunctuation",ts->prepunctuation);
        item_set_string(item,"punc",ts->postpunctuation);
        /* Mark it at the beginning of the token */
        item_set_int(item,"file_pos",
                     ts->file_pos-(1+ /* as we are already on the next char */
                                   cst_strlen(token)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
        item_set_int(item,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);

    return total;
}