int main(int argc, char **argv) { cst_wave *nw, *all; cst_val *files; const cst_val *w; cst_val *wavelist; cst_features *args; int i,j; float ntime; int stime; const char *nwfile; args = new_features(); files = cst_args(argv,argc, "usage: combine_waves OPTIONS\n" "Combine waves into single waveform\n" "-o <string> Output waveform\n" "-f <int> Input sample rate (for raw input)\n" "-itype <string> Input type, raw or headered\n" "-wavelist <string> File containing times and wave filenames\n", args); wavelist = get_wavelist(get_param_string(args,"-wavelist","-")); if (wavelist == 0) return -1; all = new_wave(); for (w = wavelist; w; w = val_cdr(w)) { ntime = decode_time(val_string(val_car(w))); nwfile = val_string(val_car(val_cdr(w))); nw = new_wave(); if (cst_wave_load_riff(nw,nwfile) != CST_OK_FORMAT) { fprintf(stderr, "combine_waves: can't read file or wrong format \"%s\"\n", nwfile); continue; } stime = ntime * nw->sample_rate; cst_wave_resize(all,stime+nw->num_samples,1); for (i=0,j=stime; i<nw->num_samples; i++,j++) { /* this will cause overflows */ all->samples[j] += nw->samples[i]; } delete_wave(nw); } cst_wave_save_riff(all,get_param_string(args,"-o","-")); return 0; }
/*
 * A wave returned by new_wave() must be non-NULL, contain no samples and
 * have no type set.
 */
void test_create(void)
{
    cst_wave *w = new_wave();

    TEST_CHECK(w != NULL);
    TEST_CHECK(w->num_samples == 0);
    TEST_CHECK(w->type == NULL);

    delete_wave(w);
}
/*
 * Checks that cst_wave_resize() records the requested sample and channel
 * counts on the wave, and that it yields a negative value when given a
 * NULL wave.
 */
void test_resize(void)
{
    cst_wave *w = new_wave();

    cst_wave_resize(w, 200, 2);
    TEST_CHECK(w->num_samples == 200);
    /* Must compare with `==`: an assignment here always evaluates to 2,
     * so the check could never fail and it overwrote the field. */
    TEST_CHECK(w->num_channels == 2);

    TEST_CHECK(cst_wave_resize(NULL, 200, 2) < 0);

    delete_wave(w);
}
/*
 * Stores 10..19 in the first ten samples of a wave, rescales it with a
 * factor argument of 65536 * 2, and checks that each of those samples has
 * doubled.
 */
void test_rescale(void)
{
    cst_wave *w = new_wave();
    int i;

    cst_wave_resize(w, 10, 2);

    for (i = 0; i < 10; i++) {
        w->samples[i] = 10 + i;
    }

    cst_wave_rescale(w, 65536 * 2);     /* scale x2 */

    for (i = 0; i < 10; i++) {
        TEST_CHECK(w->samples[i] == (10 + i) * 2);
    }

    delete_wave(w);
}
/*
 * Floating point LPC resynthesis.
 *
 * Builds a new single-channel wave of lpcres->num_samples samples at
 * lpcres->sample_rate.  For every frame the packed coefficients are
 * unpacked to floats, then each residual sample (decoded with
 * cst_ulaw_to_short) is combined with the previous num_channels output
 * values, weighted by the frame's coefficients.
 *
 * Returns the newly created wave; the caller receives ownership of it.
 */
cst_wave *lpc_resynth(cst_lpcres *lpcres)
{
    cst_wave *wave;
    float *ring;        /* circular history of the most recent outputs */
    float *coefs;       /* unpacked coefficients of the current frame */
    int frame, n, c;
    int pos;            /* index of the sample being produced */
    int head;           /* ring slot that receives the current sample */
    int tap;            /* ring slot of the past sample being weighted */
    int frame_len;

    /* The wave the signal is built into */
    wave = new_wave();
    cst_wave_resize(wave, lpcres->num_samples, 1);
    wave->sample_rate = lpcres->sample_rate;

    /* One slot per past sample plus one for the sample under construction */
    ring = cst_alloc(float, 1 + lpcres->num_channels);
    coefs = cst_alloc(float, lpcres->num_channels);

    pos = 0;
    head = lpcres->num_channels;
    for (frame = 0; frame < lpcres->num_frames; frame++) {
        frame_len = lpcres->sizes[frame];

        /* Map each packed coefficient from 0..65535 onto
         * lpc_min .. lpc_min + lpc_range */
        for (c = 0; c < lpcres->num_channels; c++) {
            coefs[c] =
                (float)((((double)lpcres->frames[frame][c]) / 65535.0) *
                        lpcres->lpc_range) + lpcres->lpc_min;
        }

        /* The history is deliberately carried over from the previous
         * frame rather than cleared: clearing it sounds like the right
         * thing to do but gives worse results. */

        for (n = 0; n < frame_len; n++, pos++) {
            ring[head] = (float)cst_ulaw_to_short(lpcres->residual[pos]);

            /* start at the slot just before head, wrapping at 0 */
            if (head == 0)
                tap = lpcres->num_channels;
            else
                tap = head - 1;

            for (c = 0; c < lpcres->num_channels; c++) {
                ring[head] += coefs[c] * ring[tap];
                if (tap == 0)
                    tap = lpcres->num_channels;
                else
                    tap = tap - 1;
            }

            wave->samples[pos] = (short)(ring[head]);

            /* advance head, wrapping after the last slot */
            if (head == lpcres->num_channels)
                head = 0;
            else
                head = head + 1;
        }
    }

    cst_free(ring);
    cst_free(coefs);

    return wave;
}
/*
 * Fixed point, spike excited LPC resynthesis that uses no floating point
 * inside the synthesis loops.
 *
 * Builds a new single-channel wave of lpcres->num_samples samples at
 * lpcres->sample_rate.  Coefficients are unpacked to scaled integers per
 * frame, and every weighted past sample is divided by 16384 before it is
 * added to the decoded residual.
 *
 * Returns the newly created wave; the caller receives ownership of it.
 */
cst_wave *lpc_resynth_sfp(cst_lpcres *lpcres)
{
    cst_wave *wave;
    int *ring;          /* circular history of the most recent outputs */
    int *coefs;         /* unpacked integer coefficients of the frame */
    int frame, n, c;
    int pos;            /* index of the sample being produced */
    int head;           /* ring slot that receives the current sample */
    int tap;            /* ring slot of the past sample being weighted */
    int frame_len;
    int min_scaled, range_scaled;
    int acc;

    /* The wave the signal is built into */
    wave = new_wave();
    cst_wave_resize(wave, lpcres->num_samples, 1);
    wave->sample_rate = lpcres->sample_rate;

    /* One slot per past sample plus one for the sample under construction */
    ring = cst_alloc(int, 1 + lpcres->num_channels);
    coefs = cst_alloc(int, lpcres->num_channels);

    min_scaled = (int)(lpcres->lpc_min * 32768.0);
    /* assume range is never > abs(16) */
    range_scaled = (int)(lpcres->lpc_range * 2048.0);

    pos = 0;
    head = lpcres->num_channels;
    for (frame = 0; frame < lpcres->num_frames; frame++) {
        frame_len = lpcres->sizes[frame];

        /* Unpack the coefficients of this frame */
        for (c = 0; c < lpcres->num_channels; c++) {
            coefs[c] =
                ((lpcres->frames[frame][c] / 2 * range_scaled) / 2048 +
                 min_scaled) / 2;
        }

        /* Resynthesise the samples of this frame */
        for (n = 0; n < frame_len; n++, pos++) {
            acc = (int)cst_ulaw_to_short(lpcres->residual[pos]);

            /* start at the slot just before head, wrapping at 0 */
            if (head == 0)
                tap = lpcres->num_channels;
            else
                tap = head - 1;

            for (c = 0; c < lpcres->num_channels; c++) {
                acc += (coefs[c] * ring[tap]) / 16384;
                if (tap == 0)
                    tap = lpcres->num_channels;
                else
                    tap = tap - 1;
            }

            ring[head] = acc;
            wave->samples[pos] = (short)acc;

            /* advance head, wrapping after the last slot */
            if (head == lpcres->num_channels)
                head = 0;
            else
                head = head + 1;
        }
    }

    cst_free(ring);
    cst_free(coefs);

    return wave;
}
/*
 * Synthesize the SSML file `filename` with `voice`, sending the result to
 * `outtype`.
 *
 * The file is opened as a token stream using the voice's whitespace and
 * punctuation settings, optionally positioned at the voice's
 * "file_start_position", and handed to flite_ssml_to_speech_ts().
 *
 * When `outtype` is none of "play", "none" or "stream" it is treated as a
 * file name: an empty 16000 Hz wave is saved there first, because the
 * output is appended to it incrementally.
 *
 * Returns the value of flite_ssml_to_speech_ts(), or 1 when the file
 * cannot be opened.
 */
float flite_ssml_to_speech(const char *filename, cst_voice *voice, const char *outtype)
{
    cst_tokenstream *ts;
    cst_wave *empty;
    const char *whitespace;
    const char *singlechars;
    const char *prepunc;
    const char *postpunc;
    int start_pos;
    int to_file;
    float duration;

    whitespace = get_param_string(voice->features, "text_whitespace", NULL);
    singlechars = get_param_string(voice->features, "text_singlecharsymbols", NULL);
    prepunc = get_param_string(voice->features, "text_prepunctuation", NULL);
    postpunc = get_param_string(voice->features, "text_postpunctuation", NULL);

    ts = ts_open(filename, whitespace, singlechars, prepunc, postpunc);
    if (ts == NULL) {
        cst_errmsg("failed to open file \"%s\" for ssml reading\n", filename);
        return 1;
    }

    start_pos = get_param_int(voice->features, "file_start_position", 0);
    if (start_pos > 0) {
        ts_set_stream_pos(ts, start_pos);
    }

    /* Anything other than the three special output names is a file name */
    to_file = !(cst_streq(outtype, "play") ||
                cst_streq(outtype, "none") ||
                cst_streq(outtype, "stream"));
    if (to_file) {
        /* Start from an empty wave file; synthesis appends to it */
        empty = new_wave();
        cst_wave_resize(empty, 0, 1);
        cst_wave_set_sample_rate(empty, 16000);
        cst_wave_save_riff(empty, outtype);
        delete_wave(empty);
    }

    duration = flite_ssml_to_speech_ts(ts, voice, outtype);

    ts_close(ts);

    return duration;
}
/*
 * Build the waveform for `utt` by LPC resynthesis and attach it to the
 * utterance.
 *
 * The "resynth_type" feature (default "float") selects lpc_resynth() or
 * lpc_resynth_fixedpoint(); any other value is reported and cst_error()
 * is called.  If a "streaming_info" feature is present it is installed on
 * the LPC residual structure and linked back to the utterance before
 * synthesis starts.  When the resynthesis yields no wave, the utterance
 * gets the "Interrupted" feature and a fresh empty wave instead.
 *
 * Returns `utt`.
 */
cst_utterance *join_units_modified_lpc(cst_utterance *utt)
{
    const char *mode;
    const cst_val *asi_val;
    cst_lpcres *res;
    cst_wave *wave = 0;

    mode = get_param_string(utt->features, "resynth_type", "float");

    f0_targets_to_pm(utt);
    concat_units(utt);

    res = val_lpcres(utt_feat_val(utt, "target_lpcres"));

    asi_val = get_param_val(utt->features, "streaming_info", NULL);
    if (asi_val != NULL) {
        res->asi = val_audio_streaming_info(asi_val);
        res->asi->utt = utt;
    }

    if (cst_streq(mode, "float")) {
        wave = lpc_resynth(res);
    } else if (cst_streq(mode, "fixed")) {
        wave = lpc_resynth_fixedpoint(res);
    } else {
        cst_errmsg("unknown resynthesis type %s\n", mode);
        cst_error();            /* Should not happen */
    }

    if (wave == NULL) {
        /* Synthesis failed, probably because it was interrupted */
        utt_set_feat_int(utt, "Interrupted", 1);
        wave = new_wave();
    }

    utt_set_wave(utt, wave);

    return utt;
}
int main(int argc, char **argv) { cst_wave *in, *out; cst_val *files; cst_features *args; int i,j; int w, a, t; args = new_features(); files = cst_args(argv,argc, "usage: dcoffset_wave OPTIONS\n" "Subtract window average from waveform\n" "-i <string> Input waveform\n" "-o <string> Output waveform\n" "-w <int> Window size (in samples)\n", args); w = flite_get_param_int(args,"-w",20); in = new_wave(); cst_wave_load_riff(in,flite_get_param_string(args,"-i","-")); out = copy_wave(in); for (i=0; i<=out->num_samples; i++) { for (t=a=0,j=i-w/2; j < i+w/2; j++) { if ((j > 0) && (j < out->num_samples)) { t += 1; a+=in->samples[j]; } } /* printf("%d %d %d %d %d\n",i,out->samples[i],a/t,t,out->samples[i]-a/t); */ out->samples[i] -= a/t; } cst_wave_save_riff(out,flite_get_param_string(args,"-o","-")); return 0; }
int main(int argc, char **argv) { cst_track *lpc; cst_wave *sig, *sig2; cst_sts *sts; if (argc != 6) { fprintf(stderr,"usage: find_sts lpc_min lpc_range LPC WAVEFILE STS\n"); return 1; } lpc_min = atof(argv[1]); lpc_range = atof(argv[2]); lpc = new_track(); cst_track_load_est(lpc,argv[3]); sig = new_wave(); if (cst_wave_load_riff(sig,argv[4]) == CST_WRONG_FORMAT) { fprintf(stderr, "cannot load waveform, format unrecognized, from \"%s\"\n", argv[4]); exit(-1); } sts = find_sts(sig,lpc); /* See if it worked */ sig2 = reconstruct_wave(sig,sts,lpc); compare_waves(sig,sig2); cst_wave_save_riff(sig2,"sig2.wav"); save_sts(sts,lpc,sig,argv[5]); return 0; }
int main(int argc, char **argv) { cst_wave *w; cst_relation *r; cst_utterance *u; if (argc != 3) { fprintf(stderr, "usage: mimic_play_wave_sync WAVEFILE LABELFILE\n"); return 1; } w = new_wave(); if (cst_wave_load_riff(w, argv[1]) != CST_OK_FORMAT) return -1; u = new_utterance(); r = utt_relation_create(u, "FOO"); if (relation_load(r, argv[2]) != CST_OK_FORMAT) return -1; mimic_play_wave_sync(w, r, my_call_back); return 0; }
/*
 * Fixed point LPC resynthesis with optional delayed residual decoding and
 * audio streaming.
 *
 * Builds a new single-channel wave of lpcres->num_samples samples at
 * lpcres->sample_rate.  For each frame:
 *   - when lpcres->delayed_decoding is set, the frame's residual is first
 *     decoded in place with add_residual_g721vuv();
 *   - the coefficients are unpacked to scaled integers;
 *   - each residual sample (looked up in ulaw_to_short_table) is scaled
 *     by 16384, combined with the weighted past outputs, and scaled back.
 *
 * When lpcres->asi is set, its `asc` callback is invoked after a frame
 * once more than asi->min_buffsize samples have accumulated since the
 * previous call (fourth argument 0), and once more at the end (fourth
 * argument 1).  Frame processing stops as soon as the callback returns
 * something other than CST_AUDIO_STREAM_CONT; the final call is skipped
 * in that case.
 *
 * Returns the new wave with num_samples set to the number of samples
 * actually produced; the caller receives ownership of it.
 */
cst_wave *lpc_resynth_fixedpoint(cst_lpcres *lpcres)
{
    cst_wave *wave;
    int *ring;          /* circular history of the most recent outputs */
    int *coefs;         /* unpacked integer coefficients of the frame */
    int frame, n, c;
    int pos;            /* index of the sample being produced */
    int head;           /* ring slot that receives the current sample */
    int tap;            /* ring slot of the past sample being weighted */
    int sent;           /* first sample not yet given to the callback */
    int frame_len;
    int min_scaled, range_scaled;
    int acc;
    int status = CST_AUDIO_STREAM_CONT;

    /* The wave the signal is built into */
    wave = new_wave();
    cst_wave_resize(wave, lpcres->num_samples, 1);
    wave->sample_rate = lpcres->sample_rate;

    /* One slot per past sample plus one for the sample under construction */
    ring = cst_alloc(int, 1 + lpcres->num_channels);
    coefs = cst_alloc(int, lpcres->num_channels);

    min_scaled = (int)(lpcres->lpc_min * 32768.0);
    /* assume range is never > abs(16) */
    range_scaled = (int)(lpcres->lpc_range * 2048.0);

    sent = 0;
    pos = 0;
    head = lpcres->num_channels;
    for (frame = 0;
         (status == CST_AUDIO_STREAM_CONT) && (frame < lpcres->num_frames);
         frame++) {
        frame_len = lpcres->sizes[frame];

        if (lpcres->delayed_decoding) {
            /* Decode this frame's residual only now that it is needed */
            add_residual_g721vuv(lpcres->sizes[frame],
                                 &lpcres->residual[pos],
                                 lpcres->sizes[frame],
                                 lpcres->packed_residuals[frame]);
        }

        /* Unpack the coefficients of this frame */
        for (c = 0; c < lpcres->num_channels; c++) {
            coefs[c] =
                ((lpcres->frames[frame][c] / 2 * range_scaled) / 2048 +
                 min_scaled) / 2;
        }

        /* Resynthesise the samples of this frame */
        for (n = 0; n < frame_len; n++, pos++) {
            acc = (int)ulaw_to_short_table[lpcres->residual[pos]];
            acc *= 16384;

            /* start at the slot just before head, wrapping at 0 */
            if (head == 0)
                tap = lpcres->num_channels;
            else
                tap = head - 1;

            for (c = 0; c < lpcres->num_channels; c++) {
                acc += coefs[c] * ring[tap];
                if (tap == 0)
                    tap = lpcres->num_channels;
                else
                    tap = tap - 1;
            }

            acc /= 16384;
            ring[head] = acc;
            wave->samples[pos] = (short)acc;

            /* advance head, wrapping after the last slot */
            if (head == lpcres->num_channels)
                head = 0;
            else
                head = head + 1;
        }

        /* Hand finished samples to the streaming callback once enough
         * of them have piled up */
        if (lpcres->asi && (pos - sent > lpcres->asi->min_buffsize)) {
            status = (*lpcres->asi->asc)(wave, sent, pos - sent, 0,
                                         lpcres->asi);
            sent = pos;
        }
    }

    /* Flush whatever is left, flagged as the last chunk */
    if ((lpcres->asi) && (status == CST_AUDIO_STREAM_CONT)) {
        (*lpcres->asi->asc)(wave, sent, pos - sent, 1, lpcres->asi);
    }

    cst_free(ring);
    cst_free(coefs);

    wave->num_samples = pos;    /* just to be safe */

    return wave;
}
/*
 * Synthesize the text file `filename` with `voice`, sending the result to
 * `outtype`.
 *
 * Tokens are read from the file and collected into an utterance's "Token"
 * relation.  An utterance is closed and passed to
 * flite_tokens_to_speech() when an empty token is read, when more than
 * 500 tokens have been collected, or when the break function (the voice's
 * "utt_break" feature if present, otherwise default_utt_break) says so.
 *
 * When `outtype` is neither "play" nor "none" it is treated as a file
 * name: an empty 16000 Hz wave is saved there first, because the output
 * is appended to it incrementally.
 *
 * Returns the sum of the values returned by flite_tokens_to_speech(), or
 * 1 when the file cannot be opened.  Processing stops early when
 * flite_tokens_to_speech() returns a negative value.
 */
float flite_file_to_speech(const char *filename, cst_voice *voice, const char *outtype)
{
    cst_tokenstream *ts;
    cst_utterance *utt;
    cst_relation *tokrel;
    cst_item *item;
    cst_breakfunc breakfunc = default_utt_break;
    const char *whitespace;
    const char *singlechars;
    const char *prepunc;
    const char *postpunc;
    const char *token;
    float d;
    float durs = 0;
    int num_tokens;

    whitespace = get_param_string(voice->features, "text_whitespace", NULL);
    singlechars = get_param_string(voice->features, "text_singlecharsymbols", NULL);
    prepunc = get_param_string(voice->features, "text_prepunctuation", NULL);
    postpunc = get_param_string(voice->features, "text_postpunctuation", NULL);

    ts = ts_open(filename, whitespace, singlechars, prepunc, postpunc);
    if (ts == NULL) {
        cst_errmsg("failed to open file \"%s\" for reading\n", filename);
        return 1;
    }

    if (feat_present(voice->features, "utt_break")) {
        breakfunc = val_breakfunc(feat_val(voice->features, "utt_break"));
    }

    /* Output to a file is appended incrementally, so begin by replacing
     * the file with an empty wave */
    if (!(cst_streq(outtype, "play") || cst_streq(outtype, "none"))) {
        cst_wave *empty = new_wave();

        cst_wave_resize(empty, 0, 1);
        cst_wave_set_sample_rate(empty, 16000);
        cst_wave_save_riff(empty, outtype);
        delete_wave(empty);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");

    while (!ts_eof(ts) || num_tokens > 0) {
        token = ts_get(ts);

        if ((strlen(token) == 0) ||
            (num_tokens > 500) ||       /* need an upper bound */
            (relation_head(tokrel) && breakfunc(ts, token, tokrel))) {
            /* End of an utterance: speak what has been collected */
            d = flite_tokens_to_speech(utt, voice, outtype);
            utt = NULL;
            if (d < 0) {
                break;
            }
            durs += d;

            if (ts_eof(ts)) {
                break;
            }

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            num_tokens = 0;
        }

        num_tokens++;

        item = relation_append(tokrel, NULL);
        item_set_string(item, "name", token);
        item_set_string(item, "whitespace", ts->whitespace);
        item_set_string(item, "prepunctuation", ts->prepunctuation);
        item_set_string(item, "punc", ts->postpunctuation);
        item_set_int(item, "file_pos", ts->file_pos);
        item_set_int(item, "line_number", ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);

    return durs;
}
int main(int argc, char **argv) { cst_wave *w; int port; char *server; char *encoding; int i,iw; port = CST_AUDIO_DEFAULT_PORT; server = CST_AUDIO_DEFAULT_SERVER; encoding = CST_AUDIO_DEFAULT_ENCODING; if (argc == 1) { play_client_main_usage(); return 1; } if ((cst_streq("-h",argv[1])) || (cst_streq("-help",argv[1])) || (cst_streq("--help",argv[1]))) { play_client_main_usage(); return 1; } iw = 1; if (cst_streq("-s",argv[iw])) { if (argc < iw+1) { fprintf(stderr,"ERROR: no servername given\n"); play_client_main_usage(); return 1; } server = argv[iw+1]; iw+=2; } if (cst_streq("-p",argv[iw])) { if (argc < iw+1) { fprintf(stderr,"ERROR: no port given\n"); play_client_main_usage(); return 1; } port = atoi(argv[iw+1]); iw+=2; } if (cst_streq("-e",argv[iw])) { if (argc < iw+1) { fprintf(stderr,"ERROR: no encoding given\n"); play_client_main_usage(); return 1; } encoding = argv[iw+1]; iw+=2; } for (i=iw; i<argc; i++) { w = new_wave(); if (cst_wave_load_riff(w,argv[i]) != CST_OK_FORMAT) { fprintf(stderr, "play_wave: can't read file or wrong format \"%s\"\n", argv[i]); continue; } play_wave_client(w,server,port,encoding); delete_wave(w); } return 0; }
/*
 * Synthesize the text file `filename` with `voice`, sending the result to
 * `outtype`.
 *
 * The file is read as a token stream, optionally starting at the voice's
 * "file_start_position".  Tokens are collected into an utterance's
 * "Token" relation.  An utterance is closed when an empty token is read,
 * when more than 500 tokens have been collected, or when the break
 * function (the voice's "utt_break" feature if present, otherwise
 * default_utt_break) says so.  A closed utterance is first passed to the
 * voice's "utt_user_callback" (if any), then synthesized with
 * flite_do_synth() and output with flite_process_output().  Reading stops
 * when the callback returns no utterance.
 *
 * When `outtype` is none of "play", "none" or "stream" it is treated as a
 * file name: an empty 16000 Hz wave is saved there first, because the
 * output is appended to it incrementally.
 *
 * Returns the sum of the values returned by flite_process_output(), or 1
 * when the file cannot be opened.
 */
float flite_file_to_speech(const char *filename, cst_voice *voice, const char *outtype)
{
    cst_tokenstream *ts;
    cst_utterance *utt;
    cst_relation *tokrel;
    cst_item *item;
    cst_wave *empty;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    const char *whitespace;
    const char *singlechars;
    const char *prepunc;
    const char *postpunc;
    const char *token;
    float durs = 0;
    int num_tokens;
    int start_pos;

    whitespace = get_param_string(voice->features, "text_whitespace", NULL);
    singlechars = get_param_string(voice->features, "text_singlecharsymbols", NULL);
    prepunc = get_param_string(voice->features, "text_prepunctuation", NULL);
    postpunc = get_param_string(voice->features, "text_postpunctuation", NULL);

    ts = ts_open(filename, whitespace, singlechars, prepunc, postpunc);
    if (ts == NULL) {
        cst_errmsg("failed to open file \"%s\" for reading\n", filename);
        return 1;
    }

    start_pos = get_param_int(voice->features, "file_start_position", 0);
    if (start_pos > 0) {
        ts_set_stream_pos(ts, start_pos);
    }

    if (feat_present(voice->features, "utt_break")) {
        breakfunc = val_breakfunc(feat_val(voice->features, "utt_break"));
    }

    if (feat_present(voice->features, "utt_user_callback")) {
        utt_user_callback =
            val_uttfunc(feat_val(voice->features, "utt_user_callback"));
    }

    /* Output to a file is appended incrementally, so begin by saving an
     * empty wave there */
    if (!(cst_streq(outtype, "play") ||
          cst_streq(outtype, "none") ||
          cst_streq(outtype, "stream"))) {
        empty = new_wave();
        cst_wave_resize(empty, 0, 1);
        cst_wave_set_sample_rate(empty, 16000);
        cst_wave_save_riff(empty, outtype);
        delete_wave(empty);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");

    while (!ts_eof(ts) || num_tokens > 0) {
        token = ts_get(ts);

        if ((cst_strlen(token) == 0) ||
            (num_tokens > 500) ||       /* need an upper bound */
            (relation_head(tokrel) && breakfunc(ts, token, tokrel))) {
            /* End of an utterance: let the user callback see it, then
             * synthesize it */
            if (utt_user_callback) {
                utt = (utt_user_callback)(utt);
            }

            if (!utt) {
                break;
            }

            utt = flite_do_synth(utt, voice, utt_synth_tokens);
            durs += flite_process_output(utt, outtype, TRUE);
            delete_utterance(utt);
            utt = NULL;

            if (ts_eof(ts)) {
                break;
            }

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            num_tokens = 0;
        }

        num_tokens++;

        item = relation_append(tokrel, NULL);
        item_set_string(item, "name", token);
        item_set_string(item, "whitespace", ts->whitespace);
        item_set_string(item, "prepunctuation", ts->prepunctuation);
        item_set_string(item, "punc", ts->postpunctuation);
        /* Record the position of the start of the token: step back over
         * the token and its punctuation, plus one because the stream is
         * already on the next character */
        item_set_int(item, "file_pos",
                     ts->file_pos - (1 +
                                     cst_strlen(token) +
                                     cst_strlen(ts->prepunctuation) +
                                     cst_strlen(ts->postpunctuation)));
        item_set_int(item, "line_number", ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);

    return durs;
}