/*
 * Unit test for cst_wave_resize().
 *
 * Resizes a fresh wave to 200 samples x 2 channels and checks that both
 * dimensions are recorded on the wave, then checks that passing a NULL
 * wave is reported with a negative return value.
 */
void test_resize(void)
{
    cst_wave *w = new_wave();

    cst_wave_resize(w, 200, 2);
    TEST_CHECK(w->num_samples == 200);
    /* Must be a comparison: an assignment here would overwrite the field
     * and make the check pass unconditionally. */
    TEST_CHECK(w->num_channels == 2);
    TEST_CHECK(cst_wave_resize(NULL, 200, 2) < 0);

    delete_wave(w);
}
int main(int argc, char **argv) { cst_wave *nw, *all; cst_val *files; const cst_val *w; cst_val *wavelist; cst_features *args; int i,j; float ntime; int stime; const char *nwfile; args = new_features(); files = cst_args(argv,argc, "usage: combine_waves OPTIONS\n" "Combine waves into single waveform\n" "-o <string> Output waveform\n" "-f <int> Input sample rate (for raw input)\n" "-itype <string> Input type, raw or headered\n" "-wavelist <string> File containing times and wave filenames\n", args); wavelist = get_wavelist(get_param_string(args,"-wavelist","-")); if (wavelist == 0) return -1; all = new_wave(); for (w = wavelist; w; w = val_cdr(w)) { ntime = decode_time(val_string(val_car(w))); nwfile = val_string(val_car(val_cdr(w))); nw = new_wave(); if (cst_wave_load_riff(nw,nwfile) != CST_OK_FORMAT) { fprintf(stderr, "combine_waves: can't read file or wrong format \"%s\"\n", nwfile); continue; } stime = ntime * nw->sample_rate; cst_wave_resize(all,stime+nw->num_samples,1); for (i=0,j=stime; i<nw->num_samples; i++,j++) { /* this will cause overflows */ all->samples[j] += nw->samples[i]; } delete_wave(nw); } cst_wave_save_riff(all,get_param_string(args,"-o","-")); return 0; }
/*
 * Unit test for cst_wave_rescale().
 *
 * Fills the first ten samples of a wave with 10, 11, ..., 19, rescales
 * the wave with a factor of 2 * 65536 and expects every one of those
 * samples to have doubled.
 */
void test_rescale(void)
{
    enum { NUM_SAMPLES = 10 };
    cst_wave *w;
    int i;

    w = new_wave();
    cst_wave_resize(w, NUM_SAMPLES, 2);

    /* Known ramp as input. */
    i = 0;
    while (i < NUM_SAMPLES)
    {
        w->samples[i] = 10 + i;
        i++;
    }

    cst_wave_rescale(w, 2 * 65536); /* scale x2 */

    i = 0;
    while (i < NUM_SAMPLES)
    {
        TEST_CHECK(w->samples[i] == (10 + i) * 2);
        i++;
    }

    delete_wave(w);
}
/*
 * Floating-point LPC resynthesis.
 *
 * Builds a new single-channel wave of lpcres->num_samples samples at
 * lpcres->sample_rate.  For each frame the packed coefficients are
 * unpacked to floats, then every residual sample of the frame (decoded
 * with cst_ulaw_to_short) is combined with the weighted previous outputs.
 *
 * Returns the newly created wave; the two scratch buffers are freed
 * before returning.
 */
cst_wave *lpc_resynth(cst_lpcres *lpcres)
{
    const int order = lpcres->num_channels;
    cst_wave *wave;
    float *history; /* circular buffer of the last order+1 outputs */
    float *coefs;   /* unpacked coefficients of the current frame */
    int frame, n, c;
    int out_pos;    /* running index into residual / output samples */
    int head;       /* slot in history for the sample being produced */
    int tap;        /* slot in history of the past sample being read */
    int frame_len;

    /* Get a new wave to build the signal into */
    wave = new_wave();
    cst_wave_resize(wave, lpcres->num_samples, 1);
    wave->sample_rate = lpcres->sample_rate;

    history = cst_alloc(float, 1 + order);
    coefs = cst_alloc(float, order);

    out_pos = 0;
    head = order;
    for (frame = 0; frame < lpcres->num_frames; frame++)
    {
        frame_len = lpcres->sizes[frame];

        /* Unpack the LPC coefficients */
        for (c = 0; c < order; c++)
        {
            coefs[c] =
                (float)((((double)lpcres->frames[frame][c]) / 65535.0) *
                        lpcres->lpc_range) +
                lpcres->lpc_min;
        }

        /* The history buffer is deliberately NOT cleared between frames;
         * the original author notes that zeroing it sounds worse. */

        /* Resynthesize the signal for this frame */
        for (n = 0; n < frame_len; n++, out_pos++)
        {
            history[head] = (float)cst_ulaw_to_short(lpcres->residual[out_pos]);

            /* Walk backwards (with wrap-around) over the previous outputs. */
            if (head == 0)
                tap = order;
            else
                tap = head - 1;

            for (c = 0; c < order; c++)
            {
                history[head] += coefs[c] * history[tap];
                if (tap == 0)
                    tap = order;
                else
                    tap = tap - 1;
            }

            wave->samples[out_pos] = (short)(history[head]);

            if (head == order)
                head = 0;
            else
                head = head + 1;
        }
    }

    cst_free(history);
    cst_free(coefs);

    return wave;
}
/*
 * Fixed-point, spike-excited LPC resynthesis (no floating point in the
 * synthesis loops).
 *
 * Builds a new single-channel wave of lpcres->num_samples samples at
 * lpcres->sample_rate.  lpc_min and lpc_range are converted once to
 * integers (scaled by 32768 and 2048 respectively); each frame's packed
 * coefficients are then unpacked with integer arithmetic, and every
 * product of a coefficient and a past output is divided by 16384 before
 * being accumulated.
 *
 * Returns the newly created wave; the scratch buffers are freed.
 */
cst_wave *lpc_resynth_sfp(cst_lpcres *lpcres)
{
    const int order = lpcres->num_channels;
    cst_wave *wave;
    int *history; /* circular buffer of the last order+1 outputs */
    int *coefs;   /* unpacked integer coefficients of the current frame */
    int frame, n, c;
    int out_pos;  /* running index into residual / output samples */
    int head;     /* slot in history for the sample being produced */
    int tap;      /* slot in history of the past sample being read */
    int frame_len;
    int min_fx, range_fx;

    /* Get a new wave to build the signal into */
    wave = new_wave();
    cst_wave_resize(wave, lpcres->num_samples, 1);
    wave->sample_rate = lpcres->sample_rate;

    history = cst_alloc(int, 1 + order);
    coefs = cst_alloc(int, order);

    min_fx = (int)(lpcres->lpc_min*32768.0);
    /* assume range is never > abs(16) */
    range_fx = (int)(lpcres->lpc_range*2048.0);

    out_pos = 0;
    head = order;
    for (frame = 0; frame < lpcres->num_frames; frame++)
    {
        frame_len = lpcres->sizes[frame];

        /* Unpack the LPC coefficients */
        for (c = 0; c < order; c++)
        {
            coefs[c] = ((lpcres->frames[frame][c]/2*range_fx)/2048+min_fx)/2;
        }

        /* Resynthesize the signal for this frame */
        for (n = 0; n < frame_len; n++, out_pos++)
        {
            history[head] = (int)cst_ulaw_to_short(lpcres->residual[out_pos]);

            /* Walk backwards (with wrap-around) over the previous outputs. */
            if (head == 0)
                tap = order;
            else
                tap = head - 1;

            for (c = 0; c < order; c++)
            {
                history[head] += (coefs[c]*history[tap])/16384;
                if (tap == 0)
                    tap = order;
                else
                    tap = tap - 1;
            }

            wave->samples[out_pos] = (short)history[head];

            if (head == order)
                head = 0;
            else
                head = head + 1;
        }
    }

    cst_free(history);
    cst_free(coefs);

    return wave;
}
/*
 * Synthesize the SSML text in `filename` with `voice`.
 *
 * Opens a token stream on the file using the voice's whitespace and
 * punctuation settings, optionally seeks to the voice feature
 * "file_start_position", and hands the stream to
 * flite_ssml_to_speech_ts().  Unless `outtype` is "play", "none" or
 * "stream" it is treated as an output file name, and an empty 16 kHz
 * wave is written there first because output is appended incrementally.
 *
 * Returns the value produced by flite_ssml_to_speech_ts(), or 1 if the
 * input file could not be opened.
 */
float flite_ssml_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_tokenstream *ts;
    cst_wave *empty_wave;
    int start_pos;
    int special_outtype;
    float dur;

    ts = ts_open(filename,
                 get_param_string(voice->features,"text_whitespace",NULL),
                 get_param_string(voice->features,"text_singlecharsymbols",NULL),
                 get_param_string(voice->features,"text_prepunctuation",NULL),
                 get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for ssml reading\n",
                   filename);
        return 1;
    }

    start_pos = get_param_int(voice->features,"file_start_position",0);
    if (start_pos > 0)
        ts_set_stream_pos(ts,start_pos);

    /* Anything other than the reserved output names is a file to write
     * to: create it as an empty wave so later output can be appended. */
    special_outtype = cst_streq(outtype,"play") ||
                      cst_streq(outtype,"none") ||
                      cst_streq(outtype,"stream");
    if (!special_outtype)
    {
        empty_wave = new_wave();
        cst_wave_resize(empty_wave,0,1);
        cst_wave_set_sample_rate(empty_wave,16000);
        cst_wave_save_riff(empty_wave,outtype); /* an empty wave */
        delete_wave(empty_wave);
    }

    dur = flite_ssml_to_speech_ts(ts,voice,outtype);

    ts_close(ts);

    return dur;
}
/*
 * Fixed-point LPC resynthesis with optional delayed residual decoding
 * and optional audio streaming.
 *
 * Builds a new single-channel wave sized for lpcres->num_samples at
 * lpcres->sample_rate.  For every frame:
 *   - if lpcres->delayed_decoding is set, the frame's residual is first
 *     decoded in place with add_residual_g721vuv();
 *   - the packed coefficients are unpacked to integers;
 *   - each residual sample (looked up in ulaw_to_short_table) is scaled
 *     by 16384, the weighted past outputs are added, and the sum is
 *     divided by 16384 again before being stored.
 * When lpcres->asi is set, its `asc` callback is invoked whenever more
 * than asi->min_buffsize new samples are available (fourth argument 0),
 * and once more after the last frame (fourth argument 1).  Synthesis
 * stops early as soon as a callback returns something other than
 * CST_AUDIO_STREAM_CONT.
 *
 * Returns the wave; its num_samples is set to the number of samples
 * actually produced.
 */
cst_wave *lpc_resynth_fixedpoint(cst_lpcres *lpcres)
{
    cst_wave *wave;
    int *history; /* circular buffer of the last num_channels+1 outputs */
    int *coefs;   /* unpacked integer coefficients of the current frame */
    int frame, n, c;
    int out_pos;  /* running index into residual / output samples */
    int head;     /* slot in history for the sample being produced */
    int tap;      /* slot in history of the past sample being read */
    int sent_upto; /* first sample not yet passed to the stream callback */
    int frame_len;
    int min_fx, range_fx;
    int status = CST_AUDIO_STREAM_CONT;

    /* Get a new wave to build the signal into */
    wave = new_wave();
    cst_wave_resize(wave,lpcres->num_samples,1);
    wave->sample_rate = lpcres->sample_rate;

    history = cst_alloc(int,1+lpcres->num_channels);
    coefs = cst_alloc(int,lpcres->num_channels);

    min_fx = (int)(lpcres->lpc_min*32768.0);
    /* assume range is never > abs(16) */
    range_fx = (int)(lpcres->lpc_range*2048.0);

    sent_upto = 0;
    out_pos = 0;
    head = lpcres->num_channels;
    for (frame = 0;
         (status == CST_AUDIO_STREAM_CONT) && (frame < lpcres->num_frames);
         frame++)
    {
        frame_len = lpcres->sizes[frame];

        if (lpcres->delayed_decoding)
        {
            /* do decoding for this frame */
            add_residual_g721vuv(frame_len,
                                 &lpcres->residual[out_pos],
                                 frame_len,
                                 lpcres->packed_residuals[frame]);
        }

        /* Unpack the LPC coefficients */
        for (c = 0; c < lpcres->num_channels; c++)
        {
            coefs[c] = ((lpcres->frames[frame][c]/2*range_fx)/2048+min_fx)/2;
        }

        /* Resynthesize the signal for this frame */
        for (n = 0; n < frame_len; n++, out_pos++)
        {
            history[head] = (int)ulaw_to_short_table[lpcres->residual[out_pos]];
            history[head] *= 16384;

            /* Walk backwards (with wrap-around) over the previous outputs. */
            if (head == 0)
                tap = lpcres->num_channels;
            else
                tap = head - 1;

            for (c = 0; c < lpcres->num_channels; c++)
            {
                history[head] += coefs[c]*history[tap];
                if (tap == 0)
                    tap = lpcres->num_channels;
                else
                    tap = tap - 1;
            }
            history[head] /= 16384;

            wave->samples[out_pos] = (short)history[head];

            if (head == lpcres->num_channels)
                head = 0;
            else
                head = head + 1;
        }

        /* Hand a chunk to the streaming callback once enough has built up. */
        if (lpcres->asi && (out_pos-sent_upto > lpcres->asi->min_buffsize))
        {
            status = (*lpcres->asi->asc)(wave,sent_upto,out_pos-sent_upto,0,
                                         lpcres->asi);
            sent_upto = out_pos;
        }
    }

    /* Final callback with whatever is left, unless streaming was stopped. */
    if ((lpcres->asi) && (status == CST_AUDIO_STREAM_CONT))
        (*lpcres->asi->asc)(wave,sent_upto,out_pos-sent_upto,1,lpcres->asi);

    cst_free(history);
    cst_free(coefs);

    wave->num_samples = out_pos; /* just to be safe */

    return wave;
}
/*
 * Synthesize the text in `filename` with `voice`, one utterance at a time.
 *
 * Tokens are read from the file and collected in a "Token" relation.  An
 * utterance ends when an empty token is read, when more than 500 tokens
 * have been collected, or when the break function (default_utt_break,
 * or the voice feature "utt_break" if present) says so; the utterance is
 * then passed to flite_tokens_to_speech().  Unless `outtype` is "play"
 * or "none" it is treated as an output file name, and an empty 16 kHz
 * wave is written there first because output is appended incrementally.
 *
 * Returns the sum of the non-negative values returned by
 * flite_tokens_to_speech(), or 1 if the input file could not be opened.
 * Processing stops at the first negative value.
 */
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *item;
    cst_relation *tokrel;
    float d;
    float durs = 0;
    int num_tokens = 0;
    int end_of_utt;
    cst_breakfunc breakfunc = default_utt_break;

    ts = ts_open(filename,
                 get_param_string(voice->features,"text_whitespace",NULL),
                 get_param_string(voice->features,"text_singlecharsymbols",NULL),
                 get_param_string(voice->features,"text_prepunctuation",NULL),
                 get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* Output to a file: start from an empty wave, as each utterance is
     * appended to it. */
    if (!(cst_streq(outtype,"play") || cst_streq(outtype,"none")))
    {
        cst_wave *empty_wave = new_wave();

        cst_wave_resize(empty_wave,0,1);
        cst_wave_set_sample_rate(empty_wave,16000);
        cst_wave_save_riff(empty_wave,outtype); /* an empty wave */
        delete_wave(empty_wave);
    }

    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);

        end_of_utt = (strlen(token) == 0) ||
                     (num_tokens > 500) || /* need an upper bound */
                     (relation_head(tokrel) && breakfunc(ts,token,tokrel));
        if (end_of_utt)
        {
            /* The utterance is handed over here and not touched again
             * (presumably consumed by the callee -- confirm). */
            d = flite_tokens_to_speech(utt,voice,outtype);
            utt = NULL;
            if (d < 0)
                break;
            durs += d;

            if (ts_eof(ts))
                break;

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            num_tokens = 0;
        }

        num_tokens++;

        item = relation_append(tokrel, NULL);
        item_set_string(item,"name",token);
        item_set_string(item,"whitespace",ts->whitespace);
        item_set_string(item,"prepunctuation",ts->prepunctuation);
        item_set_string(item,"punc",ts->postpunctuation);
        item_set_int(item,"file_pos",ts->file_pos);
        item_set_int(item,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);
    return durs;
}
/*
 * Synthesize the text in `filename` with `voice`, one utterance at a time.
 *
 * Reading optionally starts at the voice feature "file_start_position".
 * Tokens are collected in a "Token" relation; an utterance ends when an
 * empty token is read, when more than 500 tokens have been collected, or
 * when the break function (default_utt_break, or the voice feature
 * "utt_break" if present) says so.  A finished utterance is first passed
 * to the optional "utt_user_callback"; if that yields a non-NULL
 * utterance it is synthesized with flite_do_synth() and output with
 * flite_process_output(), otherwise processing stops.  Unless `outtype`
 * is "play", "none" or "stream" it is treated as an output file name,
 * and an empty 16 kHz wave is written there first because output is
 * appended incrementally.
 *
 * Returns the sum of the values returned by flite_process_output(), or
 * 1 if the input file could not be opened.
 */
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *item;
    cst_relation *tokrel;
    cst_wave *empty_wave;
    float total_dur = 0;
    int num_tokens = 0;
    int start_pos;
    int end_of_utt;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;

    ts = ts_open(filename,
                 get_param_string(voice->features,"text_whitespace",NULL),
                 get_param_string(voice->features,"text_singlecharsymbols",NULL),
                 get_param_string(voice->features,"text_prepunctuation",NULL),
                 get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    start_pos = get_param_int(voice->features,"file_start_position",0);
    if (start_pos > 0)
        ts_set_stream_pos(ts,start_pos);

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback =
            val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* Output to a file: create and save an empty wave file, as each
     * utterance is going to be appended to it. */
    if (!(cst_streq(outtype,"play") ||
          cst_streq(outtype,"none") ||
          cst_streq(outtype,"stream")))
    {
        empty_wave = new_wave();
        cst_wave_resize(empty_wave,0,1);
        cst_wave_set_sample_rate(empty_wave,16000);
        cst_wave_save_riff(empty_wave,outtype); /* an empty wave */
        delete_wave(empty_wave);
    }

    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);

        end_of_utt = (cst_strlen(token) == 0) ||
                     (num_tokens > 500) || /* need an upper bound */
                     (relation_head(tokrel) && breakfunc(ts,token,tokrel));
        if (end_of_utt)
        {
            /* An end of utt, so synthesize it */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            if (!utt)
                break;

            utt = flite_do_synth(utt,voice,utt_synth_tokens);
            total_dur += flite_process_output(utt,outtype,TRUE);
            delete_utterance(utt);
            utt = NULL;

            if (ts_eof(ts))
                break;

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            num_tokens = 0;
        }

        num_tokens++;

        item = relation_append(tokrel, NULL);
        item_set_string(item,"name",token);
        item_set_string(item,"whitespace",ts->whitespace);
        item_set_string(item,"prepunctuation",ts->prepunctuation);
        item_set_string(item,"punc",ts->postpunctuation);
        /* Mark it at the beginning of the token */
        item_set_int(item,"file_pos",
                     ts->file_pos-(1+ /* as we are already on the next char */
                                   cst_strlen(token)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
        item_set_int(item,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);
    return total_dur;
}