Пример #1
0
/*
 * Unit test for cst_wave_resize().
 *
 * Resizes a fresh wave to 200 samples / 2 channels and checks that both
 * fields report the requested geometry, then checks that a NULL wave is
 * rejected with a negative return value.
 */
void test_resize(void)
{
   cst_wave *w = new_wave();
   cst_wave_resize(w, 200, 2);
   TEST_CHECK(w->num_samples == 200);
   /* Must be a comparison: with '=' the check could never fail and it
      overwrote the very field being verified. */
   TEST_CHECK(w->num_channels == 2);
   TEST_CHECK(cst_wave_resize(NULL, 200, 2) < 0);
   delete_wave(w);
}
Пример #2
0
int main(int argc, char **argv)
{
    cst_wave *nw, *all;
    cst_val *files;
    const cst_val *w;
    cst_val *wavelist;
    cst_features *args;
    int i,j;
    float ntime;
    int stime;
    const char *nwfile;

    args = new_features();
    files =
        cst_args(argv,argc,
                 "usage: combine_waves OPTIONS\n"
                 "Combine waves into single waveform\n"
		 "-o <string>  Output waveform\n"
		 "-f <int>     Input sample rate (for raw input)\n"
		 "-itype <string>  Input type, raw or headered\n"
		 "-wavelist <string>  File containing times and wave filenames\n",
                 args);

    wavelist = get_wavelist(get_param_string(args,"-wavelist","-"));

    if (wavelist == 0)
	return -1;

    all = new_wave();
    for (w = wavelist; w; w = val_cdr(w))
    {
	ntime = decode_time(val_string(val_car(w)));
	nwfile = val_string(val_car(val_cdr(w)));

	nw = new_wave();
	if (cst_wave_load_riff(nw,nwfile) != CST_OK_FORMAT)
	{
	    fprintf(stderr,
		    "combine_waves: can't read file or wrong format \"%s\"\n",
		    nwfile);
	    continue;
	}

	stime = ntime * nw->sample_rate;

	cst_wave_resize(all,stime+nw->num_samples,1);
	
	for (i=0,j=stime; i<nw->num_samples; i++,j++)
	{
	    /* this will cause overflows */
	    all->samples[j] += nw->samples[i];
	}
	delete_wave(nw);
    }

    cst_wave_save_riff(all,get_param_string(args,"-o","-"));

    return 0;
}
Пример #3
0
/*
 * Unit test for cst_wave_rescale().
 *
 * Fills the first ten samples of a wave resized to 10 samples and
 * 2 channels with the values 10..19, rescales with a factor of
 * 65536 * 2 and expects each of those samples to have doubled.
 */
void test_rescale(void)
{
   enum { FILLED = 10 };
   cst_wave *w;
   int i;

   w = new_wave();
   cst_wave_resize(w, FILLED, 2);

   i = 0;
   while (i < FILLED)
   {
      w->samples[i] = 10 + i;
      i++;
   }

   /* a factor of 2 * 65536 is expected to double every sample */
   cst_wave_rescale(w, 65536 * 2);

   i = 0;
   while (i < FILLED)
   {
      TEST_CHECK(w->samples[i] == (10 + i) * 2);
      i++;
   }

   delete_wave(w);
}
/*
 * Resynthesize a waveform from LPC frames and a ulaw-coded residual
 * using floating point arithmetic.
 *
 * For each frame the packed coefficients are expanded as
 * frames[i][k] / 65535 * lpc_range + lpc_min.  Then sizes[i] residual
 * samples are decoded with cst_ulaw_to_short() and each one is summed
 * with the previous num_channels outputs weighted by those coefficients.
 *
 * Returns a new single-channel wave resized to lpcres->num_samples with
 * lpcres->sample_rate; the scratch buffers are released before return.
 */
cst_wave *lpc_resynth(cst_lpcres *lpcres)
{
    cst_wave *wave;
    float *history, *coefs;
    int order;
    int frame, left, pos, head, back, tap;

    /* Output wave that receives the rebuilt signal */
    wave = new_wave();
    cst_wave_resize(wave,lpcres->num_samples,1);
    wave->sample_rate = lpcres->sample_rate;

    order = lpcres->num_channels;

    /* Ring of order+1 slots: the output being built plus the `order`
       outputs that precede it */
    history = cst_alloc(float,1+order);
    /* Expanded coefficients of the current frame */
    coefs = cst_alloc(float,order);

    pos = 0;
    head = order;
    for (frame = 0; frame < lpcres->num_frames; frame++)
    {
        /* Expand this frame's packed coefficients */
        for (tap = 0; tap < order; tap++)
        {
            coefs[tap] = (float)((((double)lpcres->frames[frame][tap])/65535.0)*
                                 lpcres->lpc_range) + lpcres->lpc_min;
        }

        /* The ring is intentionally NOT cleared between frames; the */
        /* original author notes that zeroing it makes the result worse */

        /* Produce sizes[frame] output samples */
        for (left = lpcres->sizes[frame]; left > 0; left--, pos++)
        {
            history[head] = (float)cst_ulaw_to_short(lpcres->residual[pos]);

            if (head == 0)
                back = order;
            else
                back = head - 1;

            for (tap = 0; tap < order; tap++)
            {
                history[head] += coefs[tap] * history[back];
                if (back == 0)
                    back = order;
                else
                    back = back - 1;
            }

            wave->samples[pos] = (short)(history[head]);

            if (head == order)
                head = 0;
            else
                head = head + 1;
        }
    }

    cst_free(history);
    cst_free(coefs);

    return wave;
}
/*
 * Integer-only counterpart of the LPC resynthesis ("fixed point spike
 * excited, without floats").
 *
 * lpc_min and lpc_range are converted once to integers (scaled by 32768
 * and 2048 respectively).  Per frame the packed coefficients are
 * expanded with integer arithmetic, and each decoded residual sample is
 * summed with the previous num_channels outputs, every product being
 * divided by 16384 before it is added.
 *
 * Returns a new single-channel wave resized to lpcres->num_samples with
 * lpcres->sample_rate; the scratch buffers are released before return.
 */
cst_wave *lpc_resynth_sfp(cst_lpcres *lpcres)
{
    cst_wave *wave;
    int *history, *coefs;
    int order;
    int min_fixed, range_fixed;
    int frame, left, pos, head, back, tap;

    /* Output wave that receives the rebuilt signal */
    wave = new_wave();
    cst_wave_resize(wave,lpcres->num_samples,1);
    wave->sample_rate = lpcres->sample_rate;

    order = lpcres->num_channels;

    /* Ring of order+1 slots holding the current and previous outputs */
    history = cst_alloc(int,1+order);
    /* Expanded coefficients of the current frame */
    coefs = cst_alloc(int,order);

    min_fixed = (int)(lpcres->lpc_min*32768.0);
    /* the original author assumes the range never exceeds abs(16) */
    range_fixed = (int)(lpcres->lpc_range*2048.0);

    pos = 0;
    head = order;
    for (frame = 0; frame < lpcres->num_frames; frame++)
    {
        /* Expand this frame's packed coefficients */
        for (tap = 0; tap < order; tap++)
        {
            coefs[tap] =
                ((lpcres->frames[frame][tap]/2*range_fixed)/2048+min_fixed)/2;
        }

        /* Produce sizes[frame] output samples */
        for (left = lpcres->sizes[frame]; left > 0; left--, pos++)
        {
            history[head] = (int)cst_ulaw_to_short(lpcres->residual[pos]);

            if (head == 0)
                back = order;
            else
                back = head - 1;

            for (tap = 0; tap < order; tap++)
            {
                history[head] += (coefs[tap]*history[back])/16384;
                if (back == 0)
                    back = order;
                else
                    back = back - 1;
            }

            wave->samples[pos] = (short)history[head];

            if (head == order)
                head = 0;
            else
                head = head + 1;
        }
    }

    cst_free(history);
    cst_free(coefs);

    return wave;
}
Пример #6
0
/*
 * Synthesize the SSML document in `filename` with `voice`.
 *
 * A token stream is opened on the file using the voice's whitespace and
 * punctuation feature strings, optionally positioned at the voice's
 * "file_start_position" feature, and passed to
 * flite_ssml_to_speech_ts().  Unless `outtype` is "play", "none" or
 * "stream" it is used as a filename and an empty 16000 Hz wave is
 * written there first, since output is appended incrementally.
 *
 * Returns the value of flite_ssml_to_speech_ts(), or 1 when the file
 * cannot be opened.
 */
float flite_ssml_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_tokenstream *stream;
    cst_wave *empty;
    float duration;
    int start_pos;

    stream = ts_open(filename,
                     get_param_string(voice->features,"text_whitespace",NULL),
                     get_param_string(voice->features,"text_singlecharsymbols",NULL),
                     get_param_string(voice->features,"text_prepunctuation",NULL),
                     get_param_string(voice->features,"text_postpunctuation",NULL));
    if (stream == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for ssml reading\n",
                   filename);
        return 1;
    }

    start_pos = get_param_int(voice->features,"file_start_position",0);
    if (start_pos > 0)
        ts_set_stream_pos(stream,start_pos);

    /* Any other outtype names a file: seed it with an empty wave */
    /* because later output is appended to it                     */
    if (!(cst_streq(outtype,"play") ||
          cst_streq(outtype,"none") ||
          cst_streq(outtype,"stream")))
    {
        empty = new_wave();
        cst_wave_resize(empty,0,1);
        cst_wave_set_sample_rate(empty,16000);
        cst_wave_save_riff(empty,outtype);
        delete_wave(empty);
    }

    duration = flite_ssml_to_speech_ts(stream,voice,outtype);

    ts_close(stream);

    return duration;
}
/*
 * Integer-only LPC resynthesis with optional delayed residual decoding
 * and optional audio streaming.
 *
 * For each frame: if lpcres->delayed_decoding is set, the frame's packed
 * residual is first decoded in place with add_residual_g721vuv(); the
 * packed coefficients are expanded with integer arithmetic; then
 * sizes[i] samples are produced from the residual and the previous
 * num_channels outputs.  When lpcres->asi is set, finished samples are
 * handed to its `asc` callback in chunks, and synthesis stops early if
 * the callback returns anything other than CST_AUDIO_STREAM_CONT.
 *
 * Returns the new single-channel wave; its num_samples is set to the
 * number of samples actually written.
 */
cst_wave *lpc_resynth_fixedpoint(cst_lpcres *lpcres)
{
    /* The fixed point version, without floats */
    cst_wave *w;
    int i,j,r,o,k;
    int stream_mark;
    int ci,cr;
    int *outbuf, *lpccoefs;
    int pm_size_samps, ilpc_min, ilpc_range;
    //int pp = 0;
    int rc = CST_AUDIO_STREAM_CONT;

    /* Get a new wave to build the signal into */
    w = new_wave();
    cst_wave_resize(w,lpcres->num_samples,1);
    w->sample_rate = lpcres->sample_rate;
    /* outbuf is a circular buffer with past relevant samples in it */
    outbuf = cst_alloc(int,1+lpcres->num_channels);
    /* unpacked lpc coefficients */
    lpccoefs = cst_alloc(int,lpcres->num_channels);
    ilpc_min = (int)(lpcres->lpc_min*32768.0);
    /* assume range is never > abs(16) */
    ilpc_range = (int)(lpcres->lpc_range*2048.0);

    /* stream_mark is the index of the first sample not yet passed to
       the streaming callback; r is the next sample to be written */
    stream_mark = 0;
    for (r=0,o=lpcres->num_channels,i=0; 
         (rc == CST_AUDIO_STREAM_CONT) && (i < lpcres->num_frames); 
         i++)
    {
	pm_size_samps = lpcres->sizes[i];

        if (lpcres->delayed_decoding)
        {
            /* do decoding for this frame */
            add_residual_g721vuv(lpcres->sizes[i],
                                 &lpcres->residual[r],
                                 lpcres->sizes[i],
                                 lpcres->packed_residuals[i]);
        }

	/* Unpack the LPC coefficients */
	/* NOTE(review): the result appears to carry a 16384 scale factor, */
	/* matching the *16384 and /16384 below -- confirm                 */
	for (k=0; k<lpcres->num_channels; k++)
	    lpccoefs[k]=((lpcres->frames[i][k]/2*ilpc_range)/2048+ilpc_min)/2;

	/* resynthesize the signal */
	for (j=0; j < pm_size_samps; j++,r++)
	{
            outbuf[o] = (int)ulaw_to_short_table[lpcres->residual[r]];
	    /* scale the residual up so the coefficient products can be
	       accumulated before a single division back down */
	    outbuf[o] *= 16384;
	    /* cr walks backwards (with wrap-around) over the previous
	       num_channels outputs */
	    cr = (o == 0 ? lpcres->num_channels : o-1);
	    for (ci=0; ci < lpcres->num_channels; ci++)
	    {
		outbuf[o] += lpccoefs[ci]*outbuf[cr];
		cr = (cr == 0 ? lpcres->num_channels : cr-1);
	    }
	    outbuf[o] /= 16384;
	    w->samples[r] = (short)outbuf[o];
	    //pp = outbuf[o];
	    /* advance the write slot, wrapping after the last one */
	    o = (o == lpcres->num_channels ? 0 : o+1);
	}
        /* Once more than min_buffsize samples are pending, pass them to
           the streaming callback; the fourth argument is 0 here and 1
           in the final call after the loop */
        if (lpcres->asi && (r-stream_mark > lpcres->asi->min_buffsize))
        {
             rc = (*lpcres->asi->asc)(w,stream_mark,r-stream_mark,0,
                                 lpcres->asi);
             stream_mark = r;
        }
    }

    /* Final callback with whatever is left, unless streaming was stopped */
    if ((lpcres->asi) && (rc == CST_AUDIO_STREAM_CONT))
        (*lpcres->asi->asc)(w,stream_mark,r-stream_mark,1,lpcres->asi);

    cst_free(outbuf);
    cst_free(lpccoefs);
    w->num_samples = r;  /* just to be safe */

    return w;

}
/*
 * Read text from `filename`, split it into utterances and synthesize
 * each one with `voice`.
 *
 * Tokens are collected into a "Token" relation until an empty token is
 * read, more than 500 tokens have accumulated, or the break function
 * (default_utt_break, or the voice's "utt_break" feature) reports a
 * boundary; the utterance is then passed to flite_tokens_to_speech().
 * Unless `outtype` is "play" or "none" it is used as a filename and an
 * empty 16000 Hz wave is written there first, since output is appended
 * incrementally.
 *
 * Returns the sum of the non-negative values returned by
 * flite_tokens_to_speech(), or 1 when the file cannot be opened.
 * Processing stops at the first negative value.
 */
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_breakfunc is_boundary = default_utt_break;
    cst_tokenstream *stream;
    cst_utterance *current;
    cst_relation *tokens;
    cst_item *item;
    const char *tok;
    float total = 0;
    float piece;
    int pending;
    int end_of_utt;

    stream = ts_open(filename,
                     get_param_string(voice->features,"text_whitespace",NULL),
                     get_param_string(voice->features,"text_singlecharsymbols",NULL),
                     get_param_string(voice->features,"text_prepunctuation",NULL),
                     get_param_string(voice->features,"text_postpunctuation",NULL));
    if (stream == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    if (feat_present(voice->features,"utt_break"))
        is_boundary = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* Any other outtype names a file: start it as an empty wave */
    /* because each utterance is appended to it                  */
    if (!(cst_streq(outtype,"play") || cst_streq(outtype,"none")))
    {
        cst_wave *empty = new_wave();
        cst_wave_resize(empty,0,1);
        cst_wave_set_sample_rate(empty,16000);
        cst_wave_save_riff(empty,outtype);
        delete_wave(empty);
    }

    pending = 0;
    current = new_utterance();
    tokens = utt_relation_create(current, "Token");
    for (;;)
    {
        /* keep going while there is input, or tokens not yet spoken */
        if (ts_eof(stream) && !(pending > 0))
            break;

        tok = ts_get(stream);

        end_of_utt = (strlen(tok) == 0) ||
                     (pending > 500) ||  /* upper bound on utterance size */
                     (relation_head(tokens) &&
                      is_boundary(stream,tok,tokens));
        if (end_of_utt)
        {
            /* the utterance is handed over; drop our pointer to it */
            piece = flite_tokens_to_speech(current,voice,outtype);
            current = NULL;
            if (piece < 0)
                break;
            total += piece;

            if (ts_eof(stream))
                break;

            current = new_utterance();
            tokens = utt_relation_create(current, "Token");
            pending = 0;
        }
        pending++;

        /* record the token and its surrounding text in the relation */
        item = relation_append(tokens, NULL);
        item_set_string(item,"name",tok);
        item_set_string(item,"whitespace",stream->whitespace);
        item_set_string(item,"prepunctuation",stream->prepunctuation);
        item_set_string(item,"punc",stream->postpunctuation);
        item_set_int(item,"file_pos",stream->file_pos);
        item_set_int(item,"line_number",stream->line_number);
    }

    delete_utterance(current);
    ts_close(stream);
    return total;
}
Пример #9
0
/*
 * Read text from `filename`, split it into utterances and synthesize
 * each one with `voice`.
 *
 * The token stream may be positioned at the voice's
 * "file_start_position" feature.  Tokens are collected into a "Token"
 * relation until an empty token is read, more than 500 tokens have
 * accumulated, or the break function (default_utt_break, or the voice's
 * "utt_break" feature) reports a boundary.  The utterance then goes
 * through the optional "utt_user_callback", flite_do_synth() and
 * flite_process_output().  Unless `outtype` is "play", "none" or
 * "stream" it is used as a filename and an empty 16000 Hz wave is
 * written there first, since output is appended incrementally.
 *
 * Returns the sum of the flite_process_output() results, or 1 when the
 * file cannot be opened.  Processing stops if the user callback returns
 * NULL.
 */
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_breakfunc is_boundary = default_utt_break;
    cst_uttfunc user_hook = 0;
    cst_tokenstream *stream;
    cst_utterance *current;
    cst_relation *tokens;
    cst_item *item;
    cst_wave *empty;
    const char *tok;
    float total = 0;
    int pending;
    int start_pos;
    int end_of_utt;

    stream = ts_open(filename,
                     get_param_string(voice->features,"text_whitespace",NULL),
                     get_param_string(voice->features,"text_singlecharsymbols",NULL),
                     get_param_string(voice->features,"text_prepunctuation",NULL),
                     get_param_string(voice->features,"text_postpunctuation",NULL));
    if (stream == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    start_pos = get_param_int(voice->features,"file_start_position",0);
    if (start_pos > 0)
        ts_set_stream_pos(stream,start_pos);

    if (feat_present(voice->features,"utt_break"))
        is_boundary = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        user_hook = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* Any other outtype names a file: start it as an empty wave */
    /* because each utterance is appended to it                  */
    if (!(cst_streq(outtype,"play") ||
          cst_streq(outtype,"none") ||
          cst_streq(outtype,"stream")))
    {
        empty = new_wave();
        cst_wave_resize(empty,0,1);
        cst_wave_set_sample_rate(empty,16000);
        cst_wave_save_riff(empty,outtype);
        delete_wave(empty);
    }

    pending = 0;
    current = new_utterance();
    tokens = utt_relation_create(current, "Token");
    for (;;)
    {
        /* keep going while there is input, or tokens not yet spoken */
        if (ts_eof(stream) && !(pending > 0))
            break;

        tok = ts_get(stream);

        end_of_utt = (cst_strlen(tok) == 0) ||
                     (pending > 500) ||  /* upper bound on utterance size */
                     (relation_head(tokens) &&
                      is_boundary(stream,tok,tokens));
        if (end_of_utt)
        {
            /* let the user hook see (and possibly replace) the utterance */
            if (user_hook)
                current = (user_hook)(current);

            /* a NULL utterance from the hook ends processing */
            if (!current)
                break;

            current = flite_do_synth(current,voice,utt_synth_tokens);
            total += flite_process_output(current,outtype,TRUE);
            delete_utterance(current);
            current = NULL;

            if (ts_eof(stream))
                break;

            current = new_utterance();
            tokens = utt_relation_create(current, "Token");
            pending = 0;
        }
        pending++;

        /* record the token and its surrounding text in the relation */
        item = relation_append(tokens, NULL);
        item_set_string(item,"name",tok);
        item_set_string(item,"whitespace",stream->whitespace);
        item_set_string(item,"prepunctuation",stream->prepunctuation);
        item_set_string(item,"punc",stream->postpunctuation);
        /* file_pos is already one char past the token, so step back */
        /* over that char, the token and its punctuation             */
        item_set_int(item,"file_pos",
                     stream->file_pos-(1+
                                       cst_strlen(tok)+
                                       cst_strlen(stream->prepunctuation)+
                                       cst_strlen(stream->postpunctuation)));
        item_set_int(item,"line_number",stream->line_number);
    }

    delete_utterance(current);
    ts_close(stream);
    return total;
}