示例#1
0
int main(int argc, char **argv)
{
    cst_wave *nw, *all;
    cst_val *files;
    const cst_val *w;
    cst_val *wavelist;
    cst_features *args;
    int i,j;
    float ntime;
    int stime;
    const char *nwfile;

    args = new_features();
    files =
        cst_args(argv,argc,
                 "usage: combine_waves OPTIONS\n"
                 "Combine waves into single waveform\n"
		 "-o <string>  Output waveform\n"
		 "-f <int>     Input sample rate (for raw input)\n"
		 "-itype <string>  Input type, raw or headered\n"
		 "-wavelist <string>  File containing times and wave filenames\n",
                 args);

    wavelist = get_wavelist(get_param_string(args,"-wavelist","-"));

    if (wavelist == 0)
	return -1;

    all = new_wave();
    for (w = wavelist; w; w = val_cdr(w))
    {
	ntime = decode_time(val_string(val_car(w)));
	nwfile = val_string(val_car(val_cdr(w)));

	nw = new_wave();
	if (cst_wave_load_riff(nw,nwfile) != CST_OK_FORMAT)
	{
	    fprintf(stderr,
		    "combine_waves: can't read file or wrong format \"%s\"\n",
		    nwfile);
	    continue;
	}

	stime = ntime * nw->sample_rate;

	cst_wave_resize(all,stime+nw->num_samples,1);
	
	for (i=0,j=stime; i<nw->num_samples; i++,j++)
	{
	    /* this will cause overflows */
	    all->samples[j] += nw->samples[i];
	}
	delete_wave(nw);
    }

    cst_wave_save_riff(all,get_param_string(args,"-o","-"));

    return 0;
}
示例#2
0
void test_create(void)
{
   cst_wave *w = NULL;
   w = new_wave();
   TEST_CHECK(w != NULL);
   TEST_CHECK(w->num_samples == 0);
   TEST_CHECK(w->type == NULL);
   delete_wave(w);
}
示例#3
0
void test_resize(void)
{
   cst_wave *w = new_wave();
   cst_wave_resize(w, 200, 2);
   TEST_CHECK(w->num_samples == 200);
   TEST_CHECK(w->num_channels = 2);
   TEST_CHECK(cst_wave_resize(NULL, 200, 2) < 0);
   delete_wave(w);
}
示例#4
0
void test_rescale(void)
{
   int i;
   cst_wave *w = new_wave();
   cst_wave_resize(w, 10, 2);
   for (i = 0; i < 10; i++)
      w->samples[i] = 10 + i;
   cst_wave_rescale(w, 65536 * 2); //scale x2
   for (i = 0; i < 10; i++)
      TEST_CHECK(w->samples[i] == (10 + i) * 2);
   delete_wave(w);
}
cst_wave *lpc_resynth(cst_lpcres *lpcres)
{
    cst_wave *w;
    int i,j,r,o,k;
    int ci,cr;
    float *outbuf, *lpccoefs;
    int pm_size_samps;

    /* Get a new wave to build the signal into */
    w = new_wave();
    cst_wave_resize(w,lpcres->num_samples,1);
    w->sample_rate = lpcres->sample_rate;
    /* outbuf is a circular buffer with past relevant samples in it */
    outbuf = cst_alloc(float,1+lpcres->num_channels);
    /* unpacked lpc coefficients */
    lpccoefs = cst_alloc(float,lpcres->num_channels);

    for (r=0,o=lpcres->num_channels,i=0; i < lpcres->num_frames; i++)
    {
	pm_size_samps = lpcres->sizes[i];

	/* Unpack the LPC coefficients */
	for (k=0; k<lpcres->num_channels; k++)
	{
	    lpccoefs[k] = (float)((((double)lpcres->frames[i][k])/65535.0)*
			   lpcres->lpc_range) + lpcres->lpc_min;
	}
	/* Note we don't zero the lead in from the previous part */
	/* seems like you should but it makes it worse if you do */
/*	memset(outbuf,0,sizeof(float)*(1+lpcres->num_channels)); */

	/* resynthesis the signal */
	for (j=0; j < pm_size_samps; j++,r++)
	{
            outbuf[o] = (float)cst_ulaw_to_short(lpcres->residual[r]);
	    cr = (o == 0 ? lpcres->num_channels : o-1);
	    for (ci=0; ci < lpcres->num_channels; ci++)
	    {
		outbuf[o] += lpccoefs[ci] * outbuf[cr];
		cr = (cr == 0 ? lpcres->num_channels : cr-1);
	    }
	    w->samples[r] = (short)(outbuf[o]);
	    o = (o == lpcres->num_channels ? 0 : o+1);
	}
    }

    cst_free(outbuf);
    cst_free(lpccoefs);

    return w;

}
cst_wave *lpc_resynth_sfp(cst_lpcres *lpcres)
{
    /* The fixed point spike excited, without floats */
    cst_wave *w;
    int i,j,r,o,k;
    int ci,cr;
    int *outbuf, *lpccoefs;
    int pm_size_samps, ilpc_min, ilpc_range;
    //int pp = 0;

    /* Get a new wave to build the signal into */
    w = new_wave();
    cst_wave_resize(w,lpcres->num_samples,1);
    w->sample_rate = lpcres->sample_rate;
    /* outbuf is a circular buffer with past relevant samples in it */
    outbuf = cst_alloc(int,1+lpcres->num_channels);
    /* unpacked lpc coefficients */
    lpccoefs = cst_alloc(int,lpcres->num_channels);
    ilpc_min = (int)(lpcres->lpc_min*32768.0);
    /* assume range is never > abs(16) */
    ilpc_range = (int)(lpcres->lpc_range*2048.0);

    for (r=0,o=lpcres->num_channels,i=0; i < lpcres->num_frames; i++)
    {
	pm_size_samps = lpcres->sizes[i];

	/* Unpack the LPC coefficients */
	for (k=0; k<lpcres->num_channels; k++)
	    lpccoefs[k]=((lpcres->frames[i][k]/2*ilpc_range)/2048+ilpc_min)/2;

	/* resynthesis the signal */
	for (j=0; j < pm_size_samps; j++,r++)
	{
	    outbuf[o] = (int)cst_ulaw_to_short(lpcres->residual[r]);
	    cr = (o == 0 ? lpcres->num_channels : o-1);
	    for (ci=0; ci < lpcres->num_channels; ci++)
	    {
		outbuf[o] += (lpccoefs[ci]*outbuf[cr])/16384;
		cr = (cr == 0 ? lpcres->num_channels : cr-1);
	    }
	    w->samples[r] = (short)outbuf[o];
	    //pp = outbuf[o];
	    o = (o == lpcres->num_channels ? 0 : o+1);
	}
    }

    cst_free(outbuf);
    cst_free(lpccoefs);

    return w;

}
示例#7
0
float flite_ssml_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_tokenstream *ts;
    int fp;
    cst_wave *w;
    float d;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for ssml reading\n",
		   filename);
	return 1;
    }
    fp = get_param_int(voice->features,"file_start_position",0);
    if (fp > 0)
        ts_set_stream_pos(ts,fp);

    /* If its a file to write to, create and save an empty wave file */
    /* as we are going to incrementally append to it                 */
    if (!cst_streq(outtype,"play") && 
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    d = flite_ssml_to_speech_ts(ts,voice,outtype);

    ts_close(ts);
    
    return d;

}
示例#8
0
cst_utterance *join_units_modified_lpc(cst_utterance *utt)
{
    cst_wave *w = 0;
    cst_lpcres *lpcres;
    const char *resynth_type;
    const cst_val *streaming_info_val;

    resynth_type = get_param_string(utt->features, "resynth_type", "float");

    f0_targets_to_pm(utt);
    concat_units(utt);

    lpcres = val_lpcres(utt_feat_val(utt, "target_lpcres"));

    streaming_info_val = get_param_val(utt->features, "streaming_info", NULL);
    if (streaming_info_val)
    {
        lpcres->asi = val_audio_streaming_info(streaming_info_val);
        lpcres->asi->utt = utt;
    }

    if (cst_streq(resynth_type, "float"))
        w = lpc_resynth(lpcres);
    else if (cst_streq(resynth_type, "fixed"))
    {
        w = lpc_resynth_fixedpoint(lpcres);
    }
    else
    {
        cst_errmsg("unknown resynthesis type %s\n", resynth_type);
        cst_error();            /* Should not happen */
    }

    if (w == NULL)
    {
        /* Synthesis Failed, probably because it was interrupted */
        utt_set_feat_int(utt, "Interrupted", 1);
        w = new_wave();
    }

    utt_set_wave(utt, w);

    return utt;
}
int main(int argc, char **argv)
{
    cst_wave *in, *out;
    cst_val *files;
    cst_features *args;
    int i,j;
    int w, a, t;

    args = new_features();
    files =
        cst_args(argv,argc,
                 "usage: dcoffset_wave OPTIONS\n"
                 "Subtract window average from waveform\n"
		 "-i <string>  Input waveform\n"
		 "-o <string>  Output waveform\n"
		 "-w <int>     Window size (in samples)\n",
                 args);

    w = flite_get_param_int(args,"-w",20);
    in = new_wave();
    cst_wave_load_riff(in,flite_get_param_string(args,"-i","-"));

    out = copy_wave(in);

    for (i=0; i<=out->num_samples; i++)
    {
        for (t=a=0,j=i-w/2; j < i+w/2; j++)
        {
            if ((j > 0) && (j < out->num_samples))
            {
                t += 1;
                a+=in->samples[j];
            }
        }
        /*        printf("%d %d %d %d %d\n",i,out->samples[i],a/t,t,out->samples[i]-a/t); */
        out->samples[i] -= a/t;
    }

    cst_wave_save_riff(out,flite_get_param_string(args,"-o","-"));

    return 0;
}
示例#10
0
int main(int argc, char **argv)
{
    cst_track *lpc;
    cst_wave *sig, *sig2;
    cst_sts *sts;

    if (argc != 6)
    {
	fprintf(stderr,"usage: find_sts lpc_min lpc_range LPC WAVEFILE STS\n");
	return 1;
    }

    lpc_min = atof(argv[1]);
    lpc_range = atof(argv[2]);

    lpc = new_track();
    cst_track_load_est(lpc,argv[3]);
    sig = new_wave();
    if (cst_wave_load_riff(sig,argv[4]) == CST_WRONG_FORMAT)
    {
	fprintf(stderr,
		"cannot load waveform, format unrecognized, from \"%s\"\n",
		argv[4]);
	exit(-1);
    }

    sts = find_sts(sig,lpc);

    /* See if it worked */
    sig2 = reconstruct_wave(sig,sts,lpc);

    compare_waves(sig,sig2);
    cst_wave_save_riff(sig2,"sig2.wav");

    save_sts(sts,lpc,sig,argv[5]);

    return 0;
}
示例#11
0
int main(int argc, char **argv)
{
    cst_wave *w;
    cst_relation *r;
    cst_utterance *u;

    if (argc != 3)
    {
        fprintf(stderr, "usage: mimic_play_wave_sync WAVEFILE LABELFILE\n");
        return 1;
    }

    w = new_wave();
    if (cst_wave_load_riff(w, argv[1]) != CST_OK_FORMAT)
        return -1;
    u = new_utterance();
    r = utt_relation_create(u, "FOO");
    if (relation_load(r, argv[2]) != CST_OK_FORMAT)
        return -1;

    mimic_play_wave_sync(w, r, my_call_back);

    return 0;
}
cst_wave *lpc_resynth_fixedpoint(cst_lpcres *lpcres)
{
    /* The fixed point version, without floats */
    cst_wave *w;
    int i,j,r,o,k;
    int stream_mark;
    int ci,cr;
    int *outbuf, *lpccoefs;
    int pm_size_samps, ilpc_min, ilpc_range;
    //int pp = 0;
    int rc = CST_AUDIO_STREAM_CONT;

    /* Get a new wave to build the signal into */
    w = new_wave();
    cst_wave_resize(w,lpcres->num_samples,1);
    w->sample_rate = lpcres->sample_rate;
    /* outbuf is a circular buffer with past relevant samples in it */
    outbuf = cst_alloc(int,1+lpcres->num_channels);
    /* unpacked lpc coefficients */
    lpccoefs = cst_alloc(int,lpcres->num_channels);
    ilpc_min = (int)(lpcres->lpc_min*32768.0);
    /* assume range is never > abs(16) */
    ilpc_range = (int)(lpcres->lpc_range*2048.0);

    stream_mark = 0;
    for (r=0,o=lpcres->num_channels,i=0; 
         (rc == CST_AUDIO_STREAM_CONT) && (i < lpcres->num_frames); 
         i++)
    {
	pm_size_samps = lpcres->sizes[i];

        if (lpcres->delayed_decoding)
        {
            /* do decoding for this frame */
            add_residual_g721vuv(lpcres->sizes[i],
                                 &lpcres->residual[r],
                                 lpcres->sizes[i],
                                 lpcres->packed_residuals[i]);
        }

	/* Unpack the LPC coefficients */
	for (k=0; k<lpcres->num_channels; k++)
	    lpccoefs[k]=((lpcres->frames[i][k]/2*ilpc_range)/2048+ilpc_min)/2;

	/* resynthesis the signal */
	for (j=0; j < pm_size_samps; j++,r++)
	{
            outbuf[o] = (int)ulaw_to_short_table[lpcres->residual[r]];
	    outbuf[o] *= 16384;
	    cr = (o == 0 ? lpcres->num_channels : o-1);
	    for (ci=0; ci < lpcres->num_channels; ci++)
	    {
		outbuf[o] += lpccoefs[ci]*outbuf[cr];
		cr = (cr == 0 ? lpcres->num_channels : cr-1);
	    }
	    outbuf[o] /= 16384;
	    w->samples[r] = (short)outbuf[o];
	    //pp = outbuf[o];
	    o = (o == lpcres->num_channels ? 0 : o+1);
	}
        if (lpcres->asi && (r-stream_mark > lpcres->asi->min_buffsize))
        {
             rc = (*lpcres->asi->asc)(w,stream_mark,r-stream_mark,0,
                                 lpcres->asi);
             stream_mark = r;
        }
    }

    if ((lpcres->asi) && (rc == CST_AUDIO_STREAM_CONT))
        (*lpcres->asi->asc)(w,stream_mark,r-stream_mark,1,lpcres->asi);

    cst_free(outbuf);
    cst_free(lpccoefs);
    w->num_samples = r;  /* just to be safe */

    return w;

}
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float d, durs = 0;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for reading\n",
		   filename);
	return 1;
    }

    if (feat_present(voice->features,"utt_break"))
	breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* If its a file to write to delete it as we're going to */
    /* incrementally append to it                            */
    if (!cst_streq(outtype,"play") && !cst_streq(outtype,"none"))
    {
	cst_wave *w;
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
	token = ts_get(ts);
	if ((strlen(token) == 0) ||
	    (num_tokens > 500) ||  /* need an upper bound */
	    (relation_head(tokrel) && 
	     breakfunc(ts,token,tokrel)))
	{
	    /* An end of utt */
	    d = flite_tokens_to_speech(utt,voice,outtype);
	    utt = NULL;
	    if (d < 0)
		goto out;
	    durs += d;

	    if (ts_eof(ts))
		goto out;

	    utt = new_utterance();
	    tokrel = utt_relation_create(utt, "Token");
	    num_tokens = 0;
	}
	num_tokens++;

	t = relation_append(tokrel, NULL);
	item_set_string(t,"name",token);
	item_set_string(t,"whitespace",ts->whitespace);
	item_set_string(t,"prepunctuation",ts->prepunctuation);
	item_set_string(t,"punc",ts->postpunctuation);
	item_set_int(t,"file_pos",ts->file_pos);
	item_set_int(t,"line_number",ts->line_number);
    }

out:
    delete_utterance(utt);
    ts_close(ts);
    return durs;
}
int main(int argc, char **argv)
{
    cst_wave *w;
    int port;
    char *server;
    char *encoding; 
    int i,iw;

    port = CST_AUDIO_DEFAULT_PORT;
    server = CST_AUDIO_DEFAULT_SERVER;
    encoding = CST_AUDIO_DEFAULT_ENCODING;

    if (argc == 1)
    {
	play_client_main_usage();
	return 1;
    }

    if ((cst_streq("-h",argv[1])) ||
	(cst_streq("-help",argv[1])) ||
	(cst_streq("--help",argv[1])))
    {
	play_client_main_usage();
	return 1;
    }
    iw = 1;

    if (cst_streq("-s",argv[iw]))
    {
	if (argc < iw+1)
	{
	    fprintf(stderr,"ERROR: no servername given\n");
	    play_client_main_usage();
	    return 1;
	}
	server = argv[iw+1];
	iw+=2;
    }

    if (cst_streq("-p",argv[iw]))
    {
	if (argc < iw+1)
	{
	    fprintf(stderr,"ERROR: no port given\n");
	    play_client_main_usage();
	    return 1;
	}
	port = atoi(argv[iw+1]);
	iw+=2;
    }

    if (cst_streq("-e",argv[iw]))
    {
	if (argc < iw+1)
	{
	    fprintf(stderr,"ERROR: no encoding given\n");
	    play_client_main_usage();
	    return 1;
	}
	encoding = argv[iw+1];
	iw+=2;
    }

    for (i=iw; i<argc; i++)
    {
	w = new_wave();
	if (cst_wave_load_riff(w,argv[i]) != CST_OK_FORMAT)
	{
	    fprintf(stderr,
		    "play_wave: can't read file or wrong format \"%s\"\n",
		    argv[i]);
	    continue;
	}
	play_wave_client(w,server,port,encoding);
	delete_wave(w);
    }

    return 0;
}
示例#15
0
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float durs = 0;
    int num_tokens;
    cst_wave *w;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    int fp;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for reading\n",
		   filename);
	return 1;
    }
    fp = get_param_int(voice->features,"file_start_position",0);
    if (fp > 0)
        ts_set_stream_pos(ts,fp);

    if (feat_present(voice->features,"utt_break"))
	breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
	utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* If its a file to write to, create and save an empty wave file */
    /* as we are going to incrementally append to it                 */
    if (!cst_streq(outtype,"play") && 
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
	token = ts_get(ts);
	if ((cst_strlen(token) == 0) ||
	    (num_tokens > 500) ||  /* need an upper bound */
	    (relation_head(tokrel) && 
	     breakfunc(ts,token,tokrel)))
	{
	    /* An end of utt, so synthesize it */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            if (utt)
            {
                utt = flite_do_synth(utt,voice,utt_synth_tokens);
                durs += flite_process_output(utt,outtype,TRUE);
                delete_utterance(utt); utt = NULL;
            }
            else 
                break;

	    if (ts_eof(ts)) break;

	    utt = new_utterance();
	    tokrel = utt_relation_create(utt, "Token");
	    num_tokens = 0;
	}
	num_tokens++;

	t = relation_append(tokrel, NULL);
	item_set_string(t,"name",token);
	item_set_string(t,"whitespace",ts->whitespace);
	item_set_string(t,"prepunctuation",ts->prepunctuation);
	item_set_string(t,"punc",ts->postpunctuation);
        /* Mark it at the beginning of the token */
	item_set_int(t,"file_pos",
                     ts->file_pos-(1+ /* as we are already on the next char */
                                   cst_strlen(token)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
	item_set_int(t,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);
    return durs;
}