Ejemplo n.º 1
0
    /*
     * Register the "cmu_us_no_wave" voice and return it.
     *
     * The voice is built on the first call and cached in the global
     * cmu_us_no_wave.  Later calls return the cached pointer instead of
     * allocating a second voice, which would orphan the first one.
     *
     * voxdir: not read by this function.
     * Returns the voice stored in cmu_us_no_wave.
     */
    cst_voice* register_cmu_us_no_wave( const char* voxdir )
    {
        cst_voice* v;
        cst_lexicon* lex;

        if ( cmu_us_no_wave )
            return cmu_us_no_wave;  /* Already registered */

        v = new_voice();
        v->name = "no_wave_voice";

        /* Set up basic values for synthesizing with this voice */
        usenglish_init( v );
        feat_set_string( v->features, "name", "cmu_us_no_wave" );

        /* Lexicon */
        lex = cmu_lex_init();
        feat_set( v->features, "lexicon", lexicon_val( lex ) );

        /* Intonation */
        feat_set_float( v->features, "int_f0_target_mean", 95.0 );
        feat_set_float( v->features, "int_f0_target_stddev", 11.0 );

        feat_set_float( v->features, "duration_stretch", 1.1 );

        /* Post lexical rules: use the hook carried by the lexicon */
        feat_set( v->features, "postlex_func", uttfunc_val( lex->postlex ) );

        /* Waveform synthesis: no_wave_synth */
        feat_set( v->features, "wave_synth_func", uttfunc_val( &no_wave_synth ) );

        cmu_us_no_wave = v;

        return cmu_us_no_wave;
    }
Ejemplo n.º 2
0
/*
 * Build a diphone voice named "cmu_us_kal_diphone", store it in the global
 * cmu_us_kal_diphone and return it.
 *
 * voxdir: not read by this function.
 */
cst_voice *register_cmu_us_kal16(const char *voxdir)
{
    cst_voice *voice = new_voice();

    /* US English setup runs before the voice-specific settings below. */
    usenglish_init(voice);

    feat_set_string(voice->features, "name", "cmu_us_kal_diphone");

    /* Lexicon: initialise it, then attach the cmu_lex object. */
    cmu_lex_init();
    feat_set(voice->features, "lexicon", lexicon_val(&cmu_lex));

    /* Intonation targets. */
    feat_set_float(voice->features, "int_f0_target_mean", 105.0);
    feat_set_float(voice->features, "int_f0_target_stddev", 14.0);

    /* Post-lexical rules specific to this voice. */
    feat_set(voice->features, "postlex_func",
             uttfunc_val(&cmu_us_kal_postlex));

    /* Duration. */
    feat_set_float(voice->features, "duration_stretch", 1.1);

    /* Waveform synthesis through diphone_synth and the kal diphone db. */
    feat_set(voice->features, "wave_synth_func",
             uttfunc_val(&diphone_synth));
    feat_set(voice->features, "diphone_db",
             diphone_db_val(&cmu_us_kal_db));
    feat_set_int(voice->features, "sample_rate",
                 cmu_us_kal_db.sts->sample_rate);
    feat_set_string(voice->features, "resynth_type", "fixed");
    feat_set_string(voice->features, "join_type", "modified_lpc");

    cmu_us_kal_diphone = voice;
    return cmu_us_kal_diphone;
}
Ejemplo n.º 3
0
/*
 * Register the __VOICENAME__ clunits voice and return it.
 *
 * The voice is built on the first call and cached in the global
 * __VOICENAME___clunits.  Later calls return the cached pointer instead of
 * allocating a second voice, which would orphan the first one.
 *
 * voxdir: not read by this function.
 */
cst_voice *register___VOICENAME__(const char *voxdir)
{
    cst_voice *v;

    if (__VOICENAME___clunits)
        return __VOICENAME___clunits;  /* Already registered */

    v = new_voice();
    v->name = "__NICKNAME__";

    /* Sets up language specific parameters in the __VOICENAME__. */
    usenglish_init(v);

    /* Things that weren't filled in already. */
    feat_set_string(v->features,"name","__VOICENAME__");

    /* Lexicon */
    cmu_lex_init();
    feat_set(v->features,"lexicon",lexicon_val(&cmu_lex));

    /* Waveform synthesis */
    feat_set(v->features,"wave_synth_func",uttfunc_val(&clunits_synth));
    feat_set(v->features,"clunit_db",clunit_db_val(&__VOICENAME___db));
    feat_set_int(v->features,"sample_rate",__VOICENAME___db.sts->sample_rate);
    feat_set_string(v->features,"join_type","simple_join");
    feat_set_string(v->features,"resynth_type","fixed");

    /* Unit selection: install this voice's unit naming function in the db */
    __VOICENAME___db.unit_name_func = __VOICENAME___unit_name;

    __VOICENAME___clunits = v;

    return __VOICENAME___clunits;
}
/*
 * Register the __VOICENAME__ clunits voice (template form).
 *
 * Returns the cached voice in __VOICENAME___clunits when one exists;
 * otherwise builds the voice, caches it there and returns it.
 *
 * voxdir: when non-NULL and the unit database has no sts, sts_paged or
 *         frames data, it is handed to flite_mmap_clunit_voxdata().
 */
cst_voice *register___VOICENAME__(const char *voxdir)
{
    cst_voice *voice;
    cst_lexicon *lexicon;

    if (__VOICENAME___clunits != NULL)
    {
        /* Already registered */
        return __VOICENAME___clunits;
    }

    voice = new_voice();
    voice->name = "__NICKNAME__";

    /* Language-specific parameters. */
    __FLITELANG___init(voice);

    /* Voice identity. */
    flite_feat_set_string(voice->features, "name", "__VOICENAME__");

    /* Duration model. */
    flite_feat_set(voice->features, "dur_cart",
                   cart_val(&__VOICENAME___dur_cart));
    flite_feat_set(voice->features, "dur_stats",
                   dur_stats_val((dur_stats *)__VOICENAME___dur_stats));

    /* Lexicon and the post-lexical hook it carries. */
    lexicon = __FLITELEX___init();
    flite_feat_set(voice->features, "lexicon", lexicon_val(lexicon));
    flite_feat_set(voice->features, "postlex_func",
                   uttfunc_val(lexicon->postlex));

    /* Waveform synthesis. */
    flite_feat_set(voice->features, "wave_synth_func",
                   uttfunc_val(&clunits_synth));
    flite_feat_set(voice->features, "clunit_db",
                   clunit_db_val(&__VOICENAME___db));
    flite_feat_set_int(voice->features, "sample_rate",
                       __VOICENAME___db.sts->sample_rate);
    flite_feat_set_string(voice->features, "join_type", "simple_join");
    flite_feat_set_string(voice->features, "resynth_type", "fixed");

    /* Map voice data from voxdir only when none of it is present yet. */
    if (voxdir &&
        !__VOICENAME___db.sts->sts &&
        !__VOICENAME___db.sts->sts_paged &&
        !__VOICENAME___db.sts->frames)
    {
        flite_mmap_clunit_voxdata(voxdir, voice);
    }

    /* Unit selection. */
    __VOICENAME___db.unit_name_func = __VOICENAME___unit_name;

    __VOICENAME___clunits = voice;
    return __VOICENAME___clunits;
}
Ejemplo n.º 5
0
/*
 * Register the "kal" diphone voice.
 *
 * Returns the cached voice in cmu_us_kal_diphone when one exists; otherwise
 * builds it, caches it there and returns it.  The name, intonation,
 * duration and diphone-synthesis features are only set when
 * FLITE_PLUS_HTS_ENGINE is not defined.
 *
 * voxdir: not read by this function.
 */
cst_voice *register_cmu_us_kal(const char *voxdir)
{
    cst_voice *voice;
    cst_lexicon *lexicon;

    if (cmu_us_kal_diphone)
    {
        /* Already registered */
        return cmu_us_kal_diphone;
    }

    voice = new_voice();
    voice->name = "kal";

    /* Basic values for synthesizing with this voice. */
    usenglish_init(voice);
#ifndef FLITE_PLUS_HTS_ENGINE
    flite_feat_set_string(voice->features, "name", "cmu_us_kal_diphone");
#endif /* !FLITE_PLUS_HTS_ENGINE */

    /* Lexicon. */
    lexicon = cmu_lex_init();
    flite_feat_set(voice->features, "lexicon", lexicon_val(lexicon));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Intonation targets and duration stretch. */
    flite_feat_set_float(voice->features, "int_f0_target_mean", 95.0);
    flite_feat_set_float(voice->features, "int_f0_target_stddev", 11.0);

    flite_feat_set_float(voice->features, "duration_stretch", 1.1);
#endif /* !FLITE_PLUS_HTS_ENGINE */

    /* Post-lexical rules specific to this voice. */
    flite_feat_set(voice->features, "postlex_func",
                   uttfunc_val(&cmu_us_kal_postlex));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Waveform synthesis through diphone_synth and the kal diphone db. */
    flite_feat_set(voice->features, "wave_synth_func",
                   uttfunc_val(&diphone_synth));
    flite_feat_set(voice->features, "diphone_db",
                   diphone_db_val(&cmu_us_kal_db));
    flite_feat_set_int(voice->features, "sample_rate",
                       cmu_us_kal_db.sts->sample_rate);
    flite_feat_set_string(voice->features, "resynth_type", "fixed");
    flite_feat_set_string(voice->features, "join_type", "modified_lpc");
#endif /* !FLITE_PLUS_HTS_ENGINE */

    cmu_us_kal_diphone = voice;
    return cmu_us_kal_diphone;
}
Ejemplo n.º 6
0
/*
 * Install the US English defaults on voice v.
 *
 * Calls us_text_init(), then fills v->features with the utterance break
 * function, phoneset, tokenizer character classes, token-to-words function,
 * phrasing and intonation CART trees and the post-lexical rule function,
 * and finally registers the US feature functions on v->ffunctions.
 * The duration and f0 model entries are only set when
 * FLITE_PLUS_HTS_ENGINE is not defined.
 */
void usenglish_init(cst_voice *v)
{
    us_text_init();

    /* Utterance break function. */
    feat_set(v->features, "utt_break", breakfunc_val(&default_utt_break));

    /* Phoneset and its silence symbol. */
    feat_set(v->features, "phoneset", phoneset_val(&us_phoneset));
    feat_set_string(v->features, "silence", us_phoneset.silence);

    /* Text analyser: character classes used by the tokenizer. */
    feat_set_string(v->features, "text_whitespace", us_english_whitespace);
    feat_set_string(v->features, "text_postpunctuation",
                    us_english_punctuation);
    feat_set_string(v->features, "text_prepunctuation",
                    us_english_prepunctuation);
    feat_set_string(v->features, "text_singlecharsymbols",
                    us_english_singlecharsymbols);

    feat_set(v->features, "tokentowords_func",
             itemfunc_val(&us_tokentowords));

    /* Phrasing. */
    feat_set(v->features, "phrasing_cart", cart_val(&us_phrasing_cart));

    /* Intonation: accent and tone trees. */
    feat_set(v->features, "int_cart_accents", cart_val(&us_int_accent_cart));
    feat_set(v->features, "int_cart_tones", cart_val(&us_int_tone_cart));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Duration. */
    feat_set(v->features, "dur_cart", cart_val(&us_durz_cart));
    feat_set(v->features, "dur_stats",
             dur_stats_val((dur_stats *)us_dur_stats));

    /* f0 model. */
    feat_set(v->features, "f0_model_func", uttfunc_val(&us_f0_model));
#endif /* !FLITE_PLUS_HTS_ENGINE */

    /* Post-lexical rules. */
    feat_set(v->features, "postlex_func", uttfunc_val(&us_postlex));

    us_ff_register(v->ffunctions);
}
Ejemplo n.º 7
0
/*
 * Install the Russian processing functions on utterance u.
 *
 * Fills u->features with the phoneset, the per-stage functions and the
 * hts_synth waveform synthesizer, then registers the Russian feature
 * functions on u->ffunctions.  The intonation, duration-model and f0-model
 * stages are all set to do_nothing.
 */
void russian_init(cst_utterance *u)
{
    /* Phoneset and its silence symbol. */
    feat_set(u->features, "phoneset", phoneset_val(&ru_phoneset));
    feat_set_string(u->features, "silence", ru_phoneset.silence);

    /* Text and linguistic stages. */
    feat_set(u->features, "textanalysis_func",
             uttfunc_val(&russian_textanalysis));
    feat_set(u->features, "lexical_insertion_func",
             uttfunc_val(&russian_lexical_insertion));
    feat_set(u->features, "phrasing_func",
             uttfunc_val(&russian_phrasify));
    feat_set(u->features, "pause_insertion_func",
             uttfunc_val(&russian_pause_insertion));
    feat_set(u->features, "intonation_func", uttfunc_val(do_nothing));
    feat_set(u->features, "postlex_func",
             uttfunc_val(russian_postlex_function));

    /* Prosody model stages are stubbed out and flagged as absent. */
    feat_set(u->features, "duration_model_func", uttfunc_val(do_nothing));
    feat_set(u->features, "f0_model_func", uttfunc_val(do_nothing));
    feat_set_string(u->features, "no_segment_duration_model", "1");
    feat_set_string(u->features, "no_f0_target_model", "1");

    /* Waveform synthesis. */
    feat_set(u->features, "wave_synth_func", uttfunc_val(&hts_synth));

    ru_ff_register(u->ffunctions);
}
/*
 * Register the "kal16" diphone voice.
 *
 * Returns the cached voice in cmu_us_kal16_diphone when one exists;
 * otherwise builds it, caches it there and returns it.
 *
 * voxdir: not read by this function.
 */
cst_voice *register_cmu_us_kal16(const char *voxdir)
{
    cst_voice *voice;
    cst_lexicon *lexicon;

    if (cmu_us_kal16_diphone)
    {
        /* Already registered */
        return cmu_us_kal16_diphone;
    }

    voice = new_voice();
    voice->name = "kal16";

    /* Language-specific parameters. */
    usenglish_init(voice);

    feat_set_string(voice->features, "name", "cmu_us_kal16");

    /* Intonation targets and duration stretch. */
    feat_set_float(voice->features, "int_f0_target_mean", 95.0);
    feat_set_float(voice->features, "int_f0_target_stddev", 11.0);

    feat_set_float(voice->features, "duration_stretch", 1.1);

    /* Lexicon and the post-lexical hook it carries. */
    lexicon = cmu_lex_init();
    feat_set(voice->features, "lexicon", lexicon_val(lexicon));
    feat_set(voice->features, "postlex_func", uttfunc_val(lexicon->postlex));

    /* Waveform synthesis; the join type is "modified_lpc". */
    feat_set(voice->features, "wave_synth_func",
             uttfunc_val(&diphone_synth));
    feat_set(voice->features, "diphone_db",
             diphone_db_val(&cmu_us_kal16_db));
    feat_set_int(voice->features, "sample_rate",
                 cmu_us_kal16_db.sts->sample_rate);
    feat_set_string(voice->features, "join_type", "modified_lpc");
    feat_set_string(voice->features, "resynth_type", "fixed");

    cmu_us_kal16_diphone = voice;
    return cmu_us_kal16_diphone;
}
Ejemplo n.º 9
0
/*
 * Initialise flite and register every voice listed in VoxDefs.
 *
 * Only does work when DOTTS is defined.  For each VoxDefs entry (the table
 * ends at the first entry whose name is null) the voice's register function
 * is called with NULL, and three features are attached to the resulting
 * voice: an audio streaming callback, an utterance callback and a
 * post-synthesis hook.
 */
void flowm_init()
{
#ifdef DOTTS
    int vox_idx;
    cst_audio_streaming_info *stream_info;

    /* Initialize flite interface */
    flite_init();

    vox_idx = 0;
    while (VoxDefs[vox_idx].name)
    {
        /* Register the voice. */
        VoxDefs[vox_idx].v = (VoxDefs[vox_idx].rv)(NULL);

        /* Low level audio streaming callback, so the waveform is played */
        /* as it is synthesized (needed for the slower CG voices). */
        stream_info = new_audio_streaming_info();
        stream_info->asc = flowm_audio_stream_chunk;
        stream_info->min_buffsize = VoxDefs[vox_idx].min_buffsize;
        feat_set(VoxDefs[vox_idx].v->features,
                 "streaming_info",
                 audio_streaming_info_val(stream_info));

        /* Callback reporting which tokens are being synthesized and */
        /* tracking the current position in the file. */
        feat_set(VoxDefs[vox_idx].v->features,
                 "utt_user_callback",
                 uttfunc_val(flowm_utt_callback));

        /* Hook for outputting a relation (only used in play). */
        feat_set(VoxDefs[vox_idx].v->features,
                 "post_synth_hook_func",
                 uttfunc_val(flowm_print_relation_callback));

        vox_idx++;
    }
#endif
}
Ejemplo n.º 10
0
/*
 * Register the "rms" clustergen voice.
 *
 * Returns the cached voice in cmu_us_rms_cg when one exists; otherwise
 * builds it, caches it there and returns it.
 *
 * voxdir: not read by this function.
 */
cst_voice *register_cmu_us_rms(const char *voxdir)
{
    cst_voice *voice;
    cst_lexicon *lexicon;

    if (cmu_us_rms_cg)
    {
        /* Already registered */
        return cmu_us_rms_cg;
    }

    voice = new_voice();
    voice->name = "rms";

    /* Language-specific parameters. */
    usenglish_init(voice);

    /* Voice identity. */
    flite_feat_set_string(voice->features, "name", "cmu_us_rms");

    /* Lexicon and the post-lexical hook it carries. */
    lexicon = cmu_lex_init();
    flite_feat_set(voice->features, "lexicon", lexicon_val(lexicon));
    flite_feat_set(voice->features, "postlex_func",
                   uttfunc_val(lexicon->postlex));

    /* Segment durations and f0 targets are handled at the HMM state */
    /* level, so the standard models are flagged as absent. */
    flite_feat_set_string(voice->features, "no_segment_duration_model", "1");
    flite_feat_set_string(voice->features, "no_f0_target_model", "1");

    /* Waveform synthesis. */
    flite_feat_set(voice->features, "wave_synth_func",
                   uttfunc_val(&cg_synth));
    flite_feat_set(voice->features, "cg_db", cg_db_val(&cmu_us_rms_cg_db));
    flite_feat_set_int(voice->features, "sample_rate",
                       cmu_us_rms_cg_db.sample_rate);

    cmu_us_rms_cg = voice;
    return cmu_us_rms_cg;
}
Ejemplo n.º 11
0
/*
 * Register the __VOICENAME__ clustergen voice (mimic template form).
 *
 * Returns the cached voice in __VOICENAME___cg when one exists; otherwise
 * builds it, caches it there and returns it.
 *
 * voxdir: not read by this function.
 */
cst_voice *register___VOICENAME__(const char *voxdir)
{
    cst_voice *voice;
    cst_lexicon *lexicon;

    if (__VOICENAME___cg)
    {
        /* Already registered */
        return __VOICENAME___cg;
    }

    voice = new_voice();
    voice->name = "__NICKNAME__";

    /* Language-specific parameters. */
    __MIMICLANG___init(voice);

    /* Voice identity. */
    mimic_feat_set_string(voice->features, "name", "__VOICENAME__");

    /* Lexicon and the post-lexical hook it carries. */
    lexicon = __MIMICLEX___init();
    mimic_feat_set(voice->features, "lexicon", lexicon_val(lexicon));
    mimic_feat_set(voice->features, "postlex_func",
                   uttfunc_val(lexicon->postlex));

    /* Segment durations and f0 targets are handled at the HMM state */
    /* level, so the standard models are flagged as absent. */
    mimic_feat_set_string(voice->features, "no_segment_duration_model", "1");
    mimic_feat_set_string(voice->features, "no_f0_target_model", "1");

    /* Waveform synthesis. */
    mimic_feat_set(voice->features, "wave_synth_func",
                   uttfunc_val(&cg_synth));
    mimic_feat_set(voice->features, "cg_db",
                   cg_db_val(&__VOICENAME___cg_db));
    mimic_feat_set_int(voice->features, "sample_rate",
                       __VOICENAME___cg_db.sample_rate);

    __VOICENAME___cg = voice;
    return __VOICENAME___cg;
}
/*
 * Register the "awb_time" limited-domain clunits voice and return it.
 *
 * The voice is built on the first call and cached in the global
 * cmu_time_awb_ldom.  Later calls return the cached pointer instead of
 * allocating a second voice, which would orphan the first one.
 *
 * As a side effect the global lexicon cmu_time_awb_lex is filled in and
 * the unit naming function of cmu_time_awb_db is installed.
 *
 * voxdir: not read by this function.
 */
cst_voice *register_cmu_time_awb(const char *voxdir)
{
    cst_voice *v;

    if (cmu_time_awb_ldom)
        return cmu_time_awb_ldom;  /* Already registered */

    v = new_voice();
    v->name = "awb_time";

    /* Sets up language specific parameters in the voice. */
    usenglish_init(v);

    /* Things that weren't filled in already. */
    flite_feat_set_string(v->features,"name","cmu_time_awb");

    /* Lexicon: populate the voice's own lexicon object (no LTS rules) */
    cmu_time_awb_lex.name = "cmu_time_awb";
    cmu_time_awb_lex.num_entries = cmu_time_awb_num_entries;
    cmu_time_awb_lex.num_bytes = cmu_time_awb_num_bytes;
    cmu_time_awb_lex.data = (unsigned char*) cmu_time_awb_lex_data;
    cmu_time_awb_lex.phone_table = (char**) cmu_time_awb_lex_phone_table;
    cmu_time_awb_lex.syl_boundary = cmu_syl_boundary;
    cmu_time_awb_lex.lts_rule_set = NULL;
    cmu_time_awb_lex.phone_hufftable = cmu_time_awb_lex_phones_huff_table;
    cmu_time_awb_lex.entry_hufftable = cmu_time_awb_lex_entries_huff_table;

    flite_feat_set(v->features,"lexicon",lexicon_val(&cmu_time_awb_lex));

    /* Waveform synthesis */
    flite_feat_set(v->features,"wave_synth_func",uttfunc_val(&clunits_synth));
    flite_feat_set(v->features,"clunit_db",clunit_db_val(&cmu_time_awb_db));
    flite_feat_set_int(v->features,"sample_rate",cmu_time_awb_db.sts->sample_rate);
    flite_feat_set_string(v->features,"join_type","simple_join");
    flite_feat_set_string(v->features,"resynth_type","fixed");

    /* Unit selection */
    cmu_time_awb_db.unit_name_func = cmu_time_awb_unit_name;

    cmu_time_awb_ldom = v;

    return cmu_time_awb_ldom;
}
Ejemplo n.º 13
0
/*
 * Command-line driver for a single compiled-in voice.
 *
 * Parses the options, registers the voice through REGISTER_VOX(NULL),
 * copies any -set/--set* features onto it, then synthesizes the input.
 *
 * Positional arguments: the first one becomes the input (filename or text),
 * any later one replaces the output type.  An option that needs a value but
 * appears as the last argument fails its (i+1 < argc) test and therefore
 * falls through to this positional handling.
 *
 * Input dispatch: -p selects phone input; otherwise the input is treated as
 * text when -t was given, or when it contains a space and -f was not given;
 * otherwise it is treated as a file name ("-" when no input was supplied).
 *
 * Returns 1 after --version, 0 otherwise.
 */
int main(int argc, char **argv)
{
    struct timeval tv;
    cst_voice *v;
    const char *filename;
    const char *outtype;
    int i;
    float durs;
    double time_start, time_end;
    int flite_verbose, flite_loop, flite_bench;
    int explicit_filename, explicit_text, explicit_phones;
    /* Bound used by the benchmark loop counter below. */
#define ITER_MAX 3
    int bench_iter = 0;
    cst_features *extra_feats;

    filename = 0;
    outtype = "play";   /* default is to play */
    flite_verbose = FALSE;
    flite_loop = FALSE;
    flite_bench = FALSE;
    explicit_text = explicit_filename = explicit_phones = FALSE;
    /* Features collected from the command line, copied to the voice later. */
    extra_feats = new_features();

    flite_init();

    for (i=1; i<argc; i++)
    {
	if (cst_streq(argv[i],"--version"))
	{
	    flite_version();
	    return 1;
	}
	else if (cst_streq(argv[i],"-h") ||
		 cst_streq(argv[i],"--help") ||
		 cst_streq(argv[i],"-?"))
	    flite_usage();
	else if (cst_streq(argv[i],"-v"))
	    flite_verbose = TRUE;
	else if (cst_streq(argv[i],"-l"))
	    flite_loop = TRUE;
	else if (cst_streq(argv[i],"-b"))
	{
	    flite_bench = TRUE;
	    break; /* ignore other arguments */
	}
	else if ((cst_streq(argv[i],"-o")) && (i+1 < argc))
	{
	    outtype = argv[i+1];
	    i++;
	}
	else if (cst_streq(argv[i],"-f") && (i+1 < argc))
	{
	    filename = argv[i+1];
	    explicit_filename = TRUE;
	    i++;
	}
	/* -pw / -ps / -pr: install print_info as the post-synthesis hook */
	/* for the Word, Segment or a named relation. */
	else if (cst_streq(argv[i],"-pw"))
	{
	    feat_set_string(extra_feats,"print_info_relation","Word");
	    feat_set(extra_feats,"post_synth_hook_func",
		     uttfunc_val(&print_info));
	}
	else if (cst_streq(argv[i],"-ps"))
	{
	    feat_set_string(extra_feats,"print_info_relation","Segment");
	    feat_set(extra_feats,"post_synth_hook_func",
		     uttfunc_val(&print_info));
	}
	else if (cst_streq(argv[i],"-pr") && (i+1 < argc))
	{
	    feat_set_string(extra_feats,"print_info_relation",argv[i+1]);
	    feat_set(extra_feats,"post_synth_hook_func",
		     uttfunc_val(&print_info));
	    i++;
	}
	/* -set/-s and --seti/--setf/--sets: hand the next argument to */
	/* ef_set() with no type, "int", "float" or "string". */
	else if ((cst_streq(argv[i],"-set") || cst_streq(argv[i],"-s"))
		 && (i+1 < argc))
	{
	    ef_set(extra_feats,argv[i+1],0);
	    i++;
	}
	else if (cst_streq(argv[i],"--seti") && (i+1 < argc))
	{
	    ef_set(extra_feats,argv[i+1],"int");
	    i++;
	}
	else if (cst_streq(argv[i],"--setf") && (i+1 < argc))
	{
	    ef_set(extra_feats,argv[i+1],"float");
	    i++;
	}
	else if (cst_streq(argv[i],"--sets") && (i+1 < argc))
	{
	    ef_set(extra_feats,argv[i+1],"string");
	    i++;
	}
	else if (cst_streq(argv[i],"-p") && (i+1 < argc))
	{
	    filename = argv[i+1];
	    explicit_phones = TRUE;
	    i++;
	}
	else if (cst_streq(argv[i],"-t") && (i+1 < argc))
	{
	    filename = argv[i+1];
	    explicit_text = TRUE;
	    i++;
	}
	/* Positional: second and later ones set the output type. */
	else if (filename)
	    outtype = argv[i];
	else
	    filename = argv[i];
    }

    if (filename == NULL) filename = "-";  /* stdin */
    v = REGISTER_VOX(NULL);
    feat_copy_into(extra_feats,v->features);
    durs = 0.0;

    /* Benchmark mode overrides the output type and input with fixed values. */
    if (flite_bench)
    {
	outtype = "none";
	filename = "A whole joy was reaping, but they've gone south, you should fetch azure mike.";
	explicit_text = TRUE;
    }

    /* Re-entered by the goto below when -l or -b is active. */
loop:
    gettimeofday(&tv,NULL);
    time_start = (double)(tv.tv_sec)+(((double)tv.tv_usec)/1000000.0);

    if (explicit_phones)
	durs = flite_phones_to_speech(filename,v,outtype);
    else if ((strchr(filename,' ') && !explicit_filename) || explicit_text)
	durs = flite_text_to_speech(filename,v,outtype);
    else
	durs = flite_file_to_speech(filename,v,outtype);

    gettimeofday(&tv,NULL);
    time_end = ((double)(tv.tv_sec))+((double)tv.tv_usec/1000000.0);

    /* In benchmark mode only the pass with bench_iter == ITER_MAX reports. */
    if (flite_verbose || (flite_bench && bench_iter == ITER_MAX))
	printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n",
	       durs/(float)(time_end-time_start),
	       durs,
	       (float)(time_end-time_start));

    /* -l repeats without end; -b repeats until bench_iter has counted */
    /* ITER_MAX passes, i.e. ITER_MAX+1 synthesis runs in total. */
    if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX))
	    goto loop;

    delete_features(extra_feats);
    UNREGISTER_VOX(v);

    return 0;
}
cst_voice *cst_cg_load_voice(const char *filename,
                             const cst_lang *lang_table)
{
    cst_voice *vox;
    cst_lexicon *lex = NULL;
    int i, end_of_features;
    const char *language;
    const char *xname;
    cst_cg_db *cg_db;
    char* fname;
    char* fval;
    cst_file vd;

    vd = cst_fopen(filename,CST_OPEN_READ | CST_OPEN_BINARY);
    if (vd == NULL)
    {
        cst_errmsg("Error load voice: can't open file %s\n",filename);
	return NULL;
    }

    if (cst_cg_read_header(vd) != 0)
    {
        cst_errmsg("Error load voice: %s does not have expected header\n",filename);
        cst_fclose(vd);
        return NULL;
    }

    vox = new_voice();

    /* Read voice features from the external file */
    /* Read until the feature is "end_of_features" */
    fname="";
    end_of_features = 0;
    while (end_of_features == 0)
    {
	cst_read_voice_feature(vd,&fname, &fval);
        if (cst_streq(fname,"end_of_features"))
            end_of_features = 1;
        else
        {
            xname = feat_own_string(vox->features,fname);
            flite_feat_set_string(vox->features,xname, fval);
        }
        cst_free(fname);
        cst_free(fval);
    }

    /* Load up cg_db from external file */
    cg_db = cst_cg_load_db(vox,vd);

    if (cg_db == NULL)
    {
	cst_fclose(vd);
        return NULL;
    }

    /* Use the language feature to initialize the correct voice */
    language = flite_get_param_string(vox->features, "language", "");

    /* Search Lang table for lang_init() and lex_init(); */
    for (i=0; lang_table[i].lang; i++)
    {
        if (cst_streq(language,lang_table[i].lang))
        {
            (lang_table[i].lang_init)(vox);
            lex = (lang_table[i].lex_init)();
            break;
        }
    }
    if (lex == NULL)
    {   /* Language is not supported */
	/* Delete allocated memory in cg_db */
	cst_cg_free_db(vd,cg_db);
	cst_fclose(vd);
        cst_errmsg("Error load voice: lang/lex %s not supported in this binary\n",language);
	return NULL;	
    }
    
    /* Things that weren't filled in already. */
    vox->name = cg_db->name;
    flite_feat_set_string(vox->features,"name",cg_db->name);
    flite_feat_set_string(vox->features,"pathname",filename);
    
    flite_feat_set(vox->features,"lexicon",lexicon_val(lex));
    flite_feat_set(vox->features,"postlex_func",uttfunc_val(lex->postlex));

    /* No standard segment durations are needed as its done at the */
    /* HMM state level */
    flite_feat_set_string(vox->features,"no_segment_duration_model","1");
    flite_feat_set_string(vox->features,"no_f0_target_model","1");

    /* Waveform synthesis */
    flite_feat_set(vox->features,"wave_synth_func",uttfunc_val(&cg_synth));
    flite_feat_set(vox->features,"cg_db",cg_db_val(cg_db));
    flite_feat_set_int(vox->features,"sample_rate",cg_db->sample_rate);

    cst_fclose(vd);
    return vox;
}
Ejemplo n.º 15
0
/*
 * Command-line driver with run-time voice selection.
 *
 * Parses the options, selects a voice from flite_voice_list (the one named
 * by -voice, else whatever flite_voice_select(NULL) returns), copies any
 * -set/--set* features onto it, then synthesizes the input.
 *
 * Positional arguments: the first one becomes the input (filename or text),
 * any later one replaces the output type.  An option that needs a value but
 * appears as the last argument fails its (i+1 < argc) test and therefore
 * falls through to this positional handling.
 *
 * -lv and -voice build the voice list from the -voicedir value seen so far,
 * so -voicedir only affects them when it appears earlier on the command
 * line.
 *
 * Input dispatch: -p selects phone input; otherwise the input is treated as
 * text when -t was given, or when it contains a space and -f was not given;
 * otherwise it is treated as a file name ("-" when no input was supplied).
 * -ssml switches the text/file paths to their SSML variants.
 *
 * Returns 1 after --version or when no voice could be selected, 0
 * otherwise; -lv and -voicedump leave through exit(0).
 */
int main(int argc, char **argv)
{
    struct timeval tv;
    cst_voice *v;
    const char *filename;
    const char *outtype;
    cst_voice *desired_voice = 0;
    const char *voicedir = NULL;
    int i;
    float durs;
    double time_start, time_end;
    int flite_verbose, flite_loop, flite_bench;
    int explicit_filename, explicit_text, explicit_phones, ssml_mode;
    /* Bound used by the benchmark loop counter below. */
#define ITER_MAX 3
    int bench_iter = 0;
    cst_features *extra_feats;
    const char *lex_addenda_file = NULL;
    const char *voicedumpfile = NULL;
    cst_audio_streaming_info *asi;

    filename = 0;
    outtype = "play";   /* default is to play */
    flite_verbose = FALSE;
    flite_loop = FALSE;
    flite_bench = FALSE;
    explicit_text = explicit_filename = explicit_phones = FALSE;
    ssml_mode = FALSE;
    /* Features collected from the command line, copied to the voice later. */
    extra_feats = new_features();

    flite_init();
    flite_add_lang("eng",usenglish_init,cmu_lex_init);

    for (i=1; i<argc; i++)
    {
        if (cst_streq(argv[i],"--version"))
        {
            flite_version();
            return 1;
        }
        else if (cst_streq(argv[i],"-h") ||
                 cst_streq(argv[i],"--help") ||
                 cst_streq(argv[i],"-?"))
            flite_usage();
        else if (cst_streq(argv[i],"-v"))
            flite_verbose = TRUE;
        else if (cst_streq(argv[i],"-lv"))
        {
            if (flite_voice_list == NULL)
                flite_set_voice_list(voicedir);
            flite_voice_list_print();
            exit(0);
        }
        else if (cst_streq(argv[i],"-l"))
            flite_loop = TRUE;
        else if (cst_streq(argv[i],"-b"))
        {
            flite_bench = TRUE;
            break; /* ignore other arguments */
        }
        else if ((cst_streq(argv[i],"-o")) && (i+1 < argc))
        {
            outtype = argv[i+1];
            i++;
        }
        else if ((cst_streq(argv[i],"-voice")) && (i+1 < argc))
        {
            if (flite_voice_list == NULL)
                flite_set_voice_list(voicedir);
            desired_voice = flite_voice_select(argv[i+1]);
            i++;
        }
        else if ((cst_streq(argv[i],"-voicedir")) && (i+1 < argc))
        {
            voicedir = argv[i+1];
            if (flite_voice_list == NULL)
                flite_set_voice_list(voicedir);
            i++;
        }
        else if ((cst_streq(argv[i],"-add_lex")) && (i+1 < argc))
        {
            lex_addenda_file = argv[i+1];
            i++;
        }
        else if (cst_streq(argv[i],"-f") && (i+1 < argc))
        {
            filename = argv[i+1];
            explicit_filename = TRUE;
            i++;
        }
        /* -pw / -ps / -pr: install print_info as the post-synthesis hook */
        /* for the Word, Segment or a named relation. */
        else if (cst_streq(argv[i],"-pw"))
        {
            feat_set_string(extra_feats,"print_info_relation","Word");
            feat_set(extra_feats,"post_synth_hook_func",
                     uttfunc_val(&print_info));
        }
        else if (cst_streq(argv[i],"-ps"))
        {
            feat_set_string(extra_feats,"print_info_relation","Segment");
            feat_set(extra_feats,"post_synth_hook_func",
                     uttfunc_val(&print_info));
        }
        else if (cst_streq(argv[i],"-ssml"))
        {
            ssml_mode = TRUE;
        }
        else if (cst_streq(argv[i],"-pr") && (i+1 < argc))
        {
            feat_set_string(extra_feats,"print_info_relation",argv[i+1]);
            feat_set(extra_feats,"post_synth_hook_func",
                     uttfunc_val(&print_info));
            i++;
        }
        else if (cst_streq(argv[i],"-voicedump") && (i+1 < argc))
        {
            voicedumpfile = argv[i+1];
            i++;
        }
        /* -set/-s and --seti/--setf/--sets: hand the next argument to */
        /* ef_set() with no type, "int", "float" or "string". */
        else if ((cst_streq(argv[i],"-set") || cst_streq(argv[i],"-s"))
                 && (i+1 < argc))
        {
            ef_set(extra_feats,argv[i+1],0);
            i++;
        }
        else if (cst_streq(argv[i],"--seti") && (i+1 < argc))
        {
            ef_set(extra_feats,argv[i+1],"int");
            i++;
        }
        else if (cst_streq(argv[i],"--setf") && (i+1 < argc))
        {
            ef_set(extra_feats,argv[i+1],"float");
            i++;
        }
        else if (cst_streq(argv[i],"--sets") && (i+1 < argc))
        {
            ef_set(extra_feats,argv[i+1],"string");
            i++;
        }
        else if (cst_streq(argv[i],"-p") && (i+1 < argc))
        {
            filename = argv[i+1];
            explicit_phones = TRUE;
            i++;
        }
        else if (cst_streq(argv[i],"-t") && (i+1 < argc))
        {
            filename = argv[i+1];
            explicit_text = TRUE;
            i++;
        }
        /* Positional: second and later ones set the output type. */
        else if (filename)
            outtype = argv[i];
        else
            filename = argv[i];
    }

    if (filename == NULL) filename = "-";  /* stdin */
    if (flite_voice_list == NULL)
        flite_set_voice_list(voicedir);
    if (desired_voice == 0)
        desired_voice = flite_voice_select(NULL);

    /* Without a voice there is nothing to synthesize with; stop here */
    /* instead of dereferencing a null voice below. */
    if (desired_voice == 0)
    {
        fprintf(stderr,"flite: no voice available\n");
        delete_features(extra_feats);
        return 1;
    }

    v = desired_voice;
    feat_copy_into(extra_feats,v->features);
    durs = 0.0;

    if (voicedumpfile != NULL)
    {
        flite_voice_dump(v,voicedumpfile);
        exit(0);
    }

    if (lex_addenda_file)
        flite_voice_add_lex_addenda(v,lex_addenda_file);

    /* Output type "stream": attach a streaming callback to the voice. */
    if (cst_streq("stream",outtype))
    {
        asi = new_audio_streaming_info();
        asi->asc = audio_stream_chunk;
        feat_set(v->features,"streaming_info",audio_streaming_info_val(asi));
    }

    /* Benchmark mode overrides the output type and input with fixed values. */
    if (flite_bench)
    {
        outtype = "none";
        filename = "A whole joy was reaping, but they've gone south, you should fetch azure mike.";
        explicit_text = TRUE;
    }

    /* Re-entered by the goto below when -l or -b is active. */
loop:
    gettimeofday(&tv,NULL);
    time_start = (double)(tv.tv_sec)+(((double)tv.tv_usec)/1000000.0);

    if (explicit_phones)
        durs = flite_phones_to_speech(filename,v,outtype);
    else if ((strchr(filename,' ') && !explicit_filename) || explicit_text)
    {
        if (ssml_mode)
            durs = flite_ssml_text_to_speech(filename,v,outtype);
        else
            durs = flite_text_to_speech(filename,v,outtype);
    }
    else
    {
        if (ssml_mode)
            durs = flite_ssml_file_to_speech(filename,v,outtype);
        else
            durs = flite_file_to_speech(filename,v,outtype);
    }

    gettimeofday(&tv,NULL);
    time_end = ((double)(tv.tv_sec))+((double)tv.tv_usec/1000000.0);

    /* In benchmark mode only the pass with bench_iter == ITER_MAX reports. */
    if (flite_verbose || (flite_bench && bench_iter == ITER_MAX))
        printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n",
               durs/(float)(time_end-time_start),
               durs,
               (float)(time_end-time_start));

    /* -l repeats without end; -b repeats until bench_iter has counted */
    /* ITER_MAX passes, i.e. ITER_MAX+1 synthesis runs in total. */
    if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX))
        goto loop;

    delete_features(extra_feats);
    delete_val(flite_voice_list); flite_voice_list=0;
    /*    cst_alloc_debug_summary(); */

    return 0;
}