cst_voice *register_cmu_us_no_wave(const char *voxdir)
{
    cst_voice *v = new_voice();
    cst_lexicon *lex;

    v->name = "no_wave_voice";

    /* Set up basic values for synthesizing with this voice */
    usenglish_init(v);
    feat_set_string(v->features, "name", "cmu_us_no_wave");

    /* Lexicon */
    lex = cmu_lex_init();
    feat_set(v->features, "lexicon", lexicon_val(lex));

    /* Intonation */
    feat_set_float(v->features, "int_f0_target_mean", 95.0);
    feat_set_float(v->features, "int_f0_target_stddev", 11.0);
    feat_set_float(v->features, "duration_stretch", 1.1);

    /* Post lexical rules */
    feat_set(v->features, "postlex_func", uttfunc_val(lex->postlex));

    /* Waveform synthesis: no_wave_synth (text analysis only, no waveform) */
    feat_set(v->features, "wave_synth_func", uttfunc_val(&no_wave_synth));

    cmu_us_no_wave = v;

    return cmu_us_no_wave;
}
cst_voice *register_cmu_us_kal(const char *voxdir)
{
    cst_voice *v = new_voice();

    /* Set up basic values for synthesizing with this voice */
    usenglish_init(v);
    feat_set_string(v->features, "name", "cmu_us_kal_diphone");

    /* Lexicon */
    cmu_lex_init();
    feat_set(v->features, "lexicon", lexicon_val(&cmu_lex));

    /* Intonation */
    feat_set_float(v->features, "int_f0_target_mean", 105.0);
    feat_set_float(v->features, "int_f0_target_stddev", 14.0);

    /* Post lexical rules */
    feat_set(v->features, "postlex_func", uttfunc_val(&cmu_us_kal_postlex));

    /* Duration */
    feat_set_float(v->features, "duration_stretch", 1.1);

    /* Waveform synthesis: diphone_synth */
    feat_set(v->features, "wave_synth_func", uttfunc_val(&diphone_synth));
    feat_set(v->features, "diphone_db", diphone_db_val(&cmu_us_kal_db));
    feat_set_int(v->features, "sample_rate", cmu_us_kal_db.sts->sample_rate);
    feat_set_string(v->features, "resynth_type", "fixed");
    feat_set_string(v->features, "join_type", "modified_lpc");

    cmu_us_kal_diphone = v;

    return cmu_us_kal_diphone;
}
cst_voice *register___VOICENAME__(const char *voxdir)
{
    cst_voice *v = new_voice();

    v->name = "__NICKNAME__";

    /* Sets up language specific parameters in the __VOICENAME__. */
    usenglish_init(v);

    /* Things that weren't filled in already. */
    feat_set_string(v->features, "name", "__VOICENAME__");

    /* Lexicon */
    cmu_lex_init();
    feat_set(v->features, "lexicon", lexicon_val(&cmu_lex));

    /* Waveform synthesis */
    feat_set(v->features, "wave_synth_func", uttfunc_val(&clunits_synth));
    feat_set(v->features, "clunit_db", clunit_db_val(&__VOICENAME___db));
    feat_set_int(v->features, "sample_rate", __VOICENAME___db.sts->sample_rate);
    feat_set_string(v->features, "join_type", "simple_join");
    feat_set_string(v->features, "resynth_type", "fixed");

    /* Unit selection */
    __VOICENAME___db.unit_name_func = __VOICENAME___unit_name;

    __VOICENAME___clunits = v;

    return __VOICENAME___clunits;
}
cst_voice *register___VOICENAME__(const char *voxdir)
{
    cst_voice *v;
    cst_lexicon *lex;

    if (__VOICENAME___clunits)
        return __VOICENAME___clunits;  /* Already registered */

    v = new_voice();
    v->name = "__NICKNAME__";

    /* Sets up language specific parameters in the __VOICENAME__. */
    __FLITELANG___init(v);

    /* Things that weren't filled in already. */
    flite_feat_set_string(v->features, "name", "__VOICENAME__");

    /* Duration model */
    flite_feat_set(v->features, "dur_cart", cart_val(&__VOICENAME___dur_cart));
    flite_feat_set(v->features, "dur_stats",
                   dur_stats_val((dur_stats *)__VOICENAME___dur_stats));

    /* Lexicon */
    lex = __FLITELEX___init();
    flite_feat_set(v->features, "lexicon", lexicon_val(lex));
    flite_feat_set(v->features, "postlex_func", uttfunc_val(lex->postlex));

    /* Waveform synthesis */
    flite_feat_set(v->features, "wave_synth_func", uttfunc_val(&clunits_synth));
    flite_feat_set(v->features, "clunit_db", clunit_db_val(&__VOICENAME___db));
    flite_feat_set_int(v->features, "sample_rate",
                       __VOICENAME___db.sts->sample_rate);
    flite_feat_set_string(v->features, "join_type", "simple_join");
    flite_feat_set_string(v->features, "resynth_type", "fixed");

    if ((voxdir != NULL) &&
        (__VOICENAME___db.sts->sts == NULL) &&
        (__VOICENAME___db.sts->sts_paged == NULL) &&
        (__VOICENAME___db.sts->frames == NULL))
        flite_mmap_clunit_voxdata(voxdir, v);

    /* Unit selection */
    __VOICENAME___db.unit_name_func = __VOICENAME___unit_name;

    __VOICENAME___clunits = v;

    return __VOICENAME___clunits;
}
cst_voice *register_cmu_us_kal(const char *voxdir)
{
    cst_voice *v;
    cst_lexicon *lex;

    if (cmu_us_kal_diphone)
        return cmu_us_kal_diphone;  /* Already registered */

    v = new_voice();
    v->name = "kal";

    /* Set up basic values for synthesizing with this voice */
    usenglish_init(v);
#ifndef FLITE_PLUS_HTS_ENGINE
    flite_feat_set_string(v->features, "name", "cmu_us_kal_diphone");
#endif /* !FLITE_PLUS_HTS_ENGINE */

    /* Lexicon */
    lex = cmu_lex_init();
    flite_feat_set(v->features, "lexicon", lexicon_val(lex));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Intonation */
    flite_feat_set_float(v->features, "int_f0_target_mean", 95.0);
    flite_feat_set_float(v->features, "int_f0_target_stddev", 11.0);
    flite_feat_set_float(v->features, "duration_stretch", 1.1);
#endif /* !FLITE_PLUS_HTS_ENGINE */

    /* Post lexical rules */
    flite_feat_set(v->features, "postlex_func", uttfunc_val(&cmu_us_kal_postlex));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Waveform synthesis: diphone_synth */
    flite_feat_set(v->features, "wave_synth_func", uttfunc_val(&diphone_synth));
    flite_feat_set(v->features, "diphone_db", diphone_db_val(&cmu_us_kal_db));
    flite_feat_set_int(v->features, "sample_rate", cmu_us_kal_db.sts->sample_rate);
    flite_feat_set_string(v->features, "resynth_type", "fixed");
    flite_feat_set_string(v->features, "join_type", "modified_lpc");
#endif /* !FLITE_PLUS_HTS_ENGINE */

    cmu_us_kal_diphone = v;

    return cmu_us_kal_diphone;
}
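/*
 * Example (not part of the original sources): a minimal sketch of a caller
 * for the registration function above.  It assumes the standard Flite entry
 * points flite_init() and flite_text_to_speech() are linked in; the outtype
 * "play" sends audio to the default device, while "none" would only run the
 * synthesis.  The function name kal_hello_example is illustrative only.
 */
int kal_hello_example(void)
{
    cst_voice *v;

    flite_init();                   /* initialize the Flite library */
    v = register_cmu_us_kal(NULL);  /* register the kal diphone voice */
    if (v == NULL)
        return -1;

    /* Synthesize a short utterance and play it on the default device */
    flite_text_to_speech("Hello from the kal diphone voice.", v, "play");

    /* unregister_cmu_us_kal(v);  -- call the matching unregister function
       here if your build provides one, to free the voice when done */
    return 0;
}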
void usenglish_init(cst_voice *v)
{
    us_text_init();

    /* utterance break function */
    feat_set(v->features, "utt_break", breakfunc_val(&default_utt_break));

    /* Phoneset */
    feat_set(v->features, "phoneset", phoneset_val(&us_phoneset));
    feat_set_string(v->features, "silence", us_phoneset.silence);

    /* Text analyser */
    feat_set_string(v->features, "text_whitespace", us_english_whitespace);
    feat_set_string(v->features, "text_postpunctuation", us_english_punctuation);
    feat_set_string(v->features, "text_prepunctuation", us_english_prepunctuation);
    feat_set_string(v->features, "text_singlecharsymbols",
                    us_english_singlecharsymbols);
    feat_set(v->features, "tokentowords_func", itemfunc_val(&us_tokentowords));

    /* Phrasing */
    feat_set(v->features, "phrasing_cart", cart_val(&us_phrasing_cart));

    /* Intonation */
    feat_set(v->features, "int_cart_accents", cart_val(&us_int_accent_cart));
    feat_set(v->features, "int_cart_tones", cart_val(&us_int_tone_cart));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Duration */
    feat_set(v->features, "dur_cart", cart_val(&us_durz_cart));
    feat_set(v->features, "dur_stats", dur_stats_val((dur_stats *)us_dur_stats));

    /* f0 model */
    feat_set(v->features, "f0_model_func", uttfunc_val(&us_f0_model));
#endif /* !FLITE_PLUS_HTS_ENGINE */

    /* Post lexical rules */
    feat_set(v->features, "postlex_func", uttfunc_val(&us_postlex));

    us_ff_register(v->ffunctions);
}
void russian_init(cst_utterance *u)
{
    /* Phoneset */
    feat_set(u->features, "phoneset", phoneset_val(&ru_phoneset));
    feat_set_string(u->features, "silence", ru_phoneset.silence);

    /* Synthesis pipeline functions */
    feat_set(u->features, "textanalysis_func", uttfunc_val(&russian_textanalysis));
    feat_set(u->features, "lexical_insertion_func",
             uttfunc_val(&russian_lexical_insertion));
    feat_set(u->features, "phrasing_func", uttfunc_val(&russian_phrasify));
    feat_set(u->features, "pause_insertion_func",
             uttfunc_val(&russian_pause_insertion));
    feat_set(u->features, "intonation_func", uttfunc_val(&do_nothing));
    feat_set(u->features, "postlex_func", uttfunc_val(&russian_postlex_function));

    /* Duration and f0 are handled by the HTS engine, not rule models */
    feat_set(u->features, "duration_model_func", uttfunc_val(&do_nothing));
    feat_set(u->features, "f0_model_func", uttfunc_val(&do_nothing));
    feat_set_string(u->features, "no_segment_duration_model", "1");
    feat_set_string(u->features, "no_f0_target_model", "1");

    /* Waveform synthesis */
    feat_set(u->features, "wave_synth_func", uttfunc_val(&hts_synth));

    ru_ff_register(u->ffunctions);
}
cst_voice *register_cmu_us_kal16(const char *voxdir)
{
    cst_voice *v;
    cst_lexicon *lex;

    if (cmu_us_kal16_diphone)
        return cmu_us_kal16_diphone;  /* Already registered */

    v = new_voice();
    v->name = "kal16";

    /* Sets up language specific parameters in the cmu_us_kal16. */
    usenglish_init(v);

    feat_set_string(v->features, "name", "cmu_us_kal16");
    feat_set_float(v->features, "int_f0_target_mean", 95.0);
    feat_set_float(v->features, "int_f0_target_stddev", 11.0);
    feat_set_float(v->features, "duration_stretch", 1.1);

    /* Lexicon */
    lex = cmu_lex_init();
    feat_set(v->features, "lexicon", lexicon_val(lex));
    feat_set(v->features, "postlex_func", uttfunc_val(lex->postlex));

    /* Waveform synthesis */
    feat_set(v->features, "wave_synth_func", uttfunc_val(&diphone_synth));
    feat_set(v->features, "diphone_db", diphone_db_val(&cmu_us_kal16_db));
    feat_set_int(v->features, "sample_rate", cmu_us_kal16_db.sts->sample_rate);
    /* feat_set_string(v->features, "join_type", "simple_join"); */
    feat_set_string(v->features, "join_type", "modified_lpc");
    feat_set_string(v->features, "resynth_type", "fixed");

    cmu_us_kal16_diphone = v;

    return cmu_us_kal16_diphone;
}
void flowm_init()
{
#ifdef DOTTS
    int i;
    cst_audio_streaming_info *asi;

    flite_init();  /* Initialize flite interface */

    for (i = 0; VoxDefs[i].name; i++)
    {
        VoxDefs[i].v = (VoxDefs[i].rv)(NULL);  /* register voice */

        /* Set up call back function for low level audio streaming */
        /* This way it plays the waveform as it synthesizes it */
        /* This is necessary for the slower (CG) voices */
        asi = new_audio_streaming_info();
        asi->asc = flowm_audio_stream_chunk;
        asi->min_buffsize = VoxDefs[i].min_buffsize;
        feat_set(VoxDefs[i].v->features, "streaming_info",
                 audio_streaming_info_val(asi));

        /* Set up call back function for sending what tokens are being */
        /* synthesized and for keeping track of the current position in */
        /* the file */
        feat_set(VoxDefs[i].v->features, "utt_user_callback",
                 uttfunc_val(flowm_utt_callback));

        /* For outputting results of a relation (only used in play) */
        feat_set(VoxDefs[i].v->features, "post_synth_hook_func",
                 uttfunc_val(flowm_print_relation_callback));
    }
#endif
    return;
}
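/*
 * Example (not part of the original sources): a minimal sketch of the kind
 * of streaming callback installed above via asi->asc.  The signature is
 * assumed to follow Flite's cst_audio_stream_callback type; the real
 * flowm_audio_stream_chunk is application code and does more (it feeds an
 * audio device).  The function name example_audio_stream_chunk is
 * illustrative only.
 */
static int example_audio_stream_chunk(const cst_wave *w, int start, int size,
                                       int last, cst_audio_streaming_info *asi)
{
    /* "size" samples starting at w->samples[start] (16-bit PCM at
       w->sample_rate) are ready; "last" is non-zero on the final chunk. */
    (void)asi;  /* unused in this sketch */

    /* e.g. hand &w->samples[start] to an audio device or file writer here */

    if (last)
    {
        /* flush/close the output here if needed */
    }

    return CST_AUDIO_STREAM_CONT;  /* keep streaming; CST_AUDIO_STREAM_STOP aborts */
}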
cst_voice *register_cmu_us_rms(const char *voxdir)
{
    cst_voice *vox;
    cst_lexicon *lex;

    if (cmu_us_rms_cg)
        return cmu_us_rms_cg;  /* Already registered */

    vox = new_voice();
    vox->name = "rms";

    /* Sets up language specific parameters in the cmu_us_rms. */
    usenglish_init(vox);

    /* Things that weren't filled in already. */
    flite_feat_set_string(vox->features, "name", "cmu_us_rms");

    /* Lexicon */
    lex = cmu_lex_init();
    flite_feat_set(vox->features, "lexicon", lexicon_val(lex));
    flite_feat_set(vox->features, "postlex_func", uttfunc_val(lex->postlex));

    /* No standard segment durations are needed as it's done at the */
    /* HMM state level */
    flite_feat_set_string(vox->features, "no_segment_duration_model", "1");
    flite_feat_set_string(vox->features, "no_f0_target_model", "1");

    /* Waveform synthesis */
    flite_feat_set(vox->features, "wave_synth_func", uttfunc_val(&cg_synth));
    flite_feat_set(vox->features, "cg_db", cg_db_val(&cmu_us_rms_cg_db));
    flite_feat_set_int(vox->features, "sample_rate", cmu_us_rms_cg_db.sample_rate);

    cmu_us_rms_cg = vox;

    return cmu_us_rms_cg;
}
cst_voice *register___VOICENAME__(const char *voxdir)
{
    cst_voice *vox;
    cst_lexicon *lex;

    if (__VOICENAME___cg)
        return __VOICENAME___cg;  /* Already registered */

    vox = new_voice();
    vox->name = "__NICKNAME__";

    /* Sets up language specific parameters in the __VOICENAME__. */
    __MIMICLANG___init(vox);

    /* Things that weren't filled in already. */
    mimic_feat_set_string(vox->features, "name", "__VOICENAME__");

    /* Lexicon */
    lex = __MIMICLEX___init();
    mimic_feat_set(vox->features, "lexicon", lexicon_val(lex));
    mimic_feat_set(vox->features, "postlex_func", uttfunc_val(lex->postlex));

    /* No standard segment durations are needed as it's done at the */
    /* HMM state level */
    mimic_feat_set_string(vox->features, "no_segment_duration_model", "1");
    mimic_feat_set_string(vox->features, "no_f0_target_model", "1");

    /* Waveform synthesis */
    mimic_feat_set(vox->features, "wave_synth_func", uttfunc_val(&cg_synth));
    mimic_feat_set(vox->features, "cg_db", cg_db_val(&__VOICENAME___cg_db));
    mimic_feat_set_int(vox->features, "sample_rate", __VOICENAME___cg_db.sample_rate);

    __VOICENAME___cg = vox;

    return __VOICENAME___cg;
}
cst_voice *register_cmu_time_awb(const char *voxdir)
{
    cst_voice *v = new_voice();

    v->name = "awb_time";

    /* Sets up language specific parameters in the voice. */
    usenglish_init(v);

    /* Things that weren't filled in already. */
    flite_feat_set_string(v->features, "name", "cmu_time_awb");

    /* Lexicon */
    cmu_time_awb_lex.name = "cmu_time_awb";
    cmu_time_awb_lex.num_entries = cmu_time_awb_num_entries;
    cmu_time_awb_lex.num_bytes = cmu_time_awb_num_bytes;
    cmu_time_awb_lex.data = (unsigned char *)cmu_time_awb_lex_data;
    cmu_time_awb_lex.phone_table = (char **)cmu_time_awb_lex_phone_table;
    cmu_time_awb_lex.syl_boundary = cmu_syl_boundary;
    cmu_time_awb_lex.lts_rule_set = NULL;
    cmu_time_awb_lex.phone_hufftable = cmu_time_awb_lex_phones_huff_table;
    cmu_time_awb_lex.entry_hufftable = cmu_time_awb_lex_entries_huff_table;
    flite_feat_set(v->features, "lexicon", lexicon_val(&cmu_time_awb_lex));

    /* Waveform synthesis */
    flite_feat_set(v->features, "wave_synth_func", uttfunc_val(&clunits_synth));
    flite_feat_set(v->features, "clunit_db", clunit_db_val(&cmu_time_awb_db));
    flite_feat_set_int(v->features, "sample_rate",
                       cmu_time_awb_db.sts->sample_rate);
    flite_feat_set_string(v->features, "join_type", "simple_join");
    flite_feat_set_string(v->features, "resynth_type", "fixed");

    /* Unit selection */
    cmu_time_awb_db.unit_name_func = cmu_time_awb_unit_name;

    cmu_time_awb_ldom = v;

    return cmu_time_awb_ldom;
}
int main(int argc, char **argv)
{
    struct timeval tv;
    cst_voice *v;
    const char *filename;
    const char *outtype;
    int i;
    float durs;
    double time_start, time_end;
    int flite_verbose, flite_loop, flite_bench;
    int explicit_filename, explicit_text, explicit_phones;
#define ITER_MAX 3
    int bench_iter = 0;
    cst_features *extra_feats;

    filename = 0;
    outtype = "play";  /* default is to play */
    flite_verbose = FALSE;
    flite_loop = FALSE;
    flite_bench = FALSE;
    explicit_text = explicit_filename = explicit_phones = FALSE;
    extra_feats = new_features();

    flite_init();

    for (i = 1; i < argc; i++)
    {
        if (cst_streq(argv[i], "--version"))
        {
            flite_version();
            return 1;
        }
        else if (cst_streq(argv[i], "-h") ||
                 cst_streq(argv[i], "--help") ||
                 cst_streq(argv[i], "-?"))
            flite_usage();
        else if (cst_streq(argv[i], "-v"))
            flite_verbose = TRUE;
        else if (cst_streq(argv[i], "-l"))
            flite_loop = TRUE;
        else if (cst_streq(argv[i], "-b"))
        {
            flite_bench = TRUE;
            break;  /* ignore other arguments */
        }
        else if ((cst_streq(argv[i], "-o")) && (i + 1 < argc))
        {
            outtype = argv[i + 1];
            i++;
        }
        else if (cst_streq(argv[i], "-f") && (i + 1 < argc))
        {
            filename = argv[i + 1];
            explicit_filename = TRUE;
            i++;
        }
        else if (cst_streq(argv[i], "-pw"))
        {
            feat_set_string(extra_feats, "print_info_relation", "Word");
            feat_set(extra_feats, "post_synth_hook_func", uttfunc_val(&print_info));
        }
        else if (cst_streq(argv[i], "-ps"))
        {
            feat_set_string(extra_feats, "print_info_relation", "Segment");
            feat_set(extra_feats, "post_synth_hook_func", uttfunc_val(&print_info));
        }
        else if (cst_streq(argv[i], "-pr") && (i + 1 < argc))
        {
            feat_set_string(extra_feats, "print_info_relation", argv[i + 1]);
            feat_set(extra_feats, "post_synth_hook_func", uttfunc_val(&print_info));
            i++;
        }
        else if ((cst_streq(argv[i], "-set") || cst_streq(argv[i], "-s")) &&
                 (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], 0);
            i++;
        }
        else if (cst_streq(argv[i], "--seti") && (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], "int");
            i++;
        }
        else if (cst_streq(argv[i], "--setf") && (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], "float");
            i++;
        }
        else if (cst_streq(argv[i], "--sets") && (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], "string");
            i++;
        }
        else if (cst_streq(argv[i], "-p") && (i + 1 < argc))
        {
            filename = argv[i + 1];
            explicit_phones = TRUE;
            i++;
        }
        else if (cst_streq(argv[i], "-t") && (i + 1 < argc))
        {
            filename = argv[i + 1];
            explicit_text = TRUE;
            i++;
        }
        else if (filename)
            outtype = argv[i];
        else
            filename = argv[i];
    }

    if (filename == NULL)
        filename = "-";  /* stdin */

    v = REGISTER_VOX(NULL);
    feat_copy_into(extra_feats, v->features);
    durs = 0.0;

    if (flite_bench)
    {
        outtype = "none";
        filename = "A whole joy was reaping, but they've gone south, you should fetch azure mike.";
        explicit_text = TRUE;
    }

loop:
    gettimeofday(&tv, NULL);
    time_start = (double)(tv.tv_sec) + (((double)tv.tv_usec) / 1000000.0);

    if (explicit_phones)
        durs = flite_phones_to_speech(filename, v, outtype);
    else if ((strchr(filename, ' ') && !explicit_filename) || explicit_text)
        durs = flite_text_to_speech(filename, v, outtype);
    else
        durs = flite_file_to_speech(filename, v, outtype);

    gettimeofday(&tv, NULL);
    time_end = ((double)(tv.tv_sec)) + ((double)tv.tv_usec / 1000000.0);

    if (flite_verbose || (flite_bench && bench_iter == ITER_MAX))
        printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n",
               durs / (float)(time_end - time_start),
               durs, (float)(time_end - time_start));

    if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX))
        goto loop;

    delete_features(extra_feats);
    UNREGISTER_VOX(v);

    return 0;
}
cst_voice *cst_cg_load_voice(const char *filename,
                             const cst_lang *lang_table)
{
    cst_voice *vox;
    cst_lexicon *lex = NULL;
    int i, end_of_features;
    const char *language;
    const char *xname;
    cst_cg_db *cg_db;
    char *fname;
    char *fval;
    cst_file vd;

    vd = cst_fopen(filename, CST_OPEN_READ | CST_OPEN_BINARY);
    if (vd == NULL)
    {
        cst_errmsg("Error loading voice: can't open file %s\n", filename);
        return NULL;
    }

    if (cst_cg_read_header(vd) != 0)
    {
        cst_errmsg("Error loading voice: %s does not have expected header\n",
                   filename);
        cst_fclose(vd);
        return NULL;
    }

    vox = new_voice();

    /* Read voice features from the external file */
    /* Read until the feature is "end_of_features" */
    fname = "";
    end_of_features = 0;
    while (end_of_features == 0)
    {
        cst_read_voice_feature(vd, &fname, &fval);
        if (cst_streq(fname, "end_of_features"))
            end_of_features = 1;
        else
        {
            xname = feat_own_string(vox->features, fname);
            flite_feat_set_string(vox->features, xname, fval);
        }
        cst_free(fname);
        cst_free(fval);
    }

    /* Load up cg_db from external file */
    cg_db = cst_cg_load_db(vox, vd);
    if (cg_db == NULL)
    {
        cst_fclose(vd);
        return NULL;
    }

    /* Use the language feature to initialize the correct voice */
    language = flite_get_param_string(vox->features, "language", "");

    /* Search the lang table for lang_init() and lex_init() */
    for (i = 0; lang_table[i].lang; i++)
    {
        if (cst_streq(language, lang_table[i].lang))
        {
            (lang_table[i].lang_init)(vox);
            lex = (lang_table[i].lex_init)();
            break;
        }
    }
    if (lex == NULL)
    {
        /* Language is not supported */
        /* Delete allocated memory in cg_db */
        cst_cg_free_db(vd, cg_db);
        cst_fclose(vd);
        cst_errmsg("Error loading voice: lang/lex %s not supported in this binary\n",
                   language);
        return NULL;
    }

    /* Things that weren't filled in already. */
    vox->name = cg_db->name;
    flite_feat_set_string(vox->features, "name", cg_db->name);
    flite_feat_set_string(vox->features, "pathname", filename);

    flite_feat_set(vox->features, "lexicon", lexicon_val(lex));
    flite_feat_set(vox->features, "postlex_func", uttfunc_val(lex->postlex));

    /* No standard segment durations are needed as it's done at the */
    /* HMM state level */
    flite_feat_set_string(vox->features, "no_segment_duration_model", "1");
    flite_feat_set_string(vox->features, "no_f0_target_model", "1");

    /* Waveform synthesis */
    flite_feat_set(vox->features, "wave_synth_func", uttfunc_val(&cg_synth));
    flite_feat_set(vox->features, "cg_db", cg_db_val(cg_db));
    flite_feat_set_int(vox->features, "sample_rate", cg_db->sample_rate);

    cst_fclose(vd);

    return vox;
}
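/*
 * Example (not part of the original sources): a hypothetical caller that
 * loads a CG voice from a .flitevox file with the loader above.  The
 * cst_lang field names (lang, lang_init, lex_init) are taken from their use
 * in the search loop; designated initializers are used so no field order is
 * assumed.  The file path and the names example_lang_table /
 * example_load_cg_voice are illustrative only.
 */
static const cst_lang example_lang_table[] = {
    { .lang = "eng", .lang_init = usenglish_init, .lex_init = cmu_lex_init },
    { .lang = "usenglish", .lang_init = usenglish_init, .lex_init = cmu_lex_init },
    { .lang = NULL }  /* terminator for the search loop */
};

cst_voice *example_load_cg_voice(void)
{
    /* Returns NULL if the file is missing, malformed, or its "language"
       feature is not present in the table above. */
    return cst_cg_load_voice("/path/to/voice.flitevox", example_lang_table);
}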
int main(int argc, char **argv)
{
    struct timeval tv;
    cst_voice *v;
    const char *filename;
    const char *outtype;
    cst_voice *desired_voice = 0;
    const char *voicedir = NULL;
    int i;
    float durs;
    double time_start, time_end;
    int flite_verbose, flite_loop, flite_bench;
    int explicit_filename, explicit_text, explicit_phones, ssml_mode;
#define ITER_MAX 3
    int bench_iter = 0;
    cst_features *extra_feats;
    const char *lex_addenda_file = NULL;
    const char *voicedumpfile = NULL;
    cst_audio_streaming_info *asi;

    filename = 0;
    outtype = "play";  /* default is to play */
    flite_verbose = FALSE;
    flite_loop = FALSE;
    flite_bench = FALSE;
    explicit_text = explicit_filename = explicit_phones = FALSE;
    ssml_mode = FALSE;
    extra_feats = new_features();

    flite_init();
    flite_add_lang("eng", usenglish_init, cmu_lex_init);

    for (i = 1; i < argc; i++)
    {
        if (cst_streq(argv[i], "--version"))
        {
            flite_version();
            return 1;
        }
        else if (cst_streq(argv[i], "-h") ||
                 cst_streq(argv[i], "--help") ||
                 cst_streq(argv[i], "-?"))
            flite_usage();
        else if (cst_streq(argv[i], "-v"))
            flite_verbose = TRUE;
        else if (cst_streq(argv[i], "-lv"))
        {
            if (flite_voice_list == NULL)
                flite_set_voice_list(voicedir);
            flite_voice_list_print();
            exit(0);
        }
        else if (cst_streq(argv[i], "-l"))
            flite_loop = TRUE;
        else if (cst_streq(argv[i], "-b"))
        {
            flite_bench = TRUE;
            break;  /* ignore other arguments */
        }
        else if ((cst_streq(argv[i], "-o")) && (i + 1 < argc))
        {
            outtype = argv[i + 1];
            i++;
        }
        else if ((cst_streq(argv[i], "-voice")) && (i + 1 < argc))
        {
            if (flite_voice_list == NULL)
                flite_set_voice_list(voicedir);
            desired_voice = flite_voice_select(argv[i + 1]);
            i++;
        }
        else if ((cst_streq(argv[i], "-voicedir")) && (i + 1 < argc))
        {
            voicedir = argv[i + 1];
            if (flite_voice_list == NULL)
                flite_set_voice_list(voicedir);
            i++;
        }
        else if ((cst_streq(argv[i], "-add_lex")) && (i + 1 < argc))
        {
            lex_addenda_file = argv[i + 1];
            i++;
        }
        else if (cst_streq(argv[i], "-f") && (i + 1 < argc))
        {
            filename = argv[i + 1];
            explicit_filename = TRUE;
            i++;
        }
        else if (cst_streq(argv[i], "-pw"))
        {
            feat_set_string(extra_feats, "print_info_relation", "Word");
            feat_set(extra_feats, "post_synth_hook_func", uttfunc_val(&print_info));
        }
        else if (cst_streq(argv[i], "-ps"))
        {
            feat_set_string(extra_feats, "print_info_relation", "Segment");
            feat_set(extra_feats, "post_synth_hook_func", uttfunc_val(&print_info));
        }
        else if (cst_streq(argv[i], "-ssml"))
        {
            ssml_mode = TRUE;
        }
        else if (cst_streq(argv[i], "-pr") && (i + 1 < argc))
        {
            feat_set_string(extra_feats, "print_info_relation", argv[i + 1]);
            feat_set(extra_feats, "post_synth_hook_func", uttfunc_val(&print_info));
            i++;
        }
        else if (cst_streq(argv[i], "-voicedump") && (i + 1 < argc))
        {
            voicedumpfile = argv[i + 1];
            i++;
        }
        else if ((cst_streq(argv[i], "-set") || cst_streq(argv[i], "-s")) &&
                 (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], 0);
            i++;
        }
        else if (cst_streq(argv[i], "--seti") && (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], "int");
            i++;
        }
        else if (cst_streq(argv[i], "--setf") && (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], "float");
            i++;
        }
        else if (cst_streq(argv[i], "--sets") && (i + 1 < argc))
        {
            ef_set(extra_feats, argv[i + 1], "string");
            i++;
        }
        else if (cst_streq(argv[i], "-p") && (i + 1 < argc))
        {
            filename = argv[i + 1];
            explicit_phones = TRUE;
            i++;
        }
        else if (cst_streq(argv[i], "-t") && (i + 1 < argc))
        {
            filename = argv[i + 1];
            explicit_text = TRUE;
            i++;
        }
        else if (filename)
            outtype = argv[i];
        else
            filename = argv[i];
    }

    if (filename == NULL)
        filename = "-";  /* stdin */

    if (flite_voice_list == NULL)
        flite_set_voice_list(voicedir);
    if (desired_voice == 0)
        desired_voice = flite_voice_select(NULL);
    v = desired_voice;

    feat_copy_into(extra_feats, v->features);
    durs = 0.0;

    if (voicedumpfile != NULL)
    {
        flite_voice_dump(v, voicedumpfile);
        exit(0);
    }

    if (lex_addenda_file)
        flite_voice_add_lex_addenda(v, lex_addenda_file);

    if (cst_streq("stream", outtype))
    {
        asi = new_audio_streaming_info();
        asi->asc = audio_stream_chunk;
        feat_set(v->features, "streaming_info", audio_streaming_info_val(asi));
    }

    if (flite_bench)
    {
        outtype = "none";
        filename = "A whole joy was reaping, but they've gone south, you should fetch azure mike.";
        explicit_text = TRUE;
    }

loop:
    gettimeofday(&tv, NULL);
    time_start = (double)(tv.tv_sec) + (((double)tv.tv_usec) / 1000000.0);

    if (explicit_phones)
        durs = flite_phones_to_speech(filename, v, outtype);
    else if ((strchr(filename, ' ') && !explicit_filename) || explicit_text)
    {
        if (ssml_mode)
            durs = flite_ssml_text_to_speech(filename, v, outtype);
        else
            durs = flite_text_to_speech(filename, v, outtype);
    }
    else
    {
        if (ssml_mode)
            durs = flite_ssml_file_to_speech(filename, v, outtype);
        else
            durs = flite_file_to_speech(filename, v, outtype);
    }

    gettimeofday(&tv, NULL);
    time_end = ((double)(tv.tv_sec)) + ((double)tv.tv_usec / 1000000.0);

    if (flite_verbose || (flite_bench && bench_iter == ITER_MAX))
        printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n",
               durs / (float)(time_end - time_start),
               durs, (float)(time_end - time_start));

    if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX))
        goto loop;

    delete_features(extra_feats);
    delete_val(flite_voice_list);
    flite_voice_list = 0;
    /* cst_alloc_debug_summary(); */

    return 0;
}