cst_voice *register___VOICENAME__(const char *voxdir) { cst_voice *v = new_voice(); v->name = "__NICKNAME__"; /* Sets up language specific parameters in the __VOICENAME__. */ usenglish_init(v); /* Things that weren't filled in already. */ feat_set_string(v->features,"name","__VOICENAME__"); /* Lexicon */ cmu_lex_init(); feat_set(v->features,"lexicon",lexicon_val(&cmu_lex)); /* Waveform synthesis */ feat_set(v->features,"wave_synth_func",uttfunc_val(&clunits_synth)); feat_set(v->features,"clunit_db",clunit_db_val(&__VOICENAME___db)); feat_set_int(v->features,"sample_rate",__VOICENAME___db.sts->sample_rate); feat_set_string(v->features,"join_type","simple_join"); feat_set_string(v->features,"resynth_type","fixed"); /* Unit selection */ __VOICENAME___db.unit_name_func = __VOICENAME___unit_name; __VOICENAME___clunits = v; return __VOICENAME___clunits; }
/*
 * Turn a usage description into a table of option types.
 *
 * The description is tokenized on whitespace.  Every token that begins
 * with '-' (and for which the newline test on ts->whitespace holds) is
 * recorded in f as an option: its value is the following token when that
 * token starts with '<' (e.g. "<int>"), otherwise "<binary>".
 */
static void parse_description(const char *description, cst_features *f)
{
    cst_tokenstream *stream;
    const char *next_tok;
    const char *key;
    char *word;

    stream = ts_open_string(description,
                            " \t\r\n",  /* whitespace */
                            "{}[]|",    /* singlecharsymbols */
                            "",         /* prepunctuation */
                            "");        /* postpunctuation */

    while (!ts_eof(stream))
    {
        /* Copy the token: the next ts_get() is made before we are done with it. */
        word = cst_strdup(ts_get(stream));

        if ((word[0] == '-') &&
            (cst_strchr(stream->whitespace, '\n') != 0))
        {
            /* An option: the feature set gets its own copy of the name. */
            key = feat_own_string(f, word);
            next_tok = ts_get(stream);
            feat_set_string(f, key,
                            (next_tok[0] == '<') ? next_tok : "<binary>");
        }

        cst_free(word);
    }

    ts_close(stream);
}
cst_voice *register_cmu_us_kal16(const char *voxdir) { cst_voice *v = new_voice(); usenglish_init(v); /* Set up basic values for synthesizing with this voice */ feat_set_string(v->features,"name","cmu_us_kal_diphone"); /* Lexicon */ cmu_lex_init(); feat_set(v->features,"lexicon",lexicon_val(&cmu_lex)); /* Intonation */ feat_set_float(v->features,"int_f0_target_mean",105.0); feat_set_float(v->features,"int_f0_target_stddev",14.0); /* Post lexical rules */ feat_set(v->features,"postlex_func",uttfunc_val(&cmu_us_kal_postlex)); /* Duration */ feat_set_float(v->features,"duration_stretch",1.1); /* Waveform synthesis: diphone_synth */ feat_set(v->features,"wave_synth_func",uttfunc_val(&diphone_synth)); feat_set(v->features,"diphone_db",diphone_db_val(&cmu_us_kal_db)); feat_set_int(v->features,"sample_rate",cmu_us_kal_db.sts->sample_rate); feat_set_string(v->features,"resynth_type","fixed"); feat_set_string(v->features,"join_type","modified_lpc"); cmu_us_kal_diphone = v; return cmu_us_kal_diphone; }
/*
 * Turn a usage description into a table of option types.
 *
 * The description is tokenized on whitespace.  Every token that begins
 * with '-' (and for which the newline test on ts->whitespace holds) is
 * recorded in f as an option: its value is the following token when that
 * token starts with '<' (e.g. "<int>"), otherwise "<binary>".
 *
 * Ownership: each token is duplicated with cst_strdup().  When the copy
 * is used as a feature name it is handed to feat_set_string() and is NOT
 * freed here, because the feature set may keep the name pointer.  Copies
 * of all other tokens are freed immediately; previously they leaked.
 */
static void parse_description(const char *description, cst_features *f)
{
    cst_tokenstream *ts;
    const char *arg;
    char *op;

    ts = ts_open_string(description);
    ts->whitespacesymbols = " \t\r\n";
    ts->singlecharsymbols = "{}[]|";
    ts->prepunctuationsymbols = "";
    ts->postpunctuationsymbols = "";

    while (!ts_eof(ts))
    {
        op = cst_strdup(ts_get(ts));
        if ((op[0] == '-') &&
            (strchr(ts->whitespace,'\n') != 0))
        {
            /* got an option; op now lives on as the feature name */
            arg = ts_get(ts);
            if (arg[0] == '<')
                feat_set_string(f,op,arg);
            else
                feat_set_string(f,op,"<binary>");
        }
        else
        {
            /* Ordinary description text: nobody else references the copy. */
            cst_free(op);
        }
    }

    ts_close(ts);
}
/*
 * Synthesize text with the currently selected voice and keep the result
 * in previous_wave (replacing and freeing any earlier wave).
 *
 * The "print_info_relation" feature of the voice is reset on every call
 * and then set to "Word" or "Segment" when flowm_selected_relation is 1
 * or 2 respectively.  The audio device fl_ad is flushed and closed
 * before returning.
 *
 * Returns the number of samples in the newly synthesized wave.
 */
int flowm_say_text(TCHAR *text)
{
    char *utf_text;
    cst_voice *voice;
    int sample_count;

    /* Drop whatever was synthesized last time. */
    if (previous_wave)
    {
        delete_wave(previous_wave);
        previous_wave = NULL;
    }

    utf_text = cst_wstr2cstr(text);             /* text to synthesize */
    voice = VoxDefs[flowm_selected_voice].v;    /* voice to synthesize with */

    feat_remove(voice->features, "print_info_relation");
    switch (flowm_selected_relation)
    {
    case 1:
        feat_set_string(voice->features, "print_info_relation", "Word");
        break;
    case 2:
        feat_set_string(voice->features, "print_info_relation", "Segment");
        break;
    default:
        break;
    }

    /* Do the synthesis */
    previous_wave = flite_text_to_wave(utf_text, voice);
    sample_count = cst_wave_num_samples(previous_wave);

    cst_free(utf_text);

    audio_flush(fl_ad);
    audio_close(fl_ad);
    fl_ad = NULL;

    return sample_count;
}
/*
 * Parse command line arguments against a usage description.
 *
 * description is converted by parse_description() into a table mapping
 * each option name to "<binary>", "<int>", "<float>" or "<string>".
 * Recognised options are stored in args under their own name (including
 * the leading '-'); binary options get the string "true".  Unknown
 * options and the help flags (-h, -?, --help, -help) call parse_usage().
 *
 * Returns the non-option arguments as a list, in command line order.
 */
cst_val *cst_args(char **argv, int argc, const char *description, cst_features *args)
{
    cst_features *op_types = new_features();
    cst_val *files = NULL;
    const char *type;
    int i;

    parse_description(description, op_types);

    for (i = 1; i < argc; i++)
    {
        const char *opt = argv[i];

        /* Anything not starting with '-' is a plain file argument. */
        if (opt[0] != '-')
        {
            files = cons_val(string_val(opt), files);
            continue;
        }

        if (!feat_present(op_types, opt) ||
            cst_streq("-h", opt) ||
            cst_streq("-?", opt) ||
            cst_streq("--help", opt) ||
            cst_streq("-help", opt))
        {
            parse_usage(argv[0], "", "", description);
            continue;
        }

        type = feat_string(op_types, opt);
        if (cst_streq("<binary>", type))
        {
            feat_set_string(args, opt, "true");
            continue;
        }

        /* Every remaining type consumes the following argument. */
        if (i + 1 == argc)
            parse_usage(argv[0], "missing argument for ", opt, description);

        if (cst_streq("<int>", type))
            feat_set_int(args, opt, atoi(argv[i + 1]));
        else if (cst_streq("<float>", type))
            feat_set_float(args, opt, atof(argv[i + 1]));
        else if (cst_streq("<string>", type))
            feat_set_string(args, opt, argv[i + 1]);
        else
            parse_usage(argv[0], "unknown arg type ", type, description);

        i++;
    }

    delete_features(op_types);

    /* files was built by prepending, so flip it back into argv order. */
    return val_reverse(files);
}
/*
 * Set one feature in f from a "NAME=VALUE" string.
 *
 * type may be "int", "float", any other string (treated as string), or
 * 0 to guess: values matching cst_rx_int become ints, values matching
 * cst_rx_double become floats, everything else is stored as a string.
 * If fv contains no '=', a message is printed and nothing is set.
 */
static void ef_set(cst_features *f,const char *fv,const char *type)
{
    const char *eq = strchr(fv, '=');
    const char *val;
    char *feat;

    if (eq == 0)
    {
        fprintf(stderr,
                "flite: can't find '=' in featval \"%s\", ignoring it\n",
                fv);
        return;
    }

    /* Split a private copy at the '=' to obtain the feature name. */
    feat = cst_strdup(fv);
    feat[eq - fv] = '\0';
    val = eq + 1;

    if (type ? cst_streq("int", type)
             : cst_regex_match(cst_rx_int, val))
        feat_set_int(f, feat, atoi(val));
    else if (type ? cst_streq("float", type)
                  : cst_regex_match(cst_rx_double, val))
        feat_set_float(f, feat, atof(val));
    else
        feat_set_string(f, feat, val);

    /* feat is intentionally not freed: the feature set treats feature */
    /* names as constant strings, so the copy has to stay alive.       */
}
/*
 * Set one feature in f from a "NAME=VALUE" string.
 *
 * type may be "int", "float", any other string (treated as string), or
 * 0 to guess: values matching cst_rx_int become ints, values matching
 * cst_rx_double become floats, everything else is stored as a string.
 * If fv contains no '=', a message is printed and nothing is set.
 */
static void ef_set(cst_features *f,const char *fv,const char *type)
{
    const char *sep;
    const char *val;
    char *feat;
    int guess = (type == 0);

    sep = strchr(fv, '=');
    if (sep == 0)
    {
        fprintf(stderr,
                "flite: can't find '=' in featval \"%s\", ignoring it\n",
                fv);
        return;
    }

    /* The name is everything before the '='; the value everything after. */
    feat = cst_strdup(fv);
    feat[sep - fv] = '\0';
    val = sep + 1;

    if ((!guess && cst_streq("int", type)) ||
        (guess && cst_regex_match(cst_rx_int, val)))
        feat_set_int(f, feat, atoi(val));
    else if ((!guess && cst_streq("float", type)) ||
             (guess && cst_regex_match(cst_rx_double, val)))
        feat_set_float(f, feat, atof(val));
    else
        feat_set_string(f, feat, val);

    /* NOTE(review): freeing the name is only safe if the feat_set_* calls */
    /* keep their own copy of it -- confirm against the features code.     */
    cst_free(feat);
}
cst_voice* register_cmu_us_no_wave( const char* voxdir ) { cst_voice* v = new_voice(); cst_lexicon* lex; v->name = "no_wave_voice"; /* Set up basic values for synthesizing with this voice */ usenglish_init( v ); feat_set_string( v->features, "name", "cmu_us_no_wave" ); /* Lexicon */ lex = cmu_lex_init(); feat_set( v->features, "lexicon", lexicon_val( lex ) ); /* Intonation */ feat_set_float( v->features, "int_f0_target_mean", 95.0 ); feat_set_float( v->features, "int_f0_target_stddev", 11.0 ); feat_set_float( v->features, "duration_stretch", 1.1 ); /* Post lexical rules */ feat_set( v->features, "postlex_func", uttfunc_val( lex->postlex ) ); /* Waveform synthesis: diphone_synth */ feat_set( v->features, "wave_synth_func", uttfunc_val( &no_wave_synth ) ); cmu_us_no_wave = v; return cmu_us_no_wave; }
/*
 * Install the Russian processing functions and settings on u.
 *
 * Sets the phoneset and silence symbol, the per-stage utterance
 * functions, the two "no_*_model" flags, and hts_synth as the waveform
 * synthesizer; finally registers the Russian feature functions.
 */
void russian_init(cst_utterance *u)
{
    cst_features *feats = u->features;

    /* Phoneset */
    feat_set(feats, "phoneset", phoneset_val(&ru_phoneset));
    feat_set_string(feats, "silence", ru_phoneset.silence);

    /* Per-stage processing functions */
    feat_set(feats, "textanalysis_func",
             uttfunc_val(&russian_textanalysis));
    feat_set(feats, "lexical_insertion_func",
             uttfunc_val(&russian_lexical_insertion));
    feat_set(feats, "phrasing_func",
             uttfunc_val(&russian_phrasify));
    feat_set(feats, "pause_insertion_func",
             uttfunc_val(&russian_pause_insertion));
    feat_set(feats, "intonation_func",
             uttfunc_val(&do_nothing));
    feat_set(feats, "postlex_func",
             uttfunc_val(russian_postlex_function));

    /* Duration and f0 stages are stubbed out with do_nothing. */
    feat_set(feats, "duration_model_func", uttfunc_val(&do_nothing));
    feat_set(feats, "f0_model_func", uttfunc_val(&do_nothing));
    feat_set_string(feats, "no_segment_duration_model", "1");
    feat_set_string(feats, "no_f0_target_model", "1");

    /* Waveform synthesis */
    feat_set(feats, "wave_synth_func", uttfunc_val(&hts_synth));

    ru_ff_register(u->ffunctions);
}
/*
 * Read tokens up to the closing ">" of a tag and collect them in a
 * newly allocated feature set.
 *
 * "_type" is set to "startend" when a "/" token is seen and to "start"
 * for any other token; that other token is stored as "_name0" and, when
 * followed by "=", its quoted value as "_val0".  The token stream uses
 * the in-attribute single-character symbols while scanning and is
 * switched back to the general set before a normal return.
 *
 * Returns the feature set, or 0 (after freeing it and printing a
 * message) if the stream ends before ">" is found.
 */
static cst_features *ssml_get_attributes(cst_tokenstream *ts)
{
    cst_features *attrs = new_features();
    const char *name;
    const char *val;

    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_inattr,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    for (name = ts_get(ts); !cst_streq(">", name); name = ts_get(ts))
    {
        if (cst_streq(name, "/"))
        {
            feat_set_string(attrs, "_type", "startend");
        }
        else
        {
            feat_set_string(attrs, "_type", "start");
            feat_set_string(attrs, "_name0", name);
            if (cst_streq("=", ts_get(ts)))
            {
                val = ts_get_quoted_remainder(ts);
                feat_set_string(attrs, "_val0", val);
            }
        }

        /* Ran out of input before the tag was closed. */
        if (ts_eof(ts))
        {
            fprintf(stderr,"ssml: unexpected EOF\n");
            delete_features(attrs);
            return 0;
        }
    }

    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_general,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    return attrs;
}
/*
 * Install the US English language settings on voice v: utterance break
 * function, phoneset, tokenizer character classes, token-to-words
 * function, POS/phrasing/intonation CART trees and the US feature
 * functions.  The duration and f0 models are only installed when
 * FLITE_PLUS_HTS_ENGINE is not defined.
 */
void usenglish_init(cst_voice *v)
{
    cst_features *feats;

    us_text_init();
    feats = v->features;

    /* utterance break function */
    feat_set(feats, "utt_break", breakfunc_val(&default_utt_break));

    /* Phoneset */
    feat_set(feats, "phoneset", phoneset_val(&us_phoneset));
    feat_set_string(feats, "silence", us_phoneset.silence);

    /* Text analyser */
    feat_set_string(feats, "text_whitespace", us_english_whitespace);
    feat_set_string(feats, "text_postpunctuation", us_english_punctuation);
    feat_set_string(feats, "text_prepunctuation", us_english_prepunctuation);
    feat_set_string(feats, "text_singlecharsymbols",
                    us_english_singlecharsymbols);
    feat_set(feats, "tokentowords_func", itemfunc_val(&us_tokentowords));

    /* very simple POS tagger */
    feat_set(feats, "pos_tagger_cart", cart_val(&us_pos_cart));

    /* Phrasing */
    feat_set(feats, "phrasing_cart", cart_val(&us_phrasing_cart));

    /* Intonation */
    feat_set(feats, "int_cart_accents", cart_val(&us_int_accent_cart));
    feat_set(feats, "int_cart_tones", cart_val(&us_int_tone_cart));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Duration */
    feat_set(feats, "dur_cart", cart_val(&us_durz_cart));
    feat_set(feats, "dur_stats", dur_stats_val((dur_stats *)us_dur_stats));

    /* f0 model */
    feat_set(feats, "f0_model_func", uttfunc_val(&us_f0_model));
#endif /* !FLITE_PLUS_HTS_ENGINE */

    us_ff_register(v->ffunctions);
}
cst_voice *register_cmu_us_kal16(const char *voxdir) { cst_voice *v; cst_lexicon *lex; if (cmu_us_kal16_diphone) return cmu_us_kal16_diphone; /* Already registered */ v = new_voice(); v->name = "kal16"; /* Sets up language specific parameters in the cmu_us_kal16. */ usenglish_init(v); feat_set_string(v->features,"name","cmu_us_kal16"); feat_set_float(v->features,"int_f0_target_mean",95.0); feat_set_float(v->features,"int_f0_target_stddev",11.0); feat_set_float(v->features,"duration_stretch",1.1); /* Lexicon */ lex = cmu_lex_init(); feat_set(v->features,"lexicon",lexicon_val(lex)); feat_set(v->features,"postlex_func",uttfunc_val(lex->postlex)); /* Waveform synthesis */ feat_set(v->features,"wave_synth_func",uttfunc_val(&diphone_synth)); feat_set(v->features,"diphone_db",diphone_db_val(&cmu_us_kal16_db)); feat_set_int(v->features,"sample_rate",cmu_us_kal16_db.sts->sample_rate); /* feat_set_string(v->features,"join_type","simple_join"); */ feat_set_string(v->features,"join_type","modified_lpc"); feat_set_string(v->features,"resynth_type","fixed"); cmu_us_kal16_diphone = v; return cmu_us_kal16_diphone; }
/*
 * Return a shared string value for n.
 *
 * Values are kept in the val_string_consts feature set (created on
 * first use), keyed by n itself; a value is created the first time a
 * given string is requested and returned from the table afterwards.
 *
 * The original author marked this routine as a known *BUG* that will
 * have to be fixed.
 */
const cst_val *val_string_x(const char *n)
{
    const cst_val *cached;

    if (!val_string_consts)
        val_string_consts = new_features();

    cached = feat_val(val_string_consts, n);
    if (!cached)
    {
        /* First request for this string: add it, then look it up again. */
        feat_set_string(val_string_consts, n, n);
        cached = feat_val(val_string_consts, n);
    }

    return cached;
}
/*
 * Install the LANGNAME language settings on voice v: language name,
 * utterance break function, phoneset and silence symbol, default
 * tokenizer character classes, the token-to-words function, and the
 * basic feature functions.
 *
 * Fix: "text_postpunctuation" and "text_prepunctuation" used to be
 * assigned each other's default symbol sets; each is now set from the
 * matching cst_ts_default_* constant.
 */
void cmu_LANGNAME_lang_init(cst_voice *v)
{
    /* Set LANGNAME language stuff */
    feat_set_string(v->features,"language","cmu_LANGNAME_lang");

    /* utterance break function */
    feat_set(v->features,"utt_break",breakfunc_val(&default_utt_break));

    /* Phoneset -- need to get this from voice */
    feat_set(v->features,"phoneset",phoneset_val(&cmu_LANGNAME_phoneset));
    feat_set_string(v->features,"silence",cmu_LANGNAME_phoneset.silence);

    /* Get information from voice and add to lexicon */

    /* Text analyser -- whitespace defaults */
    feat_set_string(v->features,"text_whitespace",
                    cst_ts_default_whitespacesymbols);
    feat_set_string(v->features,"text_postpunctuation",
                    cst_ts_default_postpunctuationsymbols);
    feat_set_string(v->features,"text_prepunctuation",
                    cst_ts_default_prepunctuationsymbols);
    feat_set_string(v->features,"text_singlecharsymbols",
                    cst_ts_default_singlecharsymbols);

    /* Tokenization function */
    feat_set(v->features,"tokentowords_func",
             itemfunc_val(&cmu_LANGNAME_tokentowords));

    /* Pos tagger (gpos)/induced pos */

    /* Phrasing */
    /* feat_set(v->features,"phrasing_cart",cart_val(&cmu_LANGNAME_phrasing_cart)); */

    /* Intonation, Duration and F0 -- part of cg */
    feat_set_string(v->features,"no_intonation_accent_model","1");

    /* Default ffunctions (required) */
    basic_ff_register(v->ffunctions);

    return;
}
void item_set_string(const cst_item *i,const char *name,const char *val) { feat_set_string(item_feats(i),name,val); }
int main(int argc, char **argv) { bard_reader *br; cst_features *args; cst_features *config; float idle_time; int spos; args = new_features(); cst_args(argv,argc, "usage: bard OPTIONS\n" "Bard Storyteller\n" "-text <string> Text filename to read\n" "-text_pos <int> Starting position in text file\n" "-config_file <string> Override default config file ($HOME/.bard_config)\n" "-font <string> Pathname to ttf font file\n" "-font_size <int> Initial font size\n" "-audio_method <string> SDL or flite\n" "-voice <string> Voice name (or pathname to dumped voice)\n" "-audio_stream_buffer_factor <int> Bigger for slower processors\n" "-gain <float> Volume factor (1.0 is default)\n" "-speed <float> Duration stretch (inverse speed)\n" "-battery_script <string> Script to get battery charge state\n" "-screen_height <int> \n" "-screen_width <int> \n" "-screen_blank_idle_time <float> In secs (0 means no blanking)\n" "-scroll_delay <int> In ms\n" "-voices_dir <string> Directory containing *.flitevox voices\n" "-debug <int> Print debug messages\n" "", args); /* If text specified and no position given, start from 0 */ if (feat_present(args,"-text") && !feat_present(args,"-text_pos")) feat_set_int(args,"-text_pos",0); if (!feat_present(args,"-font")) feat_set_string(args,"-font",BARD_DEFAULT_FONT); bard_debug = get_param_int(args,"-debug",0); config = bard_read_config(args); cst_feat_print(stdout,config); br = bard_open(config); if (br == NULL) return -1; /* something went really wrong */ /* Make the windows */ br->text = bard_make_text_window(br); br->file_select = bard_make_file_select_window(br); br->info = bard_make_info_window(br); /* general info/params */ br->help = bard_make_help_window(br); br->recent = bard_make_recent_window(br); br->menu = bard_make_menu_window(br); if (feat_present(br->config,"-voices_dir")) br->voice_select = bard_make_voice_select_window(br); /* Select focus */ br->display->current = br->text; bard_display_clear(br->display); 
bard_display_update(br->display); bard_screen_on(NULL); /* just in case it is currently off */ while (br->quit == 0) { if (input_process_events(br) == 0) { if (br->scroll) { SDL_Delay(br->scroll_delay); /* no key press but scrolling */ br->quiet_time = SDL_GetTicks(); /* no blank while scrolling */ } else SDL_Delay(br->no_key_delay); /* no key press, so pause a bit */ } if (br->speak == 1) { /* Within the audio callback function, we also call */ /* input_process_events so we can stop if requested */ br->pause = 0; if (br->display->current->current_token) spos = br->display->current->current_token->file_pos; else spos = br->display->current->sop_pos; /* We need spos-1 here to make it display well */ bard_speak_text(br->speech,br->display->current->textfile,spos-1); br->speak = 0; br->quiet_time = SDL_GetTicks(); } else if (br->scroll) { /* Speaking and smooth scrolling are mutually exclusive */ if (bard_text_scroll(br->text) == 0) br->scroll = 0; /* Reached end of file so stop scroll */ bard_display_update(br->display); /* Update screen */ } else br->text->scroll_offset = 0; /* Check if we should black/hide cursor */ idle_time = (SDL_GetTicks()-br->quiet_time)/1000.0; if ((br->screen_blank_idle_time > 0) && (idle_time > br->screen_blank_idle_time) && (br->blank == 0)) { br->blank = 1; bard_screen_off(NULL); } idle_time = (SDL_GetTicks()-br->mouse_quiet_time)/1000.0; if (idle_time > 5.0) SDL_ShowCursor(SDL_DISABLE); } /* Save context */ bard_write_config(br); bard_close(br); delete_features(args); return 0; }
int main(int argc, char **argv) { struct timeval tv; cst_voice *v; const char *filename; const char *outtype; cst_voice *desired_voice = 0; const char *voicedir = NULL; int i; float durs; double time_start, time_end; int flite_verbose, flite_loop, flite_bench; int explicit_filename, explicit_text, explicit_phones, ssml_mode; #define ITER_MAX 3 int bench_iter = 0; cst_features *extra_feats; const char *lex_addenda_file = NULL; const char *voicedumpfile = NULL; cst_audio_streaming_info *asi; filename = 0; outtype = "play"; /* default is to play */ flite_verbose = FALSE; flite_loop = FALSE; flite_bench = FALSE; explicit_text = explicit_filename = explicit_phones = FALSE; ssml_mode = FALSE; extra_feats = new_features(); flite_init(); flite_add_lang("eng",usenglish_init,cmu_lex_init); for (i=1; i<argc; i++) { if (cst_streq(argv[i],"--version")) { flite_version(); return 1; } else if (cst_streq(argv[i],"-h") || cst_streq(argv[i],"--help") || cst_streq(argv[i],"-?")) flite_usage(); else if (cst_streq(argv[i],"-v")) flite_verbose = TRUE; else if (cst_streq(argv[i],"-lv")) { if (flite_voice_list == NULL) flite_set_voice_list(voicedir); flite_voice_list_print(); exit(0); } else if (cst_streq(argv[i],"-l")) flite_loop = TRUE; else if (cst_streq(argv[i],"-b")) { flite_bench = TRUE; break; /* ignore other arguments */ } else if ((cst_streq(argv[i],"-o")) && (i+1 < argc)) { outtype = argv[i+1]; i++; } else if ((cst_streq(argv[i],"-voice")) && (i+1 < argc)) { if (flite_voice_list == NULL) flite_set_voice_list(voicedir); desired_voice = flite_voice_select(argv[i+1]); i++; } else if ((cst_streq(argv[i],"-voicedir")) && (i+1 < argc)) { voicedir = argv[i+1]; if (flite_voice_list == NULL) flite_set_voice_list(voicedir); i++; } else if ((cst_streq(argv[i],"-add_lex")) && (i+1 < argc)) { lex_addenda_file = argv[i+1]; i++; } else if (cst_streq(argv[i],"-f") && (i+1 < argc)) { filename = argv[i+1]; explicit_filename = TRUE; i++; } else if (cst_streq(argv[i],"-pw")) { 
feat_set_string(extra_feats,"print_info_relation","Word"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ps")) { feat_set_string(extra_feats,"print_info_relation","Segment"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ssml")) { ssml_mode = TRUE; } else if (cst_streq(argv[i],"-pr") && (i+1 < argc)) { feat_set_string(extra_feats,"print_info_relation",argv[i+1]); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); i++; } else if (cst_streq(argv[i],"-voicedump") && (i+1 < argc)) { voicedumpfile = argv[i+1]; i++; } else if ((cst_streq(argv[i],"-set") || cst_streq(argv[i],"-s")) && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],0); i++; } else if (cst_streq(argv[i],"--seti") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"int"); i++; } else if (cst_streq(argv[i],"--setf") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"float"); i++; } else if (cst_streq(argv[i],"--sets") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"string"); i++; } else if (cst_streq(argv[i],"-p") && (i+1 < argc)) { filename = argv[i+1]; explicit_phones = TRUE; i++; } else if (cst_streq(argv[i],"-t") && (i+1 < argc)) { filename = argv[i+1]; explicit_text = TRUE; i++; } else if (filename) outtype = argv[i]; else filename = argv[i]; } if (filename == NULL) filename = "-"; /* stdin */ if (flite_voice_list == NULL) flite_set_voice_list(voicedir); if (desired_voice == 0) desired_voice = flite_voice_select(NULL); v = desired_voice; feat_copy_into(extra_feats,v->features); durs = 0.0; if (voicedumpfile != NULL) { flite_voice_dump(v,voicedumpfile); exit(0); } if (lex_addenda_file) flite_voice_add_lex_addenda(v,lex_addenda_file); if (cst_streq("stream",outtype)) { asi = new_audio_streaming_info(); asi->asc = audio_stream_chunk; feat_set(v->features,"streaming_info",audio_streaming_info_val(asi)); } if (flite_bench) { outtype = "none"; filename = "A whole joy was reaping, but 
they've gone south, you should fetch azure mike."; explicit_text = TRUE; } loop: gettimeofday(&tv,NULL); time_start = (double)(tv.tv_sec)+(((double)tv.tv_usec)/1000000.0); if (explicit_phones) durs = flite_phones_to_speech(filename,v,outtype); else if ((strchr(filename,' ') && !explicit_filename) || explicit_text) { if (ssml_mode) durs = flite_ssml_text_to_speech(filename,v,outtype); else durs = flite_text_to_speech(filename,v,outtype); } else { if (ssml_mode) durs = flite_ssml_file_to_speech(filename,v,outtype); else durs = flite_file_to_speech(filename,v,outtype); } gettimeofday(&tv,NULL); time_end = ((double)(tv.tv_sec))+((double)tv.tv_usec/1000000.0); if (flite_verbose || (flite_bench && bench_iter == ITER_MAX)) printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n", durs/(float)(time_end-time_start), durs, (float)(time_end-time_start)); if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX)) goto loop; delete_features(extra_feats); delete_val(flite_voice_list); flite_voice_list=0; /* cst_alloc_debug_summary(); */ return 0; }
int main(int argc, char **argv) { struct timeval tv; cst_voice *v; const char *filename; const char *outtype; int i; float durs; double time_start, time_end; int flite_verbose, flite_loop, flite_bench; int explicit_filename, explicit_text, explicit_phones; #define ITER_MAX 3 int bench_iter = 0; cst_features *extra_feats; filename = 0; outtype = "play"; /* default is to play */ flite_verbose = FALSE; flite_loop = FALSE; flite_bench = FALSE; explicit_text = explicit_filename = explicit_phones = FALSE; extra_feats = new_features(); flite_init(); for (i=1; i<argc; i++) { if (cst_streq(argv[i],"--version")) { flite_version(); return 1; } else if (cst_streq(argv[i],"-h") || cst_streq(argv[i],"--help") || cst_streq(argv[i],"-?")) flite_usage(); else if (cst_streq(argv[i],"-v")) flite_verbose = TRUE; else if (cst_streq(argv[i],"-l")) flite_loop = TRUE; else if (cst_streq(argv[i],"-b")) { flite_bench = TRUE; break; /* ignore other arguments */ } else if ((cst_streq(argv[i],"-o")) && (i+1 < argc)) { outtype = argv[i+1]; i++; } else if (cst_streq(argv[i],"-f") && (i+1 < argc)) { filename = argv[i+1]; explicit_filename = TRUE; i++; } else if (cst_streq(argv[i],"-pw")) { feat_set_string(extra_feats,"print_info_relation","Word"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ps")) { feat_set_string(extra_feats,"print_info_relation","Segment"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-pr") && (i+1 < argc)) { feat_set_string(extra_feats,"print_info_relation",argv[i+1]); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); i++; } else if ((cst_streq(argv[i],"-set") || cst_streq(argv[i],"-s")) && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],0); i++; } else if (cst_streq(argv[i],"--seti") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"int"); i++; } else if (cst_streq(argv[i],"--setf") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"float"); i++; 
} else if (cst_streq(argv[i],"--sets") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"string"); i++; } else if (cst_streq(argv[i],"-p") && (i+1 < argc)) { filename = argv[i+1]; explicit_phones = TRUE; i++; } else if (cst_streq(argv[i],"-t") && (i+1 < argc)) { filename = argv[i+1]; explicit_text = TRUE; i++; } else if (filename) outtype = argv[i]; else filename = argv[i]; } if (filename == NULL) filename = "-"; /* stdin */ v = REGISTER_VOX(NULL); feat_copy_into(extra_feats,v->features); durs = 0.0; if (flite_bench) { outtype = "none"; filename = "A whole joy was reaping, but they've gone south, you should fetch azure mike."; explicit_text = TRUE; } loop: gettimeofday(&tv,NULL); time_start = (double)(tv.tv_sec)+(((double)tv.tv_usec)/1000000.0); if (explicit_phones) durs = flite_phones_to_speech(filename,v,outtype); else if ((strchr(filename,' ') && !explicit_filename) || explicit_text) durs = flite_text_to_speech(filename,v,outtype); else durs = flite_file_to_speech(filename,v,outtype); gettimeofday(&tv,NULL); time_end = ((double)(tv.tv_sec))+((double)tv.tv_usec/1000000.0); if (flite_verbose || (flite_bench && bench_iter == ITER_MAX)) printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n", durs/(float)(time_end-time_start), durs, (float)(time_end-time_start)); if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX)) goto loop; delete_features(extra_feats); UNREGISTER_VOX(v); return 0; }
/*
 * Public wrapper: set the string feature name to v in feature set f by
 * forwarding the arguments unchanged to feat_set_string().
 */
void flite_feat_set_string(cst_features *f, const char *name, const char *v)
{
    feat_set_string(f, name, v);
}
/*
 * Synthesize SSML-marked-up text read from the token stream ts.
 *
 * Tokens are collected into an utterance until an utterance break is
 * detected (empty token, more than 500 tokens, or the voice's break
 * function says so); the utterance is then synthesized with voice and
 * sent to outtype.  A "<" token starts a tag, which is parsed with
 * ssml_get_attributes() and applied with ssml_apply_tag(); "&" escape
 * sequences are currently skipped.
 *
 * Returns the sum of the values returned by flite_process_output().
 *
 * Fixes relative to the previous version:
 *  - the upcased "/" of an end tag is freed before tag is reused;
 *  - the unused ssml_feats set is no longer allocated, and
 *    ssml_word_feats is released before returning;
 *  - a NULL attribute set (ssml_get_attributes() hit EOF inside a tag)
 *    is no longer dereferenced; the truncated tag is ignored.
 */
static float flite_ssml_to_speech_ts(cst_tokenstream *ts,
                                     cst_voice *voice,
                                     const char *outtype)
{
    cst_features *ssml_word_feats;
    cst_features *attributes;
    const char *token;
    char *tag;
    cst_utterance *utt;
    cst_relation *tokrel;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    float durs = 0.0;
    cst_item *t;

    ssml_word_feats = new_features();
    set_charclasses(ts,
                    " \t\n\r",
                    ssml_singlecharsymbols_general,
                    get_param_string(voice->features,"text_prepunctuation",""),
                    get_param_string(voice->features,"text_postpunctuation","")
                    );

    /* Voice-supplied overrides for the break test and user callback. */
    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if (cst_streq("<",token))
        {   /* A tag */
            tag = cst_upcase(ts_get(ts));
            if (cst_streq("/",tag)) /* an end tag */
            {
                /* The "/" copy is no longer needed; the name follows. */
                cst_free(tag);
                tag = cst_upcase(ts_get(ts));
                attributes = ssml_get_attributes(ts);
                if (attributes)
                    feat_set_string(attributes,"_type","end");
            }
            else
                attributes = ssml_get_attributes(ts);

            /* attributes is NULL when the input ended inside the tag; */
            /* skip the tag and let the loop flush any pending tokens. */
            if (attributes)
                utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats);
            cst_free(tag);
        }
        else if (cst_streq("&",token))
        {   /* an escape sequence */
            /* skip to ; and insert value in rawdata */
        }
        else
        {
            if ((cst_strlen(token) == 0) ||
                (num_tokens > 500) ||  /* need an upper bound */
                (relation_head(tokrel) &&
                 breakfunc(ts,token,tokrel)))
            {
                /* An end of utt, so synthesize it */
                if (utt_user_callback)
                    utt = (utt_user_callback)(utt);

                if (utt)
                {
                    utt = flite_do_synth(utt,voice,utt_synth_tokens);
                    durs += flite_process_output(utt,outtype,TRUE);
                    delete_utterance(utt);
                    utt = NULL;
                }
                else
                    break;

                if (ts_eof(ts)) break;

                utt = new_utterance();
                tokrel = utt_relation_create(utt, "Token");
                num_tokens = 0;
            }

            num_tokens++;

            t = relation_append(tokrel, NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
            /* Mark it at the beginning of the token */
            item_set_int(t,"file_pos",
                         ts->file_pos-(1+ /* as we are already on the next char */
                                       cst_strlen(token)+
                                       cst_strlen(ts->prepunctuation)+
                                       cst_strlen(ts->postpunctuation)));
            item_set_int(t,"line_number",ts->line_number);
        }
    }

    delete_utterance(utt);
    delete_features(ssml_word_feats);
    return durs;
}