cst_voice *register_cmu_us_kal16(const char *voxdir) { cst_voice *v = new_voice(); usenglish_init(v); /* Set up basic values for synthesizing with this voice */ feat_set_string(v->features,"name","cmu_us_kal_diphone"); /* Lexicon */ cmu_lex_init(); feat_set(v->features,"lexicon",lexicon_val(&cmu_lex)); /* Intonation */ feat_set_float(v->features,"int_f0_target_mean",105.0); feat_set_float(v->features,"int_f0_target_stddev",14.0); /* Post lexical rules */ feat_set(v->features,"postlex_func",uttfunc_val(&cmu_us_kal_postlex)); /* Duration */ feat_set_float(v->features,"duration_stretch",1.1); /* Waveform synthesis: diphone_synth */ feat_set(v->features,"wave_synth_func",uttfunc_val(&diphone_synth)); feat_set(v->features,"diphone_db",diphone_db_val(&cmu_us_kal_db)); feat_set_int(v->features,"sample_rate",cmu_us_kal_db.sts->sample_rate); feat_set_string(v->features,"resynth_type","fixed"); feat_set_string(v->features,"join_type","modified_lpc"); cmu_us_kal_diphone = v; return cmu_us_kal_diphone; }
cst_voice *register___VOICENAME__(const char *voxdir) { cst_voice *v = new_voice(); v->name = "__NICKNAME__"; /* Sets up language specific parameters in the __VOICENAME__. */ usenglish_init(v); /* Things that weren't filled in already. */ feat_set_string(v->features,"name","__VOICENAME__"); /* Lexicon */ cmu_lex_init(); feat_set(v->features,"lexicon",lexicon_val(&cmu_lex)); /* Waveform synthesis */ feat_set(v->features,"wave_synth_func",uttfunc_val(&clunits_synth)); feat_set(v->features,"clunit_db",clunit_db_val(&__VOICENAME___db)); feat_set_int(v->features,"sample_rate",__VOICENAME___db.sts->sample_rate); feat_set_string(v->features,"join_type","simple_join"); feat_set_string(v->features,"resynth_type","fixed"); /* Unit selection */ __VOICENAME___db.unit_name_func = __VOICENAME___unit_name; __VOICENAME___clunits = v; return __VOICENAME___clunits; }
/*
 * Make `current` use the contents of item `i`, or brand new contents when
 * `i` is 0.
 *
 * Nothing happens when `current` already holds the wanted contents.
 * Otherwise the old contents are released with item_unref_contents(), the
 * new contents are attached, and the contents' per-relation table is
 * updated so that the entry for current's relation refers to `current`.
 */
void item_contents_set(cst_item *current, cst_item *i)
{
    cst_item_contents *wanted;
    cst_item *other;
    const char *relname;

    wanted = (i == 0) ? new_item_contents(current) : i->contents;
    if (wanted == current->contents)
        return;                 /* already sharing these contents */

    item_unref_contents(current);
    current->contents = wanted;
    relname = current->relation->name;

    /* The contents may already have an item recorded for this relation */
    if (feat_present(wanted->relations, relname))
    {
        other = val_item(feat_val(wanted->relations, relname));
        feat_set(other->contents->relations, relname, item_val(other));
    }

    /* Add back reference */
    feat_set(wanted->relations, relname, item_val(current));
}
cst_voice* register_cmu_us_no_wave( const char* voxdir ) { cst_voice* v = new_voice(); cst_lexicon* lex; v->name = "no_wave_voice"; /* Set up basic values for synthesizing with this voice */ usenglish_init( v ); feat_set_string( v->features, "name", "cmu_us_no_wave" ); /* Lexicon */ lex = cmu_lex_init(); feat_set( v->features, "lexicon", lexicon_val( lex ) ); /* Intonation */ feat_set_float( v->features, "int_f0_target_mean", 95.0 ); feat_set_float( v->features, "int_f0_target_stddev", 11.0 ); feat_set_float( v->features, "duration_stretch", 1.1 ); /* Post lexical rules */ feat_set( v->features, "postlex_func", uttfunc_val( lex->postlex ) ); /* Waveform synthesis: diphone_synth */ feat_set( v->features, "wave_synth_func", uttfunc_val( &no_wave_synth ) ); cmu_us_no_wave = v; return cmu_us_no_wave; }
/*
 * Install the Russian processing setup on `u`.
 *
 * Sets the phoneset and silence symbol, the function used for each
 * synthesis stage (intonation, duration model and f0 model are set to
 * do_nothing), the two "no_*_model" flags, hts_synth as the waveform
 * synthesizer, and finally registers the Russian feature functions.
 */
void russian_init(cst_utterance *u)
{
    cst_features *feats = u->features;

    /* Phoneset */
    feat_set(feats, "phoneset", phoneset_val(&ru_phoneset));
    feat_set_string(feats, "silence", ru_phoneset.silence);

    /* Per-stage processing functions */
    feat_set(feats, "textanalysis_func",
             uttfunc_val(&russian_textanalysis));
    feat_set(feats, "lexical_insertion_func",
             uttfunc_val(&russian_lexical_insertion));
    feat_set(feats, "phrasing_func",
             uttfunc_val(&russian_phrasify));
    feat_set(feats, "pause_insertion_func",
             uttfunc_val(&russian_pause_insertion));
    feat_set(feats, "intonation_func",
             uttfunc_val(&do_nothing));
    feat_set(feats, "postlex_func",
             uttfunc_val(&russian_postlex_function));
    feat_set(feats, "duration_model_func",
             uttfunc_val(&do_nothing));
    feat_set(feats, "f0_model_func",
             uttfunc_val(&do_nothing));

    /* Flags for the stages that are set to do_nothing above */
    feat_set_string(feats, "no_segment_duration_model", "1");
    feat_set_string(feats, "no_f0_target_model", "1");

    /* Waveform synthesis */
    feat_set(feats, "wave_synth_func", uttfunc_val(&hts_synth));

    ru_ff_register(u->ffunctions);
}
/*
 * Install the US English defaults on voice `v`.
 *
 * After us_text_init(), this sets the utterance break function, phoneset,
 * tokenizer character classes, token-to-words function, and the POS,
 * phrasing and intonation CART trees.  Unless FLITE_PLUS_HTS_ENGINE is
 * defined it also sets the duration CART/statistics and the f0 model
 * function.  Finally the US feature functions are registered.
 */
void usenglish_init(cst_voice *v)
{
    cst_features *feats;

    us_text_init();
    feats = v->features;

    /* Utterance break function */
    feat_set(feats, "utt_break", breakfunc_val(&default_utt_break));

    /* Phoneset */
    feat_set(feats, "phoneset", phoneset_val(&us_phoneset));
    feat_set_string(feats, "silence", us_phoneset.silence);

    /* Text analyser */
    feat_set_string(feats, "text_whitespace", us_english_whitespace);
    feat_set_string(feats, "text_postpunctuation", us_english_punctuation);
    feat_set_string(feats, "text_prepunctuation", us_english_prepunctuation);
    feat_set_string(feats, "text_singlecharsymbols",
                    us_english_singlecharsymbols);
    feat_set(feats, "tokentowords_func", itemfunc_val(&us_tokentowords));

    /* Very simple POS tagger */
    feat_set(feats, "pos_tagger_cart", cart_val(&us_pos_cart));

    /* Phrasing */
    feat_set(feats, "phrasing_cart", cart_val(&us_phrasing_cart));

    /* Intonation */
    feat_set(feats, "int_cart_accents", cart_val(&us_int_accent_cart));
    feat_set(feats, "int_cart_tones", cart_val(&us_int_tone_cart));

#ifndef FLITE_PLUS_HTS_ENGINE
    /* Duration */
    feat_set(feats, "dur_cart", cart_val(&us_durz_cart));
    feat_set(feats, "dur_stats", dur_stats_val((dur_stats *)us_dur_stats));

    /* f0 model */
    feat_set(feats, "f0_model_func", uttfunc_val(&us_f0_model));
#endif /* !FLITE_PLUS_HTS_ENGINE */

    us_ff_register(v->ffunctions);
}
/*
 * Set every feature found in `from` on `to`, walking the list that starts
 * at from->head.  Returns the number of features visited.
 */
int feat_copy_into(const cst_features *from,cst_features *to)
{
    cst_featvalpair *pair = from->head;
    int copied = 0;

    while (pair)
    {
        feat_set(to, pair->name, pair->val);
        copied++;
        pair = pair->next;
    }

    return copied;
}
cst_voice *register_cmu_us_kal16(const char *voxdir) { cst_voice *v; cst_lexicon *lex; if (cmu_us_kal16_diphone) return cmu_us_kal16_diphone; /* Already registered */ v = new_voice(); v->name = "kal16"; /* Sets up language specific parameters in the cmu_us_kal16. */ usenglish_init(v); feat_set_string(v->features,"name","cmu_us_kal16"); feat_set_float(v->features,"int_f0_target_mean",95.0); feat_set_float(v->features,"int_f0_target_stddev",11.0); feat_set_float(v->features,"duration_stretch",1.1); /* Lexicon */ lex = cmu_lex_init(); feat_set(v->features,"lexicon",lexicon_val(lex)); feat_set(v->features,"postlex_func",uttfunc_val(lex->postlex)); /* Waveform synthesis */ feat_set(v->features,"wave_synth_func",uttfunc_val(&diphone_synth)); feat_set(v->features,"diphone_db",diphone_db_val(&cmu_us_kal16_db)); feat_set_int(v->features,"sample_rate",cmu_us_kal16_db.sts->sample_rate); /* feat_set_string(v->features,"join_type","simple_join"); */ feat_set_string(v->features,"join_type","modified_lpc"); feat_set_string(v->features,"resynth_type","fixed"); cmu_us_kal16_diphone = v; return cmu_us_kal16_diphone; }
/*
 * Initialise flite and register every voice listed in VoxDefs.
 *
 * Only does work when DOTTS is defined.  Each entry of VoxDefs (the table
 * ends at the first entry whose name is null) has its register function
 * called with NULL, and the resulting voice receives three features:
 * an audio streaming callback, an utterance callback and a post-synthesis
 * hook.
 */
void flowm_init()
{
#ifdef DOTTS
    cst_audio_streaming_info *stream_info;
    int vox = 0;

    flite_init();                /* Initialize flite interface */

    while (VoxDefs[vox].name)
    {
        cst_features *feats;

        /* Register the voice */
        VoxDefs[vox].v = (VoxDefs[vox].rv)(NULL);
        feats = VoxDefs[vox].v->features;

        /* Call back for low level audio streaming, so the waveform is */
        /* played as it is synthesized; necessary for the slower (CG)  */
        /* voices                                                      */
        stream_info = new_audio_streaming_info();
        stream_info->asc = flowm_audio_stream_chunk;
        stream_info->min_buffsize = VoxDefs[vox].min_buffsize;
        feat_set(feats, "streaming_info",
                 audio_streaming_info_val(stream_info));

        /* Call back for reporting which tokens are being synthesized */
        /* and for tracking the current position in the file          */
        feat_set(feats, "utt_user_callback",
                 uttfunc_val(flowm_utt_callback));

        /* For outputting results of a relation (only used in play) */
        feat_set(feats, "post_synth_hook_func",
                 uttfunc_val(flowm_print_relation_callback));

        vox++;
    }
#endif
}
/*
 * Install the LANGNAME language defaults on voice `v`.
 *
 * Sets the language name, utterance break function, phoneset and silence
 * symbol, the default tokenizer character classes, the token-to-words
 * function, the "no_intonation_accent_model" flag, and registers the basic
 * feature functions.
 */
void cmu_LANGNAME_lang_init(cst_voice *v)
{
    /* Set LANGNAME language stuff */
    feat_set_string(v->features,"language","cmu_LANGNAME_lang");

    /* Utterance break function */
    feat_set(v->features,"utt_break",breakfunc_val(&default_utt_break));

    /* Phoneset -- need to get this from voice */
    feat_set(v->features,"phoneset",phoneset_val(&cmu_LANGNAME_phoneset));
    feat_set_string(v->features,"silence",cmu_LANGNAME_phoneset.silence);

    /* Get information from voice and add to lexicon */

    /* Text analyser -- default character classes.  Each feature takes  */
    /* the default set with the matching name: pre-punctuation symbols  */
    /* for "text_prepunctuation", post-punctuation symbols for          */
    /* "text_postpunctuation" (these two were previously crossed over). */
    feat_set_string(v->features,"text_whitespace",
                    cst_ts_default_whitespacesymbols);
    feat_set_string(v->features,"text_postpunctuation",
                    cst_ts_default_postpunctuationsymbols);
    feat_set_string(v->features,"text_prepunctuation",
                    cst_ts_default_prepunctuationsymbols);
    feat_set_string(v->features,"text_singlecharsymbols",
                    cst_ts_default_singlecharsymbols);

    /* Tokenization function */
    feat_set(v->features,"tokentowords_func",
             itemfunc_val(&cmu_LANGNAME_tokentowords));

    /* Pos tagger (gpos)/induced pos */

    /* Phrasing */
    /* feat_set(v->features,"phrasing_cart",cart_val(&cmu_LANGNAME_phrasing_cart)); */

    /* Intonation, Duration and F0 -- part of cg */
    feat_set_string(v->features,"no_intonation_accent_model","1");

    /* Default ffunctions (required) */
    basic_ff_register(v->ffunctions);
}
/*
 * Set feature `name` in `f` to the float `v`, wrapped with float_val().
 */
void feat_set_float(cst_features *f, const char *name, float v)
{
    const cst_val *boxed = float_val(v);

    feat_set(f, name, boxed);
}
int main(int argc, char **argv) { struct timeval tv; cst_voice *v; const char *filename; const char *outtype; cst_voice *desired_voice = 0; const char *voicedir = NULL; int i; float durs; double time_start, time_end; int flite_verbose, flite_loop, flite_bench; int explicit_filename, explicit_text, explicit_phones, ssml_mode; #define ITER_MAX 3 int bench_iter = 0; cst_features *extra_feats; const char *lex_addenda_file = NULL; const char *voicedumpfile = NULL; cst_audio_streaming_info *asi; filename = 0; outtype = "play"; /* default is to play */ flite_verbose = FALSE; flite_loop = FALSE; flite_bench = FALSE; explicit_text = explicit_filename = explicit_phones = FALSE; ssml_mode = FALSE; extra_feats = new_features(); flite_init(); flite_add_lang("eng",usenglish_init,cmu_lex_init); for (i=1; i<argc; i++) { if (cst_streq(argv[i],"--version")) { flite_version(); return 1; } else if (cst_streq(argv[i],"-h") || cst_streq(argv[i],"--help") || cst_streq(argv[i],"-?")) flite_usage(); else if (cst_streq(argv[i],"-v")) flite_verbose = TRUE; else if (cst_streq(argv[i],"-lv")) { if (flite_voice_list == NULL) flite_set_voice_list(voicedir); flite_voice_list_print(); exit(0); } else if (cst_streq(argv[i],"-l")) flite_loop = TRUE; else if (cst_streq(argv[i],"-b")) { flite_bench = TRUE; break; /* ignore other arguments */ } else if ((cst_streq(argv[i],"-o")) && (i+1 < argc)) { outtype = argv[i+1]; i++; } else if ((cst_streq(argv[i],"-voice")) && (i+1 < argc)) { if (flite_voice_list == NULL) flite_set_voice_list(voicedir); desired_voice = flite_voice_select(argv[i+1]); i++; } else if ((cst_streq(argv[i],"-voicedir")) && (i+1 < argc)) { voicedir = argv[i+1]; if (flite_voice_list == NULL) flite_set_voice_list(voicedir); i++; } else if ((cst_streq(argv[i],"-add_lex")) && (i+1 < argc)) { lex_addenda_file = argv[i+1]; i++; } else if (cst_streq(argv[i],"-f") && (i+1 < argc)) { filename = argv[i+1]; explicit_filename = TRUE; i++; } else if (cst_streq(argv[i],"-pw")) { 
feat_set_string(extra_feats,"print_info_relation","Word"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ps")) { feat_set_string(extra_feats,"print_info_relation","Segment"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ssml")) { ssml_mode = TRUE; } else if (cst_streq(argv[i],"-pr") && (i+1 < argc)) { feat_set_string(extra_feats,"print_info_relation",argv[i+1]); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); i++; } else if (cst_streq(argv[i],"-voicedump") && (i+1 < argc)) { voicedumpfile = argv[i+1]; i++; } else if ((cst_streq(argv[i],"-set") || cst_streq(argv[i],"-s")) && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],0); i++; } else if (cst_streq(argv[i],"--seti") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"int"); i++; } else if (cst_streq(argv[i],"--setf") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"float"); i++; } else if (cst_streq(argv[i],"--sets") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"string"); i++; } else if (cst_streq(argv[i],"-p") && (i+1 < argc)) { filename = argv[i+1]; explicit_phones = TRUE; i++; } else if (cst_streq(argv[i],"-t") && (i+1 < argc)) { filename = argv[i+1]; explicit_text = TRUE; i++; } else if (filename) outtype = argv[i]; else filename = argv[i]; } if (filename == NULL) filename = "-"; /* stdin */ if (flite_voice_list == NULL) flite_set_voice_list(voicedir); if (desired_voice == 0) desired_voice = flite_voice_select(NULL); v = desired_voice; feat_copy_into(extra_feats,v->features); durs = 0.0; if (voicedumpfile != NULL) { flite_voice_dump(v,voicedumpfile); exit(0); } if (lex_addenda_file) flite_voice_add_lex_addenda(v,lex_addenda_file); if (cst_streq("stream",outtype)) { asi = new_audio_streaming_info(); asi->asc = audio_stream_chunk; feat_set(v->features,"streaming_info",audio_streaming_info_val(asi)); } if (flite_bench) { outtype = "none"; filename = "A whole joy was reaping, but 
they've gone south, you should fetch azure mike."; explicit_text = TRUE; } loop: gettimeofday(&tv,NULL); time_start = (double)(tv.tv_sec)+(((double)tv.tv_usec)/1000000.0); if (explicit_phones) durs = flite_phones_to_speech(filename,v,outtype); else if ((strchr(filename,' ') && !explicit_filename) || explicit_text) { if (ssml_mode) durs = flite_ssml_text_to_speech(filename,v,outtype); else durs = flite_text_to_speech(filename,v,outtype); } else { if (ssml_mode) durs = flite_ssml_file_to_speech(filename,v,outtype); else durs = flite_file_to_speech(filename,v,outtype); } gettimeofday(&tv,NULL); time_end = ((double)(tv.tv_sec))+((double)tv.tv_usec/1000000.0); if (flite_verbose || (flite_bench && bench_iter == ITER_MAX)) printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n", durs/(float)(time_end-time_start), durs, (float)(time_end-time_start)); if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX)) goto loop; delete_features(extra_feats); delete_val(flite_voice_list); flite_voice_list=0; /* cst_alloc_debug_summary(); */ return 0; }
/*
 * Parse the command line and load the models and lexicons it names.
 *
 * argc/argv   - command line, handed to parse_cmd_ln().
 * out_lex     - receives the lexicon read from -dictfn/-fdictfn, or NULL
 *               when neither is given.
 * out_omdef   - receives the output model definition from -omoddeffn, or
 *               NULL when it is not given.
 * out_dmdef   - receives the dump model definition from -dmoddeffn, or
 *               NULL when it is not given.
 *
 * Returns S3_SUCCESS, or S3_ERROR when a model definition, tied-state to
 * codebook mapping or the main lexicon cannot be read.  Configuration
 * errors are reported through E_FATAL.
 *
 * Also binds the "km", "var", "em" and "all" timers, configures feature
 * extraction from -feat/-ceplen/-svspec (and -ldafn when given), and
 * stores the -stride value in `stride`.
 */
int main_initialize(int argc, char *argv[], lexicon_t **out_lex, model_def_t **out_omdef, model_def_t **out_dmdef)
{
    model_def_t *dmdef = NULL;
    model_def_t *omdef = NULL;
    lexicon_t *lex = NULL;
    const char *fn;
    uint32 n_ts;
    uint32 n_cb;
    const char *ts2cbfn;

    parse_cmd_ln(argc, argv);

    timing_bind_name("km", timing_new());
    timing_bind_name("var", timing_new());
    timing_bind_name("em", timing_new());
    timing_bind_name("all", timing_new());

    if (cmd_ln_access("-feat") != NULL) {
        feat_set(cmd_ln_str("-feat"));
        feat_set_in_veclen(cmd_ln_int32("-ceplen"));
        feat_set_subvecs(cmd_ln_str("-svspec"));
    }
    else {
        E_FATAL("You need to set a feature extraction config using -feat\n");
    }

    if (cmd_ln_access("-ldafn") != NULL) {
        if (feat_read_lda(cmd_ln_access("-ldafn"), cmd_ln_int32("-ldadim"))) {
            E_FATAL("Failed to read LDA matrix\n");
        }
    }

    if (cmd_ln_access("-omoddeffn")) {
        E_INFO("Reading output model definitions: %s\n", cmd_ln_access("-omoddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&omdef, cmd_ln_access("-omoddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        if (cmd_ln_access("-dmoddeffn")) {
            E_INFO("Reading dump model definitions: %s\n", cmd_ln_access("-dmoddeffn"));

            if (model_def_read(&dmdef, cmd_ln_access("-dmoddeffn")) != S3_SUCCESS) {
                return S3_ERROR;
            }
            setup_d2o_map(dmdef, omdef);
        }
        else {
            E_INFO("Assuming dump and output model definitions are identical\n");
        }

        /* Tied-state to codebook mapping: one of the built-in labels,
           or a file to read. */
        ts2cbfn = cmd_ln_access("-ts2cbfn");
        if (ts2cbfn) {
            if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
                omdef->cb = semi_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = 1;
            }
            else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
                omdef->cb = cont_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->n_tied_state;
            }
            else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
                omdef->cb = ptm_ts2cb(omdef);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->acmod_set->n_ci;
            }
            else if (s3ts2cb_read(cmd_ln_access("-ts2cbfn"),
                                  &omdef->cb,
                                  &n_ts,
                                  &n_cb) != S3_SUCCESS) {
                return S3_ERROR;
            }

            if (omdef->n_tied_state != n_ts) {
                E_FATAL("Model definition file n_tied_state = %u, but %u mappings in ts2cb\n",
                        omdef->n_tied_state, n_ts);
            }
        }
    }
    else {
        E_INFO("No mdef files.  Assuming 1-class init\n");
    }

    *out_omdef = omdef;
    *out_dmdef = dmdef;

    /* Both lexicon reads below use omdef->acmod_set, so a dictionary
       cannot be read when no output model definition was loaded. */
    if (omdef == NULL &&
        (cmd_ln_access("-dictfn") != NULL || cmd_ln_access("-fdictfn") != NULL)) {
        E_FATAL("Reading a dictionary requires model definitions; specify -omoddeffn\n");
        return S3_ERROR;    /* in case E_FATAL returns */
    }

    fn = cmd_ln_access("-dictfn");
    if (fn) {
        E_INFO("Reading main lexicon: %s\n", fn);

        lex = lexicon_read(NULL, fn, omdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;
    }

    fn = cmd_ln_access("-fdictfn");
    if (fn) {
        E_INFO("Reading filler lexicon: %s\n", fn);
        (void)lexicon_read(lex, fn, omdef->acmod_set);
    }

    *out_lex = lex;

    stride = *(int32 *)cmd_ln_access("-stride");

    return S3_SUCCESS;
}
/*
 * Set feature `name` in `f` to the value `v`; forwards directly to
 * feat_set().
 */
void flite_feat_set(cst_features *f, const char *name,const cst_val *v)
{
    feat_set(f, name, v);
}
void item_set(const cst_item *i,const char *name,const cst_val *val) { feat_set(item_feats(i),name,val); }
/*
 * Walk the CART `tree` for `item` and return the value stored at the leaf
 * that is reached.
 *
 * Starting at node 0, each non-leaf node names a feature, an operator and
 * a comparison value.  The feature value is looked up in a local cache
 * first and computed with ffeature() (then cached) when absent.  The
 * operator decides whether the walk continues at the node's "yes" or "no"
 * successor.  An unknown operator is reported with cst_errmsg() followed
 * by cst_error().
 */
const cst_val *cart_interpret(cst_item *item, const cst_cart *tree)
{
    cst_features *cache;
    const cst_val *fval = 0;
    const cst_val *qval;
    const char *fname = "";
    int answer = 0;
    int node = 0;
    int op;

    cache = new_features_local(item_utt(item)->ctx);

    while ((op = cst_cart_node_op(node,tree)) != CST_CART_OP_LEAF)
    {
#if CART_DEBUG
        cart_print_node(node,tree);
#endif
        /* Feature value for this question, via the per-call cache */
        fname = cst_cart_node_feat(node,tree);
        fval = get_param_val(cache,fname,0);
        if (fval == 0)
        {
            fval = ffeature(item,fname);
            feat_set(cache,fname,fval);
        }

#if CART_DEBUG
        val_print(stdout,fval); printf("\n");
#endif
        /* Answer the question held at this node */
        qval = cst_cart_node_val(node,tree);
        if (op == CST_CART_OP_IS)
            answer = val_equal(fval,qval);
        else if (op == CST_CART_OP_LESS)
            answer = val_less(fval,qval);
        else if (op == CST_CART_OP_GREATER)
            answer = val_greater(fval,qval);
        else if (op == CST_CART_OP_IN)
            answer = val_member(fval,qval);
        else if (op == CST_CART_OP_MATCHES)
            answer = cst_regex_match(cst_regex_table[val_int(qval)],
                                     val_string(fval));
        else
        {
            cst_errmsg("cart_interpret_question: unknown op type %d\n",
                       op);
            cst_error();
        }

        if (answer)
        {   /* Oh yes it is */
#if CART_DEBUG
            printf("   YES\n");
#endif
            node = cst_cart_node_yes(node,tree);
        }
        else
        {   /* Oh no it isn't */
#if CART_DEBUG
            printf("   NO\n");
#endif
            node = cst_cart_node_no(node,tree);
        }
    }

    delete_features(cache);

    return cst_cart_node_val(node,tree);
}
int main(int argc, char **argv) { struct timeval tv; cst_voice *v; const char *filename; const char *outtype; int i; float durs; double time_start, time_end; int flite_verbose, flite_loop, flite_bench; int explicit_filename, explicit_text, explicit_phones; #define ITER_MAX 3 int bench_iter = 0; cst_features *extra_feats; filename = 0; outtype = "play"; /* default is to play */ flite_verbose = FALSE; flite_loop = FALSE; flite_bench = FALSE; explicit_text = explicit_filename = explicit_phones = FALSE; extra_feats = new_features(); flite_init(); for (i=1; i<argc; i++) { if (cst_streq(argv[i],"--version")) { flite_version(); return 1; } else if (cst_streq(argv[i],"-h") || cst_streq(argv[i],"--help") || cst_streq(argv[i],"-?")) flite_usage(); else if (cst_streq(argv[i],"-v")) flite_verbose = TRUE; else if (cst_streq(argv[i],"-l")) flite_loop = TRUE; else if (cst_streq(argv[i],"-b")) { flite_bench = TRUE; break; /* ignore other arguments */ } else if ((cst_streq(argv[i],"-o")) && (i+1 < argc)) { outtype = argv[i+1]; i++; } else if (cst_streq(argv[i],"-f") && (i+1 < argc)) { filename = argv[i+1]; explicit_filename = TRUE; i++; } else if (cst_streq(argv[i],"-pw")) { feat_set_string(extra_feats,"print_info_relation","Word"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-ps")) { feat_set_string(extra_feats,"print_info_relation","Segment"); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); } else if (cst_streq(argv[i],"-pr") && (i+1 < argc)) { feat_set_string(extra_feats,"print_info_relation",argv[i+1]); feat_set(extra_feats,"post_synth_hook_func", uttfunc_val(&print_info)); i++; } else if ((cst_streq(argv[i],"-set") || cst_streq(argv[i],"-s")) && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],0); i++; } else if (cst_streq(argv[i],"--seti") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"int"); i++; } else if (cst_streq(argv[i],"--setf") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"float"); i++; 
} else if (cst_streq(argv[i],"--sets") && (i+1 < argc)) { ef_set(extra_feats,argv[i+1],"string"); i++; } else if (cst_streq(argv[i],"-p") && (i+1 < argc)) { filename = argv[i+1]; explicit_phones = TRUE; i++; } else if (cst_streq(argv[i],"-t") && (i+1 < argc)) { filename = argv[i+1]; explicit_text = TRUE; i++; } else if (filename) outtype = argv[i]; else filename = argv[i]; } if (filename == NULL) filename = "-"; /* stdin */ v = REGISTER_VOX(NULL); feat_copy_into(extra_feats,v->features); durs = 0.0; if (flite_bench) { outtype = "none"; filename = "A whole joy was reaping, but they've gone south, you should fetch azure mike."; explicit_text = TRUE; } loop: gettimeofday(&tv,NULL); time_start = (double)(tv.tv_sec)+(((double)tv.tv_usec)/1000000.0); if (explicit_phones) durs = flite_phones_to_speech(filename,v,outtype); else if ((strchr(filename,' ') && !explicit_filename) || explicit_text) durs = flite_text_to_speech(filename,v,outtype); else durs = flite_file_to_speech(filename,v,outtype); gettimeofday(&tv,NULL); time_end = ((double)(tv.tv_sec))+((double)tv.tv_usec/1000000.0); if (flite_verbose || (flite_bench && bench_iter == ITER_MAX)) printf("times faster than real-time: %f\n(%f seconds of speech synthesized in %f)\n", durs/(float)(time_end-time_start), durs, (float)(time_end-time_start)); if (flite_loop || (flite_bench && bench_iter++ < ITER_MAX)) goto loop; delete_features(extra_feats); UNREGISTER_VOX(v); return 0; }
/*
 * Set feature `name` in `f` to the integer `v`, wrapped with int_val().
 */
void feat_set_int(cst_features *f, const char *name, int v)
{
    const cst_val *boxed = int_val(v);

    feat_set(f, name, boxed);
}
/*
 * Set feature `name` in `f` to the string `v`, wrapped with string_val().
 */
void feat_set_string(cst_features *f, const char *name, const char *v)
{
    const cst_val *boxed = string_val(v);

    feat_set(f, name, boxed);
}
/*
 * Parse the command line, configure the corpus, and load the model
 * definition and lexicons.
 *
 * out_lex    - receives the lexicon read from -dictfn (plus -fdictfn when
 *              given), or NULL when -moddeffn is not given.
 * out_mdef   - receives the model definition read from -moddeffn, or NULL
 *              when it is not given.
 * argc/argv  - command line, handed to parse_cmd_ln().
 *
 * Returns S3_SUCCESS, or S3_ERROR when -feat or -ts2cbfn is missing, when
 * corpus_init() fails, or when the model definition, tied-state to
 * codebook mapping or main lexicon cannot be read.  A missing -dictfn and
 * an unreadable LDA matrix are reported through E_FATAL.
 */
static int initialize(lexicon_t **out_lex, model_def_t **out_mdef, int argc, char *argv[])
{
    lexicon_t *lex = NULL;
    model_def_t *mdef = NULL;
    const char *fdictfn;
    const char *dictfn;
    const char *ts2cbfn;
    uint32 n_ts;
    uint32 n_cb;

    /* define, parse and (partially) validate the command line */
    parse_cmd_ln(argc, argv);

    if (cmd_ln_access("-feat") != NULL) {
        feat_set(cmd_ln_str("-feat"));
        feat_set_in_veclen(cmd_ln_int32("-ceplen"));
        feat_set_subvecs(cmd_ln_str("-svspec"));
    }
    else {
        E_ERROR("Specify the feature extraction algorithm using -feat\n");
        return S3_ERROR;
    }

    if (cmd_ln_access("-ldafn") != NULL) {
        if (feat_read_lda(cmd_ln_access("-ldafn"), cmd_ln_int32("-ldadim"))) {
            E_FATAL("Failed to read LDA matrix\n");
        }
    }

    /* Corpus configuration */
    if (cmd_ln_access("-segdir"))
        corpus_set_seg_dir(cmd_ln_access("-segdir"));
    if (cmd_ln_access("-segext"))
        corpus_set_seg_ext(cmd_ln_access("-segext"));

    corpus_set_mfcc_dir(cmd_ln_access("-cepdir"));
    corpus_set_mfcc_ext(cmd_ln_access("-cepext"));

    if (cmd_ln_access("-lsnfn"))
        corpus_set_lsn_filename(cmd_ln_access("-lsnfn"));

    corpus_set_ctl_filename(cmd_ln_access("-ctlfn"));

    /* -nskip/-runlen take precedence over -part/-npart */
    if (cmd_ln_access("-nskip") && cmd_ln_access("-runlen")) {
        corpus_set_interval(*(int32 *)cmd_ln_access("-nskip"),
                            *(int32 *)cmd_ln_access("-runlen"));
    }
    else if (cmd_ln_access("-part") && cmd_ln_access("-npart")) {
        corpus_set_partition(*(uint32 *)cmd_ln_access("-part"),
                             *(uint32 *)cmd_ln_access("-npart"));
    }

    if (corpus_init() != S3_SUCCESS) {
        return S3_ERROR;
    }

    if (cmd_ln_access("-moddeffn")) {
        E_INFO("Reading %s\n", cmd_ln_access("-moddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&mdef, cmd_ln_access("-moddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        ts2cbfn = (const char *)cmd_ln_access("-ts2cbfn");
        if (ts2cbfn == NULL) {
            /* strcmp() below must not be handed a null pointer */
            E_ERROR("Specify the tied-state to codebook mapping using -ts2cbfn\n");
            return S3_ERROR;
        }

        /* Tied-state to codebook mapping: one of the built-in labels,
           or a file to read. */
        if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
            mdef->cb = semi_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = 1;
        }
        else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
            mdef->cb = cont_ts2cb(mdef->n_tied_state);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->n_tied_state;
        }
        else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
            mdef->cb = ptm_ts2cb(mdef);
            n_ts = mdef->n_tied_state;
            n_cb = mdef->acmod_set->n_ci;
        }
        else if (s3ts2cb_read(ts2cbfn,
                              &mdef->cb,
                              &n_ts,
                              &n_cb) != S3_SUCCESS) {
            return S3_ERROR;
        }

        dictfn = cmd_ln_access("-dictfn");
        if (dictfn == NULL) {
            E_FATAL("You must specify a content dictionary using -dictfn\n");
        }

        E_INFO("Reading %s\n", dictfn);

        lex = lexicon_read(NULL,        /* no lexicon to start */
                           dictfn,
                           mdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;

        fdictfn = cmd_ln_access("-fdictfn");
        if (fdictfn) {
            E_INFO("Reading %s\n", fdictfn);

            (void)lexicon_read(lex,     /* add filler words content lexicon */
                               fdictfn,
                               mdef->acmod_set);
        }
    }

    *out_mdef = mdef;
    *out_lex = lex;

    return S3_SUCCESS;
}