/* HTS_Engine_load: load HTS voices */
/*
 * Clears the engine, loads `num_voices` voice files into engine->ms and then
 * fills engine->condition with defaults derived from the loaded model set:
 *   - sampling frequency and frame period are copied from the model set;
 *   - every stream gets an MSD threshold of 0.5 and a GV weight of 1.0;
 *   - GAMMA / LN_GAIN / ALPHA are read from the option string of stream 0;
 *   - every interpolation weight is set to 1.0 / num_voices.
 *
 * Returns TRUE on success. If HTS_ModelSet_load() fails the engine is cleared
 * again and FALSE is returned.
 */
HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices)
{
   size_t i, j;
   size_t nstream;
   double average_weight;
   const char *option, *find;

   /* reset engine */
   HTS_Engine_clear(engine);

   /* load voices */
   if (HTS_ModelSet_load(&engine->ms, voices, num_voices) != TRUE) {
      HTS_Engine_clear(engine);
      return FALSE;
   }
   nstream = HTS_ModelSet_get_nstream(&engine->ms);
   average_weight = 1.0 / num_voices;

   /* global */
   engine->condition.sampling_frequency = HTS_ModelSet_get_sampling_frequency(&engine->ms);
   engine->condition.fperiod = HTS_ModelSet_get_fperiod(&engine->ms);
   engine->condition.msd_threshold = (double *) HTS_calloc(nstream, sizeof(double));
   for (i = 0; i < nstream; i++)
      engine->condition.msd_threshold[i] = 0.5;
   engine->condition.gv_weight = (double *) HTS_calloc(nstream, sizeof(double));
   for (i = 0; i < nstream; i++)
      engine->condition.gv_weight[i] = 1.0;

   /* spectrum */
   option = HTS_ModelSet_get_option(&engine->ms, 0);
   /* strstr() must not be handed a null pointer; without an option string the
      values already stored in engine->condition are left untouched */
   if (option != NULL) {
      find = strstr(option, "GAMMA=");
      if (find != NULL)
         engine->condition.stage = (size_t) atoi(&find[strlen("GAMMA=")]);
      find = strstr(option, "LN_GAIN=");
      if (find != NULL)
         engine->condition.use_log_gain = atoi(&find[strlen("LN_GAIN=")]) == 1 ? TRUE : FALSE;
      find = strstr(option, "ALPHA=");
      if (find != NULL)
         engine->condition.alpha = atof(&find[strlen("ALPHA=")]);
   }

   /* interpolation weights */
   engine->condition.duration_iw = (double *) HTS_calloc(num_voices, sizeof(double));
   for (i = 0; i < num_voices; i++)
      engine->condition.duration_iw[i] = average_weight;
   engine->condition.parameter_iw = (double **) HTS_calloc(num_voices, sizeof(double *));
   for (i = 0; i < num_voices; i++) {
      engine->condition.parameter_iw[i] = (double *) HTS_calloc(nstream, sizeof(double));
      for (j = 0; j < nstream; j++)
         engine->condition.parameter_iw[i][j] = average_weight;
   }
   engine->condition.gv_iw = (double **) HTS_calloc(num_voices, sizeof(double *));
   for (i = 0; i < num_voices; i++) {
      engine->condition.gv_iw[i] = (double *) HTS_calloc(nstream, sizeof(double));
      for (j = 0; j < nstream; j++)
         engine->condition.gv_iw[i][j] = average_weight;
   }

   return TRUE;
}
/* HTS_Engine_load: load HTS voices */
/*
 * Clears the engine, loads the voice(s) into engine->ms and initialises the
 * global synthesis conditions. Returns FALSE (with the engine cleared) when
 * the model set cannot be loaded or when the stream-0 option string contains
 * "GAMMA=" or "LN_GAIN=1"; returns TRUE otherwise.
 */
HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices)
{
   size_t s;
   size_t nstream;
   const char *option, *alpha_opt;
   float parsed_alpha;

   /* start from a clean engine, then read the model set */
   HTS_Engine_clear(engine);
   if (HTS_ModelSet_load(&engine->ms, voices) != TRUE) {
      HTS_Engine_clear(engine);
      return FALSE;
   }
   nstream = HTS_ModelSet_get_nstream(&engine->ms);

   /* global conditions taken from the model set */
   engine->condition.sampling_frequency = HTS_ModelSet_get_sampling_frequency(&engine->ms);
   engine->condition.fperiod = HTS_ModelSet_get_fperiod(&engine->ms);

   /* every stream starts with an MSD threshold of 0.5 */
   engine->condition.msd_threshold = cst_alloc(double,nstream);
   s = 0;
   while (s < nstream) {
      engine->condition.msd_threshold[s] = 0.5;
      s++;
   }

   /* spectrum options are read from stream 0 */
   option = HTS_ModelSet_get_option(&engine->ms, 0);

   /* NOTE(review): any "GAMMA=" entry is rejected here, including "GAMMA=0",
      although the message speaks of non-zero values -- confirm intent */
   if (strstr(option, "GAMMA=") != NULL) {
      cst_errmsg("Non-Zero GAMMA not supported\n");
      HTS_Engine_clear(engine);
      return FALSE;
   }

   if (strstr(option, "LN_GAIN=1") != NULL) {
      cst_errmsg("Non-Zero LN_GAIN not supported\n");
      HTS_Engine_clear(engine);
      return FALSE;
   }

   alpha_opt = strstr(option, "ALPHA=");
   if (alpha_opt == NULL)
      return TRUE;

   /* use the parsed value when it validates, otherwise fall back to 0.42 */
   if (bell_validate_atof(alpha_opt + strlen("ALPHA="), &parsed_alpha)) {
      engine->condition.alpha = parsed_alpha;
   } else {
      cst_errmsg("Voice file option 'ALPHA' is not float, setting to default 0.42\n");
      engine->condition.alpha = 0.42;
   }

   return TRUE;
}
// Destructor: clears each component stored in open_jtalk_, in the order
// mecab, njd, jpcommon, engine.
TextToSpeech::~TextToSpeech()
{
    auto &components = open_jtalk_;

    Mecab_clear(&components.mecab);
    NJD_clear(&components.njd);
    JPCommon_clear(&components.jpcommon);
    HTS_Engine_clear(&components.engine);
}
/* Open_JTalk_clear: clear the mecab, njd, jpcommon and engine members (in that order) */
static void Open_JTalk_clear(Open_JTalk * open_jtalk)
{
   Open_JTalk *const instance = open_jtalk;

   Mecab_clear(&(instance->mecab));
   NJD_clear(&(instance->njd));
   JPCommon_clear(&(instance->jpcommon));
   HTS_Engine_clear(&(instance->engine));
}
/* we need to delete the window plug-in if any */ static void Destroy(void *obj, s_erc *error) { SHTSEngineSynthUttProc104 *self = obj; S_CLR_ERR(error); HTS_Engine_clear(&(self->engine)); }
/* we need to delete the window plug-in if any */ static void Destroy(void *obj, s_erc *error) { SHTSEngineMESynthUttProc105 *self = obj; S_CLR_ERR(error); HTS_Engine_clear(&(self->engine)); filter_destructor(self); }
/*
 * cleanup: clear the four processing components of the application state,
 * drain and shut down the playback handle, then free the PCM buffer.
 */
static void cleanup(struct app *app)
{
    struct app *const state = app;

    /* processing components */
    Mecab_clear(&(state->mecab));
    NJD_clear(&(state->njd));
    JPCommon_clear(&(state->jpcommon));
    HTS_Engine_clear(&(state->engine));

    /* playback: drain before exit */
    play_drain(state->play_h);
    play_exit(state->play_h);

    free(state->pcm);
}
// Destructor: clears the components stored in open_jtalk_, drains and shuts
// down the playback handle, then releases the pcm_ array.
TextToSpeech::~TextToSpeech()
{
    auto &components = open_jtalk_;

    Mecab_clear(&components.mecab);
    NJD_clear(&components.njd);
    JPCommon_clear(&components.jpcommon);
    HTS_Engine_clear(&components.engine);

    // playback: drain before exit
    play_drain(play_h_);
    play_exit(play_h_);

    delete[] pcm_;
}
/*
 * OpenJTalk_Delete: clear the njd, jpcommon and engine members of *openjtalk
 * and free its three fn_ws_* arrays. Always returns 1.
 * The OpenJTalk structure itself is not freed here.
 */
int OpenJTalk_Delete(OpenJTalk** openjtalk)
{
    OpenJTalk *instance = *openjtalk;

    NJD_clear(&instance->njd);
    JPCommon_clear(&instance->jpcommon);
    HTS_Engine_clear(&instance->engine);

    free(instance->fn_ws_mgc);
    free(instance->fn_ws_lf0);
    free(instance->fn_ws_lpf);

    return 1;
}
void std_hts_engine_impl::do_initialize() { engine.reset(new HTS_Engine); HTS_Engine_initialize(engine.get()); engine->audio.audio_interface=this; std::string voice_path(path::join(model_path,"voice.data")); char* c_voice_path=const_cast<char*>(voice_path.c_str()); if(!HTS_Engine_load(engine.get(),&c_voice_path,1)) { HTS_Engine_clear(engine.get()); throw initialization_error(); } std::string bpf_path(path::join(model_path,"bpf.txt")); if(bpf_load(&engine->bpf,bpf_path.c_str())==0) { HTS_Engine_clear(engine.get()); throw initialization_error(); } HTS_Engine_set_beta(engine.get(),beta); HTS_Engine_set_audio_buff_size(engine.get(),HTS_Engine_get_fperiod(engine.get())); }
int main(int argc, char* argv[]) { size_t num_voices; char **fn_voices; char* in_fname; char* output_fname; FILE * outfp; char* dur_fname; FILE * durfp; bool print_label = false; bool print_utt = false; bool write_raw = false; bool write_durlabel = false; CFSAString LexFileName, LexDFileName; HTS_Engine engine; double speed = 1.1; size_t fr = 48000; size_t fp = 240; float alpha = 0.55; float beta = 0.0; float ht = 2.0; float th = 0.5; float gvw1 = 1.0; float gvw2 = 1.2; FSCInit(); fn_voices = (char **) malloc(argc * sizeof (char *)); if (argc < 11) { fprintf(stderr, "Viga: liiga vähe parameetreid\n\n"); PrintUsage(); } for (int i = 0; i < argc; i++) { if (CFSAString("-lex") == argv[i]) { if (i + 1 < argc) { LexFileName = argv[++i]; } else { return PrintUsage(); } } if (CFSAString("-lexd") == argv[i]) { if (i + 1 < argc) { LexDFileName = argv[++i]; } else { return PrintUsage(); } } if (CFSAString("-m") == argv[i]) { if (i + 1 < argc) { fn_voices[0] = argv[i + 1]; } else { fprintf(stderr, "Viga: puudub *.htsvoice fail\n"); PrintUsage(); exit(0); } } if (CFSAString("-o") == argv[i]) { if (i + 1 < argc) { output_fname = argv[i + 1]; cfileexists(output_fname); } else { fprintf(stderr, "Viga: puudb väljundfaili nimi\n"); PrintUsage(); exit(0); } } if (CFSAString("-f") == argv[i]) { if (i + 1 < argc) { in_fname = argv[i + 1]; } else { fprintf(stderr, "Viga: puudb sisendfaili nimi\n"); PrintUsage(); exit(0); } } if (CFSAString("-s") == argv[i]) { if (i + 1 < argc) { samplerate(fr, fp, alpha, atoi(argv[i + 1])); } } if (CFSAString("-r") == argv[i]) { if (i + 1 < argc) { speed = atof(argv[i + 1]); } } if (CFSAString("-ht") == argv[i]) { if (i + 1 < argc) { ht = atof(argv[i + 1]); } } if (CFSAString("-gvw1") == argv[i]) { if (i + 1 < argc) { gvw1 = atof(argv[i + 1]); } } if (CFSAString("-gvw2") == argv[i]) { if (i + 1 < argc) { gvw2 = atof(argv[i + 1]); } } if (CFSAString("-debug") == argv[i]) { print_label = true; } if (CFSAString("-utt") == argv[i]) { print_utt = true; } if 
(CFSAString("-raw") == argv[i]) { write_raw = true; } if (CFSAString("-dur") == argv[i]) { if (i + 1 < argc) { dur_fname = argv[i + 1]; cfileexists(dur_fname); write_durlabel = true; } else { fprintf(stderr, "Viga: puudb kestustefaili nimi\n"); PrintUsage(); exit(0); } } } Linguistic.Open(LexFileName); Disambiguator.Open(LexDFileName); CFSWString text; ReadUTF8Text(text, in_fname); HTS_Engine_initialize(&engine); if (HTS_Engine_load(&engine, fn_voices, 1) != TRUE) { fprintf(stderr, "Viga: puudub *.htsvoice. %p\n", fn_voices[0]); free(fn_voices); HTS_Engine_clear(&engine); exit(1); } free(fn_voices); HTS_Engine_set_sampling_frequency(&engine, (size_t) fr); HTS_Engine_set_phoneme_alignment_flag(&engine, FALSE); HTS_Engine_set_fperiod(&engine, (size_t) fp); HTS_Engine_set_alpha(&engine, alpha); HTS_Engine_set_beta(&engine, beta); HTS_Engine_set_speed(&engine, speed); HTS_Engine_add_half_tone(&engine, ht); HTS_Engine_set_msd_threshold(&engine, 1, th); /* HTS_Engine_set_duration_interpolation_weight(&engine, 1, diw); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 0, piw1); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 1, piw2); HTS_Engine_set_gv_interpolation_weight(&engine, 0, 0, giw1); HTS_Engine_set_gv_interpolation_weight(&engine, 0, 1, giw2); */ HTS_Engine_set_gv_weight(&engine, 0, gvw1); HTS_Engine_set_gv_weight(&engine, 1, gvw2); text = DealWithText(text); CFSArray<CFSWString> res = do_utterances(text); INTPTR data_size = 0; outfp = fopen(output_fname, "wb"); if (write_durlabel) durfp = fopen(dur_fname, "w"); if (!write_raw) HTS_Engine_write_header(&engine, outfp, 1); for (INTPTR i = 0; i < res.GetSize(); i++) { CFSArray<CFSWString> label = do_all(res[i], print_label, print_utt); std::vector<std::string> v; v = to_vector(label); std::vector<char*> vc; fill_char_vector(v, vc); size_t n_lines = vc.size(); if (HTS_Engine_synthesize_from_strings(&engine, &vc[0], n_lines) != TRUE) { fprintf(stderr, "Viga: süntees ebaonnestus.\n"); 
HTS_Engine_clear(&engine); exit(1); } clean_char_vector(vc); data_size += HTS_Engine_engine_speech_size(&engine); if (write_durlabel) HTS_Engine_save_durlabel(&engine, durfp); HTS_Engine_save_generated_speech(&engine, outfp); HTS_Engine_refresh(&engine); } //synth loop if (!write_raw) HTS_Engine_write_header(&engine, outfp, data_size); if (write_durlabel) fclose(durfp); fclose(outfp); HTS_Engine_clear(&engine); Linguistic.Close(); FSCTerminate(); return 0; }
// Destructor: clears the HTS engine if one has been created.
std_hts_engine_impl::~std_hts_engine_impl()
{
  // nothing to clear when no engine is held
  if(engine.get()==0)
    return;
  HTS_Engine_clear(engine.get());
}
/* Flite_HTS_Engine_clear: clear the HTS engine embedded in the given structure */
void Flite_HTS_Engine_clear(Flite_HTS_Engine * f)
{
   HTS_Engine_clear(&(f->engine));
}
static void Initialize(SUttProcessor *self, const SVoice *voice, s_erc *error) { hts_params *engine_params; SHTSEngineMESynthUttProc105 *HTSsynth = (SHTSEngineMESynthUttProc105*)self; const SMap *hts_data; const SObject *vcfgObject; char *voice_base_path; S_CLR_ERR(error); /* get voice base path */ vcfgObject = SVoiceGetFeature(voice, "config_file", error); if (S_CHK_ERR(error, S_CONTERR, "Initialize", "Call to \"SVoiceGetFeature\" failed, failed to get voice config file")) return; voice_base_path = s_get_base_path(SObjectGetString(vcfgObject, error), error); if (S_CHK_ERR(error, S_CONTERR, "Initialize", "Call to \"s_get_base_path/SObjectGetString\" failed")) return; /* get the HTS engine settings */ engine_params = get_hts_engine_params(self->features, &(HTSsynth->me), error); if (S_CHK_ERR(error, S_CONTERR, "Initialize", "Call to \"get_hts_engine_params\" failed")) { S_FREE(voice_base_path); return; } /* initialize the engine */ if (HTSsynth->me == TRUE) { /* extra stream for strengths */ HTS_Engine_initialize(&(HTSsynth->engine), 3); } else { HTS_Engine_initialize(&(HTSsynth->engine), 2); } /* set the engine parameters */ HTS_Engine_set_sampling_rate(&(HTSsynth->engine), engine_params->sampling_rate); HTS_Engine_set_fperiod(&(HTSsynth->engine), engine_params->fperiod); HTS_Engine_set_alpha(&(HTSsynth->engine), engine_params->alpha); HTS_Engine_set_gamma(&(HTSsynth->engine), engine_params->stage); HTS_Engine_set_log_gain(&(HTSsynth->engine), engine_params->use_log_gain); HTS_Engine_set_beta(&(HTSsynth->engine), engine_params->beta); HTS_Engine_set_audio_buff_size(&(HTSsynth->engine), engine_params->audio_buff_size); HTS_Engine_set_msd_threshold(&(HTSsynth->engine), 1, engine_params->uv_threshold); HTS_Engine_set_gv_weight(&(HTSsynth->engine), 0, engine_params->gv_weight_mcp); HTS_Engine_set_gv_weight(&(HTSsynth->engine), 1, engine_params->gv_weight_lf0); if (HTSsynth->me == TRUE) HTS_Engine_set_gv_weight(&(HTSsynth->engine), 2, engine_params->gv_weight_str); 
S_FREE(engine_params); hts_data = S_MAP(SVoiceGetFeature(voice, "hts engine data", error)); if (S_CHK_ERR(error, S_CONTERR, "Initialize", "Call to \"SVoiceGetFeature\" failed")) goto quit_error; if (hts_data == NULL) { S_CTX_ERR(error, S_FAILURE, "Initialize", "Failed to get \"hts engine data\" map from voice features"); goto quit_error; } load_hts_engine_data(hts_data, HTSsynth, voice_base_path, error); if (S_CHK_ERR(error, S_CONTERR, "Initialize", "Call to \"load_hts_engine_data\" failed")) goto quit_error; HTS_Engine_set_duration_interpolation_weight(&(HTSsynth->engine), 0, 1.0); HTS_Engine_set_parameter_interpolation_weight(&(HTSsynth->engine), 0, 0, 1.0); HTS_Engine_set_parameter_interpolation_weight(&(HTSsynth->engine), 1, 0, 1.0); if (HTSsynth->me == TRUE) HTS_Engine_set_parameter_interpolation_weight(&(HTSsynth->engine), 2, 0, 1.0); HTS_Engine_set_gv_interpolation_weight(&(HTSsynth->engine), 0, 0, 1.0); HTS_Engine_set_gv_interpolation_weight(&(HTSsynth->engine), 1, 0, 1.0); if (HTSsynth->me == TRUE) HTS_Engine_set_gv_interpolation_weight(&(HTSsynth->engine), 2, 0, 1.0); /* all OK */ S_FREE(voice_base_path); return; /* error clean up */ quit_error: HTS_Engine_clear(&(HTSsynth->engine)); filter_destructor(HTSsynth); if (voice_base_path != NULL) S_FREE(voice_base_path); }
int htsSynthesize(int argc, char **argv) { int i; double f; /* hts_engine API */ HTS_Engine engine; /* HTS voices */ size_t num_voices; char **fn_voices; /* input label file name */ char *labfn = NULL; /* output file pointers */ FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL; /* interpolation weights */ size_t num_interpolation_weights; /* output usage */ if (argc <= 1) usage(); /* initialize hts_engine API */ HTS_Engine_initialize(&engine); /* get HTS voice file names */ num_voices = 0; fn_voices = (char **) malloc(argc * sizeof(char *)); for (i = 0; i < argc; i++) { if (argv[i][0] == '-' && argv[i][1] == 'm') fn_voices[num_voices++] = argv[++i]; if (argv[i][0] == '-' && argv[i][1] == 'h') usage(); } if (num_voices == 0) { fprintf(stderr, "Error: HTS voice must be specified.\n"); free(fn_voices); return (-1); } /* load HTS voices */ if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) { fprintf(stderr, "Error: HTS voices cannot be loaded.\n"); free(fn_voices); HTS_Engine_clear(&engine); return (-1); } free(fn_voices); /* get options */ while (--argc) { if (**++argv == '-') { switch (*(*argv + 1)) { case 'v': switch (*(*argv + 2)) { case 'p': HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE); break; default: fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } break; case 'o': switch (*(*argv + 2)) { case 'w': wavfp = fopen(*++argv, "wb"); break; case 'r': rawfp = fopen(*++argv, "wb"); break; case 'd': durfp = fopen(*++argv, "wt"); break; case 'm': mgcfp = fopen(*++argv, "wb"); break; case 'f': case 'p': lf0fp = fopen(*++argv, "wb"); break; case 'l': lpffp = fopen(*++argv, "wb"); break; case 't': tracefp = fopen(*++argv, "wt"); break; default: fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'h': usage(); break; case 'm': argv++; /* HTS voices were already 
loaded */ --argc; break; case 's': HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv)); --argc; break; case 'p': HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv)); --argc; break; case 'a': HTS_Engine_set_alpha(&engine, atof(*++argv)); --argc; break; case 'b': HTS_Engine_set_beta(&engine, atof(*++argv)); --argc; break; case 'r': HTS_Engine_set_speed(&engine, atof(*++argv)); --argc; break; case 'f': switch (*(*argv + 2)) { case 'm': HTS_Engine_add_half_tone(&engine, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'u': HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv)); --argc; break; case 'i': num_interpolation_weights = atoi(*++argv); argc--; if (num_interpolation_weights != num_voices) { HTS_Engine_clear(&engine); return(-1); } for (i = 0; i < (int) num_interpolation_weights; i++) { f = atof(*++argv); argc--; HTS_Engine_set_duration_interpolation_weight(&engine, i, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f); } break; case 'j': switch (*(*argv + 2)) { case 'm': HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv)); break; case 'f': case 'p': HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return(-1); } --argc; break; case 'g': HTS_Engine_set_volume(&engine, atof(*++argv)); --argc; break; case 'z': HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv)); --argc; break; default: fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1)); HTS_Engine_clear(&engine); return(-1); } } else { labfn = *argv; } } /* synthesize */ if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) { fprintf(stderr, "Error: 
waveform cannot be synthesized.\n"); HTS_Engine_clear(&engine); return(-1); } /* output */ if (tracefp != NULL) HTS_Engine_save_information(&engine, tracefp); if (durfp != NULL) HTS_Engine_save_label(&engine, durfp); if (rawfp) HTS_Engine_save_generated_speech(&engine, rawfp); if (wavfp) HTS_Engine_save_riff(&engine, wavfp); if (mgcfp) HTS_Engine_save_generated_parameter(&engine, 0, mgcfp); if (lf0fp) HTS_Engine_save_generated_parameter(&engine, 1, lf0fp); if (lpffp) HTS_Engine_save_generated_parameter(&engine, 2, lpffp); /* reset */ HTS_Engine_refresh(&engine); /* free memory */ HTS_Engine_clear(&engine); /* close files */ if (durfp != NULL) fclose(durfp); if (mgcfp != NULL) fclose(mgcfp); if (lf0fp != NULL) fclose(lf0fp); if (lpffp != NULL) fclose(lpffp); if (wavfp != NULL) fclose(wavfp); if (rawfp != NULL) fclose(rawfp); if (tracefp != NULL) fclose(tracefp); return 0; }
int main(int argc, char **argv) { int i; double f; char *labfn = NULL; size_t labfn_size=0; char *guifn = NULL; FILE *guifp = NULL; FILE *rawfp = NULL; /* number of speakers for interpolation */ int num_interp = 0; double *rate_interp = NULL; /* file names of models */ char **fn_ms_dur; char **fn_ms_mgc; char **fn_ms_lf0; char **fn_ms_lpf; /* number of each models for interpolation */ int num_ms_dur = 0, num_ms_mgc = 0, num_ms_lf0 = 0, num_ms_lpf = 0; /* file names of trees */ char **fn_ts_dur; char **fn_ts_mgc; char **fn_ts_lf0; char **fn_ts_lpf; /* number of each trees for interpolation */ int num_ts_dur = 0, num_ts_mgc = 0, num_ts_lf0 = 0, num_ts_lpf = 0; /* file names of windows */ char **fn_ws_mgc; char **fn_ws_lf0; char **fn_ws_lpf; int num_ws_mgc = 0, num_ws_lf0 = 0, num_ws_lpf = 0; /* file names of global variance */ char **fn_ms_gvm = NULL; char **fn_ms_gvl = NULL; char **fn_ms_gvf = NULL; int num_ms_gvm = 0, num_ms_gvl = 0, num_ms_gvf = 0; /* file names of global variance trees */ char **fn_ts_gvm = NULL; char **fn_ts_gvl = NULL; char **fn_ts_gvf = NULL; int num_ts_gvm = 0, num_ts_gvl = 0, num_ts_gvf = 0; /* file name of global variance switch */ char *fn_gv_switch = NULL; /* global parameter */ int sampling_rate = 16000; int fperiod = 80; double alpha = 0.42; int stage = 0; /* Gamma=-1/stage: if stage=0 then Gamma=0 */ double beta = 0.0; int audio_buff_size = 1600; double uv_threshold = 0.5; double gv_weight_mgc = 1.0; double gv_weight_lf0 = 1.0; double gv_weight_lpf = 1.0; double half_tone = 0.0; HTS_Boolean phoneme_alignment = FALSE; double speech_speed = 1.0; HTS_Boolean use_log_gain = FALSE; /* engine */ HTS_Engine engine; /* parse command line */ if (argc == 1) Usage(); /* delta window handler for mel-cepstrum */ fn_ws_mgc = (char **) calloc(argc, sizeof(char *)); /* delta window handler for log f0 */ fn_ws_lf0 = (char **) calloc(argc, sizeof(char *)); /* delta window handler for low-pass filter */ fn_ws_lpf = (char **) calloc(argc, sizeof(char *)); 
/* prepare for interpolation */ num_interp = GetNumInterp(argc, argv); rate_interp = (double *) calloc(num_interp, sizeof(double)); for (i = 0; i < num_interp; i++) rate_interp[i] = 1.0; fn_ms_dur = (char **) calloc(num_interp, sizeof(char *)); fn_ms_mgc = (char **) calloc(num_interp, sizeof(char *)); fn_ms_lf0 = (char **) calloc(num_interp, sizeof(char *)); fn_ms_lpf = (char **) calloc(num_interp, sizeof(char *)); fn_ts_dur = (char **) calloc(num_interp, sizeof(char *)); fn_ts_mgc = (char **) calloc(num_interp, sizeof(char *)); fn_ts_lf0 = (char **) calloc(num_interp, sizeof(char *)); fn_ts_lpf = (char **) calloc(num_interp, sizeof(char *)); fn_ms_gvm = (char **) calloc(num_interp, sizeof(char *)); fn_ms_gvl = (char **) calloc(num_interp, sizeof(char *)); fn_ms_gvf = (char **) calloc(num_interp, sizeof(char *)); fn_ts_gvm = (char **) calloc(num_interp, sizeof(char *)); fn_ts_gvl = (char **) calloc(num_interp, sizeof(char *)); fn_ts_gvf = (char **) calloc(num_interp, sizeof(char *)); /* read command */ while (--argc) { if (**++argv == '-') { switch (*(*argv + 1)) { case 'v': switch (*(*argv + 2)) { case 'p': phoneme_alignment = TRUE; break; default: Error(1, "hts_engine: Invalid option '-v%c'.\n", *(*argv + 2)); } break; case 't': switch (*(*argv + 2)) { case 'd': fn_ts_dur[num_ts_dur++] = *++argv; break; case 'm': fn_ts_mgc[num_ts_mgc++] = *++argv; break; case 'f': case 'p': fn_ts_lf0[num_ts_lf0++] = *++argv; break; case 'l': fn_ts_lpf[num_ts_lpf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-t%c'.\n", *(*argv + 2)); } --argc; break; case 'm': switch (*(*argv + 2)) { case 'd': fn_ms_dur[num_ms_dur++] = *++argv; break; case 'm': fn_ms_mgc[num_ms_mgc++] = *++argv; break; case 'f': case 'p': fn_ms_lf0[num_ms_lf0++] = *++argv; break; case 'l': fn_ms_lpf[num_ms_lpf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-m%c'.\n", *(*argv + 2)); } --argc; break; case 'd': switch (*(*argv + 2)) { case 'm': fn_ws_mgc[num_ws_mgc++] = 
*++argv; break; case 'f': case 'p': fn_ws_lf0[num_ws_lf0++] = *++argv; break; case 'l': fn_ws_lpf[num_ws_lpf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-d%c'.\n", *(*argv + 2)); } --argc; break; case 'o': switch (*(*argv + 2)) { case 'r': rawfp = Getfp(*++argv, "ab"); break; default: Error(1, "festcat-hts_engine: Invalid option '-o%c'.\n", *(*argv + 2)); } --argc; break; case 'h': Usage(); break; case 's': sampling_rate = atoi(*++argv); --argc; break; case 'p': fperiod = atoi(*++argv); --argc; break; case 'a': alpha = atof(*++argv); --argc; break; case 'g': stage = atoi(*++argv); --argc; break; case 'l': use_log_gain = TRUE; break; case 'b': beta = atof(*++argv); --argc; break; case 'r': speech_speed = atof(*++argv); --argc; break; case 'f': switch (*(*argv + 2)) { case 'm': f = atof(*++argv); if (f < -24.0) f = -24.0; if (f > 24.0) f = 24.0; half_tone = f; break; default: Error(1, "hts_engine: Invalid option '-f%c'.\n", *(*argv + 2)); } --argc; break; case 'u': uv_threshold = atof(*++argv); --argc; break; case 'i': ++argv; argc--; for (i = 0; i < num_interp; i++) { rate_interp[i] = atof(*++argv); argc--; } break; case 'e': switch (*(*argv + 2)) { case 'm': fn_ts_gvm[num_ts_gvm++] = *++argv; break; case 'f': case 'p': fn_ts_gvl[num_ts_gvl++] = *++argv; break; case 'l': fn_ts_gvf[num_ts_gvf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-e%c'.\n", *(*argv + 2)); } --argc; break; case 'c': switch (*(*argv + 2)) { case 'm': fn_ms_gvm[num_ms_gvm++] = *++argv; break; case 'f': case 'p': fn_ms_gvl[num_ms_gvl++] = *++argv; break; case 'l': fn_ms_gvf[num_ms_gvf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-c%c'.\n", *(*argv + 2)); } --argc; break; case 'j': switch (*(*argv + 2)) { case 'm': gv_weight_mgc = atof(*++argv); break; case 'f': case 'p': gv_weight_lf0 = atof(*++argv); break; case 'l': gv_weight_lpf = atof(*++argv); break; default: Error(1, "hts_engine: Invalid option '-j%c'.\n", *(*argv + 2)); } 
--argc; break; case 'k': fn_gv_switch = *++argv; --argc; break; case 'z': audio_buff_size = atoi(*++argv); --argc; break; default: Error(1, "hts_engine: Invalid option '-%c'.\n", *(*argv + 1)); } } else { guifn = *argv; } } /* number of models,trees check */ if (num_interp != num_ts_dur || num_interp != num_ts_mgc || num_interp != num_ts_lf0 || num_interp != num_ms_dur || num_interp != num_ms_mgc || num_interp != num_ms_lf0) { Error(1, "hts_engine: specify %d models(trees) for each parameter.\n", num_interp); } if (num_ms_lpf > 0 || num_ts_lpf > 0) { if (num_interp != num_ms_lpf || num_interp != num_ts_lpf) { Error(1, "hts_engine: specify %d models(trees) for each parameter.\n", num_interp); } } /* initialize (stream[0] = spectrum, stream[1] = lf0, stream[2] = low-pass filter) */ if (num_ms_lpf > 0 || num_ts_lpf > 0) { HTS_Engine_initialize(&engine, 3); } else { HTS_Engine_initialize(&engine, 2); } /* load duration model */ HTS_Engine_load_duration_from_fn(&engine, fn_ms_dur, fn_ts_dur, num_interp); /* load stream[0] (spectrum model) */ HTS_Engine_load_parameter_from_fn(&engine, fn_ms_mgc, fn_ts_mgc, fn_ws_mgc, 0, FALSE, num_ws_mgc, num_interp); /* load stream[1] (lf0 model) */ HTS_Engine_load_parameter_from_fn(&engine, fn_ms_lf0, fn_ts_lf0, fn_ws_lf0, 1, TRUE, num_ws_lf0, num_interp); /* load stream[2] (low-pass filter model) */ if (num_ms_lpf > 0 || num_ts_lpf > 0) HTS_Engine_load_parameter_from_fn(&engine, fn_ms_lpf, fn_ts_lpf, fn_ws_lpf, 2, FALSE, num_ws_lpf, num_interp); /* load gv[0] (GV for spectrum) */ if (num_interp == num_ms_gvm) { if (num_ms_gvm == num_ts_gvm) HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvm, fn_ts_gvm, 0, num_interp); else HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvm, NULL, 0, num_interp); } /* load gv[1] (GV for lf0) */ if (num_interp == num_ms_gvl) { if (num_ms_gvl == num_ts_gvl) HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvl, fn_ts_gvl, 1, num_interp); else HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvl, NULL, 1, num_interp); } /* load 
gv[2] (GV for low-pass filter) */ if (num_interp == num_ms_gvf && (num_ms_lpf > 0 || num_ts_lpf > 0)) { if (num_ms_gvf == num_ts_gvf) HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvf, fn_ts_gvf, 0, num_interp); else HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvf, NULL, 2, num_interp); } /* load GV switch */ if (fn_gv_switch != NULL) HTS_Engine_load_gv_switch_from_fn(&engine, fn_gv_switch); /* set parameter */ HTS_Engine_set_sampling_rate(&engine, sampling_rate); HTS_Engine_set_fperiod(&engine, fperiod); HTS_Engine_set_alpha(&engine, alpha); HTS_Engine_set_gamma(&engine, stage); HTS_Engine_set_log_gain(&engine, use_log_gain); HTS_Engine_set_beta(&engine, beta); HTS_Engine_set_audio_buff_size(&engine, audio_buff_size); HTS_Engine_set_msd_threshold(&engine, 1, uv_threshold); /* set voiced/unvoiced threshold for stream[1] */ HTS_Engine_set_gv_weight(&engine, 0, gv_weight_mgc); HTS_Engine_set_gv_weight(&engine, 1, gv_weight_lf0); if (num_ms_lpf > 0 || num_ts_lpf > 0) HTS_Engine_set_gv_weight(&engine, 2, gv_weight_lpf); for (i = 0; i < num_interp; i++) { HTS_Engine_set_duration_interpolation_weight(&engine, i, rate_interp[i]); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, i, rate_interp[i]); HTS_Engine_set_parameter_interpolation_weight(&engine, 1, i, rate_interp[i]); if (num_ms_lpf > 0 || num_ts_lpf > 0) HTS_Engine_set_parameter_interpolation_weight(&engine, 2, i, rate_interp[i]); } if (num_interp == num_ms_gvm) for (i = 0; i < num_interp; i++) HTS_Engine_set_gv_interpolation_weight(&engine, 0, i, rate_interp[i]); if (num_interp == num_ms_gvl) for (i = 0; i < num_interp; i++) HTS_Engine_set_gv_interpolation_weight(&engine, 1, i, rate_interp[i]); if (num_interp == num_ms_gvf && (num_ms_lpf > 0 || num_ts_lpf > 0)) for (i = 0; i < num_interp; i++) HTS_Engine_set_gv_interpolation_weight(&engine, 2, i, rate_interp[i]); /* synthesis */ guifp=Getfp(guifn, "r"); while (getline(&labfn,&labfn_size,guifp) != -1) { chomp(labfn); HTS_Engine_load_label_from_fn(&engine, 
labfn); /* load label file */ if (phoneme_alignment) /* modify label */ HTS_Label_set_frame_specified_flag(&engine.label, TRUE); if (speech_speed != 1.0) /* modify label */ HTS_Label_set_speech_speed(&engine.label, speech_speed); HTS_Engine_create_sstream(&engine); /* parse label and determine state duration */ if (half_tone != 0.0) { /* modify f0 */ for (i = 0; i < HTS_SStreamSet_get_total_state(&engine.sss); i++) { f = HTS_SStreamSet_get_mean(&engine.sss, 1, i, 0); f += half_tone * log(2.0) / 12; if (f < log(10.0)) f = log(10.0); HTS_SStreamSet_set_mean(&engine.sss, 1, i, 0, f); } } HTS_Engine_create_pstream(&engine); /* generate speech parameter vector sequence */ HTS_Engine_create_gstream(&engine); /* synthesize speech */ /* output */ if (rawfp) HTS_Engine_save_generated_speech(&engine, rawfp); /* free */ HTS_Engine_refresh(&engine); } if (guifp != NULL) fclose(guifp); free(labfn); /* free memory */ HTS_Engine_clear(&engine); free(rate_interp); free(fn_ws_mgc); free(fn_ws_lf0); free(fn_ws_lpf); free(fn_ms_mgc); free(fn_ms_lf0); free(fn_ms_lpf); free(fn_ms_dur); free(fn_ts_mgc); free(fn_ts_lf0); free(fn_ts_lpf); free(fn_ts_dur); free(fn_ms_gvm); free(fn_ms_gvl); free(fn_ms_gvf); free(fn_ts_gvm); free(fn_ts_gvl); free(fn_ts_gvf); /* close files */ if (rawfp != NULL) fclose(rawfp); return 0; }