void TextToSpeech::synthesis(char *txt, FILE * wavfp) { char buff[MAXBUFLEN]; text2mecab(buff, txt); Mecab_analysis(&open_jtalk_.mecab, buff); mecab2njd(&open_jtalk_.njd, Mecab_get_feature(&open_jtalk_.mecab), Mecab_get_size(&open_jtalk_.mecab)); njd_set_pronunciation(&open_jtalk_.njd); njd_set_digit(&open_jtalk_.njd); njd_set_accent_phrase(&open_jtalk_.njd); njd_set_accent_type(&open_jtalk_.njd); njd_set_unvoiced_vowel(&open_jtalk_.njd); njd_set_long_vowel(&open_jtalk_.njd); njd2jpcommon(&open_jtalk_.jpcommon, &open_jtalk_.njd); JPCommon_make_label(&open_jtalk_.jpcommon); if (JPCommon_get_label_size(&open_jtalk_.jpcommon) > 2) { HTS_Engine_load_label_from_string_list( &open_jtalk_.engine, JPCommon_get_label_feature(&open_jtalk_.jpcommon), JPCommon_get_label_size(&open_jtalk_.jpcommon) ); HTS_Engine_create_sstream(&open_jtalk_.engine); HTS_Engine_create_pstream(&open_jtalk_.engine); HTS_Engine_create_gstream(&open_jtalk_.engine); if (wavfp != NULL) HTS_Engine_save_riff(&open_jtalk_.engine, wavfp); HTS_Engine_refresh(&open_jtalk_.engine); } JPCommon_refresh(&open_jtalk_.jpcommon); NJD_refresh(&open_jtalk_.njd); Mecab_refresh(&open_jtalk_.mecab); }
void TextToSpeech::synthesis(const char *txt) { char buff[MAXBUFLEN]; text2mecab(buff, txt); Mecab_analysis(&open_jtalk_.mecab, buff); mecab2njd(&open_jtalk_.njd, Mecab_get_feature(&open_jtalk_.mecab), Mecab_get_size(&open_jtalk_.mecab)); njd_set_pronunciation(&open_jtalk_.njd); njd_set_digit(&open_jtalk_.njd); njd_set_accent_phrase(&open_jtalk_.njd); njd_set_accent_type(&open_jtalk_.njd); njd_set_unvoiced_vowel(&open_jtalk_.njd); njd_set_long_vowel(&open_jtalk_.njd); njd2jpcommon(&open_jtalk_.jpcommon, &open_jtalk_.njd); JPCommon_make_label(&open_jtalk_.jpcommon); if (JPCommon_get_label_size(&open_jtalk_.jpcommon) > 2) { unsigned int pcm_len; HTS_Engine_load_label_from_string_list( &open_jtalk_.engine, JPCommon_get_label_feature(&open_jtalk_.jpcommon), JPCommon_get_label_size(&open_jtalk_.jpcommon) ); HTS_Engine_create_sstream(&open_jtalk_.engine); HTS_Engine_create_pstream(&open_jtalk_.engine); HTS_Engine_create_gstream(&open_jtalk_.engine); pcm_len = HTS_Engine_get_generated_speech_size(&open_jtalk_.engine); pcm_ = new short[pcm_len]; HTS_Engine_get_generated_speech(&open_jtalk_.engine, pcm_); play_write(play_h_, pcm_, pcm_len * sizeof(short)); HTS_Engine_refresh(&open_jtalk_.engine); } JPCommon_refresh(&open_jtalk_.jpcommon); NJD_refresh(&open_jtalk_.njd); Mecab_refresh(&open_jtalk_.mecab); }
/*
 * OpenJTalk_synthesis_towav: synthesize `text` and write it to a WAV file.
 *
 *   openjtalk    pointer to the engine handle; (*openjtalk) supplies the
 *                mecab, njd, jpcommon and engine state as well as the
 *                `errorout` message buffer
 *   text         input text handed to text2mecab()
 *   wavfilename  path opened with mode "wb"
 *
 * Returns 0 when the output file cannot be opened (a message is formatted
 * into (*openjtalk)->errorout), 1 otherwise.  Note that 1 is also returned
 * when the label count is 2 or less and nothing is written to the file.
 *
 * NOTE(review): the sprintf() into `errorout` and text2mecab() into the
 * MAXBUFLEN stack buffer are both unbounded here - confirm the buffer sizes
 * against the longest expected file name / text.
 */
int OpenJTalk_synthesis_towav(OpenJTalk** openjtalk,const char* text, const char* wavfilename)
{
   char buff[MAXBUFLEN];
   OpenJTalk *oj;
   FILE *out;
   int n_labels;

   out = fopen(wavfilename, "wb");
   oj = *openjtalk;
   if (out == NULL) {
      sprintf(oj->errorout, "can not open %s.", wavfilename);
      return 0;
   }

   /* text analysis: text -> MeCab input -> MeCab features -> NJD */
   text2mecab(buff, (char *) text);
   Mecab_analysis(oj->mecab, buff);
   mecab2njd(&oj->njd, Mecab_get_feature(oj->mecab), Mecab_get_size(oj->mecab));

   /* NJD passes, applied in this fixed order */
   njd_set_pronunciation(&oj->njd);
   njd_set_digit(&oj->njd);
   njd_set_accent_phrase(&oj->njd);
   njd_set_accent_type(&oj->njd);
   njd_set_unvoiced_vowel(&oj->njd);
   njd_set_long_vowel(&oj->njd);

   /* label generation */
   njd2jpcommon(&oj->jpcommon, &oj->njd);
   JPCommon_make_label(&oj->jpcommon);
   n_labels = JPCommon_get_label_size(&oj->jpcommon);

   /* speech synthesis: only attempted when more than two labels exist */
   if (n_labels > 2) {
      HTS_Engine_load_label_from_string_list(&oj->engine,
                                             JPCommon_get_label_feature(&oj->jpcommon),
                                             n_labels);
      HTS_Engine_create_sstream(&oj->engine);
      HTS_Engine_create_pstream(&oj->engine);
      HTS_Engine_create_gstream(&oj->engine);
      /* the file is always open at this point; no log output is produced */
      HTS_Engine_save_riff(&oj->engine, out);
      HTS_Engine_refresh(&oj->engine);
   }

   /* reset the analysis state for the next call */
   JPCommon_refresh(&oj->jpcommon);
   NJD_refresh(&oj->njd);
   Mecab_refresh(oj->mecab);

   fclose(out);
   return 1;
}
/*
 * Flite_HTS_Engine_synthesis: speech synthesis
 *
 * Runs flite text analysis on `txt`, builds one label string per item of the
 * utterance's "Segment" relation, feeds those labels to the HTS engine in
 * `f->engine` and, when `wavfp` is not NULL, saves the result with
 * HTS_Engine_save_riff().
 *
 *   f      engine wrapper; f->engine performs the synthesis and `f` is passed
 *          to Flite_HTS_Engine_create_label() for every segment
 *   txt    input text handed to flite_synth_text()
 *   wavfp  output stream for the RIFF data, or NULL to skip saving
 *
 * Returns nothing.  On any failure (voice registration, text analysis, empty
 * segment list, memory allocation) the function returns without synthesizing,
 * after releasing everything it acquired up to that point.
 */
void Flite_HTS_Engine_synthesis(Flite_HTS_Engine * f, char *txt, FILE * wavfp)
{
   int i;
   cst_voice *v = NULL;
   cst_utterance *u = NULL;
   cst_item *s = NULL;
   char **label_data = NULL;
   int label_size = 0;

   /* text analysis part */
   v = REGISTER_VOX(NULL);
   if (v == NULL)
      return;
   u = flite_synth_text(txt, v);
   if (u == NULL)
      goto cleanup;             /* the registered voice must still be released */
   for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
      label_size++;
   if (label_size <= 0)
      goto cleanup;             /* nothing to synthesize; release u and v */

   /* calloc zero-fills the table, so unfilled slots stay NULL for cleanup */
   label_data = (char **) calloc(label_size, sizeof(char *));
   if (label_data == NULL)
      goto cleanup;
   for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) {
      label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
      if (label_data[i] == NULL)
         goto cleanup;
      Flite_HTS_Engine_create_label(f, s, label_data[i]);
   }

   /* speech synthesis part */
   HTS_Engine_load_label_from_string_list(&f->engine, label_data, label_size);
   HTS_Engine_create_sstream(&f->engine);
   HTS_Engine_create_pstream(&f->engine);
   HTS_Engine_create_gstream(&f->engine);
   if (wavfp != NULL)
      HTS_Engine_save_riff(&f->engine, wavfp);
   HTS_Engine_refresh(&f->engine);

   /* single release path shared by success and failure */
 cleanup:
   if (label_data != NULL) {
      for (i = 0; i < label_size; i++)
         free(label_data[i]);   /* free(NULL) is a no-op for unfilled slots */
      free(label_data);
   }
   if (u != NULL)
      delete_utterance(u);
   UNREGISTER_VOX(v);
}
/*
 * OpenJTalk_synthesis: analyze `txt`, synthesize speech and optionally write
 * the waveform and a trace log.
 *
 *   open_jtalk  engine state (mecab, njd, jpcommon, engine members are used)
 *   txt         input text handed to text2mecab()
 *   wavfp       if not NULL, receives the RIFF data via HTS_Engine_save_riff()
 *   logfp       if not NULL, receives the NJD dump, the output labels and the
 *               engine information
 *
 * Synthesis and both outputs are skipped when the label count is 2 or less.
 * The analysis state (JPCommon, NJD, Mecab) is refreshed before returning in
 * every case.
 *
 * NOTE(review): text2mecab() writes into a fixed MAXBUFLEN stack buffer and
 * no length check is visible here - confirm callers bound the size of `txt`.
 */
void OpenJTalk_synthesis(OpenJTalk * open_jtalk, char *txt, FILE * wavfp, FILE * logfp)
{
   char buff[MAXBUFLEN];
   int num_labels;

   /* text analysis: text -> MeCab input -> MeCab features -> NJD */
   text2mecab(buff, txt);
   Mecab_analysis(&open_jtalk->mecab, buff);
   mecab2njd(&open_jtalk->njd,
             Mecab_get_feature(&open_jtalk->mecab),
             Mecab_get_size(&open_jtalk->mecab));

   /* NJD passes, applied in this fixed order */
   njd_set_pronunciation(&open_jtalk->njd);
   njd_set_digit(&open_jtalk->njd);
   njd_set_accent_phrase(&open_jtalk->njd);
   njd_set_accent_type(&open_jtalk->njd);
   njd_set_unvoiced_vowel(&open_jtalk->njd);
   njd_set_long_vowel(&open_jtalk->njd);

   /* label generation */
   njd2jpcommon(&open_jtalk->jpcommon, &open_jtalk->njd);
   JPCommon_make_label(&open_jtalk->jpcommon);
   num_labels = JPCommon_get_label_size(&open_jtalk->jpcommon);

   /* speech synthesis: only attempted when more than two labels exist */
   if (num_labels > 2) {
      HTS_Engine_load_label_from_string_list(&open_jtalk->engine,
                                             JPCommon_get_label_feature(&open_jtalk->jpcommon),
                                             num_labels);
      HTS_Engine_create_sstream(&open_jtalk->engine);
      HTS_Engine_create_pstream(&open_jtalk->engine);
      HTS_Engine_create_gstream(&open_jtalk->engine);

      if (wavfp) {
         HTS_Engine_save_riff(&open_jtalk->engine, wavfp);
      }
      if (logfp) {
         fprintf(logfp, "[Text analysis result]\n");
         NJD_fprint(&open_jtalk->njd, logfp);
         fprintf(logfp, "\n[Output label]\n");
         HTS_Engine_save_label(&open_jtalk->engine, logfp);
         fprintf(logfp, "\n");
         HTS_Engine_save_information(&open_jtalk->engine, logfp);
      }

      HTS_Engine_refresh(&open_jtalk->engine);
   }

   /* reset the analysis state for the next call */
   JPCommon_refresh(&open_jtalk->jpcommon);
   NJD_refresh(&open_jtalk->njd);
   Mecab_refresh(&open_jtalk->mecab);
}
static void Run(const SUttProcessor *self, SUtterance *utt, s_erc *error) { SHTSEngineMESynthUttProc105 *HTSsynth = (SHTSEngineMESynthUttProc105*)self; SPlugin *audioPlugin; const SRelation *segmentRel; SAudio *audio = NULL; s_bool is_present; char **label_data = NULL; int label_size; const SItem *item; const SItem *itemItr; int counter; uint i; int frame; int state; const double rate = HTSsynth->engine.global.fperiod * 1e+7 / HTSsynth->engine.global.sampling_rate; int nstate; S_CLR_ERR(error); /* we require the segment relation */ is_present = SUtteranceRelationIsPresent(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceRelationIsPresent\" failed")) goto quit_error; if (!is_present) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to find 'Segment' relation in utterance"); goto quit_error; } segmentRel = SUtteranceGetRelation(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceGetRelation\" failed")) goto quit_error; item = SRelationHead(segmentRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationHead\" failed")) goto quit_error; itemItr = item; label_size = 0; while (itemItr != NULL) { label_size++; itemItr = SItemNext(itemItr, error); } label_data = S_CALLOC(char*, label_size); itemItr = item; counter = 0; while (itemItr != NULL) { SObject *dFeat; const char *tmp; dFeat = SItemPathToFeatProc(itemItr, "hts_labels", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPathToFeatProc\" failed")) goto quit_error; if (dFeat == NULL) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to generate hts labels for segment item"); goto quit_error; } tmp = SObjectGetString(dFeat, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectGetString\" failed")) goto quit_error; label_data[counter++] = s_strdup(tmp, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"s_strdup\" failed")) goto quit_error; SItemSetObject((SItem*)itemItr, "hts_label", dFeat, error); if 
(S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetObject\" failed")) goto quit_error; itemItr = SItemNext(itemItr, error); } /* speech synthesis part */ HTS_Engine_load_label_from_string_list(&(HTSsynth->engine), label_data, label_size); check_and_change_rate_volume(HTSsynth, utt, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"check_and_change_rate_volume\" failed")) goto quit_error; HTS_Engine_create_sstream(&(HTSsynth->engine)); check_and_change_tone(HTSsynth, utt, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"check_and_change_tone\" failed")) goto quit_error; HTS_Engine_create_pstream(&(HTSsynth->engine)); if (HTSsynth->me == TRUE) /* mixed excitation */ { HTS_Engine_create_gstream_me(&(HTSsynth->engine), HTSsynth->me_num_filters, HTSsynth->me_filter_order, HTSsynth->me_filter, HTSsynth->xp_sig, HTSsynth->xn_sig, HTSsynth->hp, HTSsynth->hn, HTSsynth->pd_filter, HTSsynth->pd_filter_order); } else { HTS_Engine_create_gstream(&(HTSsynth->engine)); } nstate = HTS_Speect_ModelSet_get_nstate(&(HTSsynth->engine)); itemItr = item; counter = 0; frame = 0; state = 0; while (itemItr != NULL) { int j; int duration = 0; float tmp; for (j = 0; j < nstate; j++) duration += HTS_Speect_SStreamSet_get_duration(&(HTSsynth->engine), state++); tmp = frame * rate; SItemSetFloat((SItem*)itemItr, "start", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; tmp = (frame + duration) * rate; SItemSetFloat((SItem*)itemItr, "end", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; frame += duration; itemItr = SItemNext(itemItr, error); counter++; } /* We need to give the utterance the audio plug-in. If we don't do * this and the voice is deleted before the utterance, then the * utterance can't do *anything* with the audio. Not even delete * it (segfault). This should be fast because it is already * loaded. 
* Note that this happens before the audio is set. This is because * utt features are a list implementation. */ audioPlugin = s_pm_load_plugin("audio.spi", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) goto quit_error; SUtteranceSetFeature(utt, "audio_plugin", S_OBJECT(audioPlugin), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) { S_DELETE(audioPlugin, "Run", error); goto quit_error; } /* create an audio object */ audio = S_NEW(SAudio, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Failed to create new 'SAudio' object")) goto quit_error; /* set audio feature in utterance */ SUtteranceSetFeature(utt, "audio", S_OBJECT(audio), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) { S_DELETE(audio, "Run", error); goto quit_error; } audio->sample_rate = HTSsynth->engine.global.sampling_rate; audio->num_samples = (uint32)HTS_Speect_GStreamSet_get_total_nsample(&(HTSsynth->engine)); audio->samples = S_MALLOC(float, audio->num_samples); if (audio->samples == NULL) { S_FTL_ERR(error, S_MEMERROR, "Run", "Failed to allocate memory for 'float' object"); goto quit_error; } /* write data */ for (i = 0; i < audio->num_samples; i++) audio->samples[i] = (float)(HTS_Speect_GStreamSet_get_speech(&(HTSsynth->engine), i) * 1.0); for (counter = 0; counter < label_size; counter++) S_FREE(label_data[counter]); S_FREE(label_data); HTS_Engine_refresh(&(HTSsynth->engine)); /* all OK here */ return; /* error clean-up code */ quit_error: if (label_data != NULL) { for (counter = 0; counter < label_size; counter++) { if (label_data[counter] != NULL) S_FREE(label_data[counter]); } S_FREE(label_data); } return; }
int main(int argc, char **argv) { int i; double f; char *labfn = NULL; size_t labfn_size=0; char *guifn = NULL; FILE *guifp = NULL; FILE *rawfp = NULL; /* number of speakers for interpolation */ int num_interp = 0; double *rate_interp = NULL; /* file names of models */ char **fn_ms_dur; char **fn_ms_mgc; char **fn_ms_lf0; char **fn_ms_lpf; /* number of each models for interpolation */ int num_ms_dur = 0, num_ms_mgc = 0, num_ms_lf0 = 0, num_ms_lpf = 0; /* file names of trees */ char **fn_ts_dur; char **fn_ts_mgc; char **fn_ts_lf0; char **fn_ts_lpf; /* number of each trees for interpolation */ int num_ts_dur = 0, num_ts_mgc = 0, num_ts_lf0 = 0, num_ts_lpf = 0; /* file names of windows */ char **fn_ws_mgc; char **fn_ws_lf0; char **fn_ws_lpf; int num_ws_mgc = 0, num_ws_lf0 = 0, num_ws_lpf = 0; /* file names of global variance */ char **fn_ms_gvm = NULL; char **fn_ms_gvl = NULL; char **fn_ms_gvf = NULL; int num_ms_gvm = 0, num_ms_gvl = 0, num_ms_gvf = 0; /* file names of global variance trees */ char **fn_ts_gvm = NULL; char **fn_ts_gvl = NULL; char **fn_ts_gvf = NULL; int num_ts_gvm = 0, num_ts_gvl = 0, num_ts_gvf = 0; /* file name of global variance switch */ char *fn_gv_switch = NULL; /* global parameter */ int sampling_rate = 16000; int fperiod = 80; double alpha = 0.42; int stage = 0; /* Gamma=-1/stage: if stage=0 then Gamma=0 */ double beta = 0.0; int audio_buff_size = 1600; double uv_threshold = 0.5; double gv_weight_mgc = 1.0; double gv_weight_lf0 = 1.0; double gv_weight_lpf = 1.0; double half_tone = 0.0; HTS_Boolean phoneme_alignment = FALSE; double speech_speed = 1.0; HTS_Boolean use_log_gain = FALSE; /* engine */ HTS_Engine engine; /* parse command line */ if (argc == 1) Usage(); /* delta window handler for mel-cepstrum */ fn_ws_mgc = (char **) calloc(argc, sizeof(char *)); /* delta window handler for log f0 */ fn_ws_lf0 = (char **) calloc(argc, sizeof(char *)); /* delta window handler for low-pass filter */ fn_ws_lpf = (char **) calloc(argc, sizeof(char *)); 
/* prepare for interpolation */ num_interp = GetNumInterp(argc, argv); rate_interp = (double *) calloc(num_interp, sizeof(double)); for (i = 0; i < num_interp; i++) rate_interp[i] = 1.0; fn_ms_dur = (char **) calloc(num_interp, sizeof(char *)); fn_ms_mgc = (char **) calloc(num_interp, sizeof(char *)); fn_ms_lf0 = (char **) calloc(num_interp, sizeof(char *)); fn_ms_lpf = (char **) calloc(num_interp, sizeof(char *)); fn_ts_dur = (char **) calloc(num_interp, sizeof(char *)); fn_ts_mgc = (char **) calloc(num_interp, sizeof(char *)); fn_ts_lf0 = (char **) calloc(num_interp, sizeof(char *)); fn_ts_lpf = (char **) calloc(num_interp, sizeof(char *)); fn_ms_gvm = (char **) calloc(num_interp, sizeof(char *)); fn_ms_gvl = (char **) calloc(num_interp, sizeof(char *)); fn_ms_gvf = (char **) calloc(num_interp, sizeof(char *)); fn_ts_gvm = (char **) calloc(num_interp, sizeof(char *)); fn_ts_gvl = (char **) calloc(num_interp, sizeof(char *)); fn_ts_gvf = (char **) calloc(num_interp, sizeof(char *)); /* read command */ while (--argc) { if (**++argv == '-') { switch (*(*argv + 1)) { case 'v': switch (*(*argv + 2)) { case 'p': phoneme_alignment = TRUE; break; default: Error(1, "hts_engine: Invalid option '-v%c'.\n", *(*argv + 2)); } break; case 't': switch (*(*argv + 2)) { case 'd': fn_ts_dur[num_ts_dur++] = *++argv; break; case 'm': fn_ts_mgc[num_ts_mgc++] = *++argv; break; case 'f': case 'p': fn_ts_lf0[num_ts_lf0++] = *++argv; break; case 'l': fn_ts_lpf[num_ts_lpf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-t%c'.\n", *(*argv + 2)); } --argc; break; case 'm': switch (*(*argv + 2)) { case 'd': fn_ms_dur[num_ms_dur++] = *++argv; break; case 'm': fn_ms_mgc[num_ms_mgc++] = *++argv; break; case 'f': case 'p': fn_ms_lf0[num_ms_lf0++] = *++argv; break; case 'l': fn_ms_lpf[num_ms_lpf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-m%c'.\n", *(*argv + 2)); } --argc; break; case 'd': switch (*(*argv + 2)) { case 'm': fn_ws_mgc[num_ws_mgc++] = 
*++argv; break; case 'f': case 'p': fn_ws_lf0[num_ws_lf0++] = *++argv; break; case 'l': fn_ws_lpf[num_ws_lpf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-d%c'.\n", *(*argv + 2)); } --argc; break; case 'o': switch (*(*argv + 2)) { case 'r': rawfp = Getfp(*++argv, "ab"); break; default: Error(1, "festcat-hts_engine: Invalid option '-o%c'.\n", *(*argv + 2)); } --argc; break; case 'h': Usage(); break; case 's': sampling_rate = atoi(*++argv); --argc; break; case 'p': fperiod = atoi(*++argv); --argc; break; case 'a': alpha = atof(*++argv); --argc; break; case 'g': stage = atoi(*++argv); --argc; break; case 'l': use_log_gain = TRUE; break; case 'b': beta = atof(*++argv); --argc; break; case 'r': speech_speed = atof(*++argv); --argc; break; case 'f': switch (*(*argv + 2)) { case 'm': f = atof(*++argv); if (f < -24.0) f = -24.0; if (f > 24.0) f = 24.0; half_tone = f; break; default: Error(1, "hts_engine: Invalid option '-f%c'.\n", *(*argv + 2)); } --argc; break; case 'u': uv_threshold = atof(*++argv); --argc; break; case 'i': ++argv; argc--; for (i = 0; i < num_interp; i++) { rate_interp[i] = atof(*++argv); argc--; } break; case 'e': switch (*(*argv + 2)) { case 'm': fn_ts_gvm[num_ts_gvm++] = *++argv; break; case 'f': case 'p': fn_ts_gvl[num_ts_gvl++] = *++argv; break; case 'l': fn_ts_gvf[num_ts_gvf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-e%c'.\n", *(*argv + 2)); } --argc; break; case 'c': switch (*(*argv + 2)) { case 'm': fn_ms_gvm[num_ms_gvm++] = *++argv; break; case 'f': case 'p': fn_ms_gvl[num_ms_gvl++] = *++argv; break; case 'l': fn_ms_gvf[num_ms_gvf++] = *++argv; break; default: Error(1, "hts_engine: Invalid option '-c%c'.\n", *(*argv + 2)); } --argc; break; case 'j': switch (*(*argv + 2)) { case 'm': gv_weight_mgc = atof(*++argv); break; case 'f': case 'p': gv_weight_lf0 = atof(*++argv); break; case 'l': gv_weight_lpf = atof(*++argv); break; default: Error(1, "hts_engine: Invalid option '-j%c'.\n", *(*argv + 2)); } 
--argc; break; case 'k': fn_gv_switch = *++argv; --argc; break; case 'z': audio_buff_size = atoi(*++argv); --argc; break; default: Error(1, "hts_engine: Invalid option '-%c'.\n", *(*argv + 1)); } } else { guifn = *argv; } } /* number of models,trees check */ if (num_interp != num_ts_dur || num_interp != num_ts_mgc || num_interp != num_ts_lf0 || num_interp != num_ms_dur || num_interp != num_ms_mgc || num_interp != num_ms_lf0) { Error(1, "hts_engine: specify %d models(trees) for each parameter.\n", num_interp); } if (num_ms_lpf > 0 || num_ts_lpf > 0) { if (num_interp != num_ms_lpf || num_interp != num_ts_lpf) { Error(1, "hts_engine: specify %d models(trees) for each parameter.\n", num_interp); } } /* initialize (stream[0] = spectrum, stream[1] = lf0, stream[2] = low-pass filter) */ if (num_ms_lpf > 0 || num_ts_lpf > 0) { HTS_Engine_initialize(&engine, 3); } else { HTS_Engine_initialize(&engine, 2); } /* load duration model */ HTS_Engine_load_duration_from_fn(&engine, fn_ms_dur, fn_ts_dur, num_interp); /* load stream[0] (spectrum model) */ HTS_Engine_load_parameter_from_fn(&engine, fn_ms_mgc, fn_ts_mgc, fn_ws_mgc, 0, FALSE, num_ws_mgc, num_interp); /* load stream[1] (lf0 model) */ HTS_Engine_load_parameter_from_fn(&engine, fn_ms_lf0, fn_ts_lf0, fn_ws_lf0, 1, TRUE, num_ws_lf0, num_interp); /* load stream[2] (low-pass filter model) */ if (num_ms_lpf > 0 || num_ts_lpf > 0) HTS_Engine_load_parameter_from_fn(&engine, fn_ms_lpf, fn_ts_lpf, fn_ws_lpf, 2, FALSE, num_ws_lpf, num_interp); /* load gv[0] (GV for spectrum) */ if (num_interp == num_ms_gvm) { if (num_ms_gvm == num_ts_gvm) HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvm, fn_ts_gvm, 0, num_interp); else HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvm, NULL, 0, num_interp); } /* load gv[1] (GV for lf0) */ if (num_interp == num_ms_gvl) { if (num_ms_gvl == num_ts_gvl) HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvl, fn_ts_gvl, 1, num_interp); else HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvl, NULL, 1, num_interp); } /* load 
gv[2] (GV for low-pass filter) */ if (num_interp == num_ms_gvf && (num_ms_lpf > 0 || num_ts_lpf > 0)) { if (num_ms_gvf == num_ts_gvf) HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvf, fn_ts_gvf, 0, num_interp); else HTS_Engine_load_gv_from_fn(&engine, fn_ms_gvf, NULL, 2, num_interp); } /* load GV switch */ if (fn_gv_switch != NULL) HTS_Engine_load_gv_switch_from_fn(&engine, fn_gv_switch); /* set parameter */ HTS_Engine_set_sampling_rate(&engine, sampling_rate); HTS_Engine_set_fperiod(&engine, fperiod); HTS_Engine_set_alpha(&engine, alpha); HTS_Engine_set_gamma(&engine, stage); HTS_Engine_set_log_gain(&engine, use_log_gain); HTS_Engine_set_beta(&engine, beta); HTS_Engine_set_audio_buff_size(&engine, audio_buff_size); HTS_Engine_set_msd_threshold(&engine, 1, uv_threshold); /* set voiced/unvoiced threshold for stream[1] */ HTS_Engine_set_gv_weight(&engine, 0, gv_weight_mgc); HTS_Engine_set_gv_weight(&engine, 1, gv_weight_lf0); if (num_ms_lpf > 0 || num_ts_lpf > 0) HTS_Engine_set_gv_weight(&engine, 2, gv_weight_lpf); for (i = 0; i < num_interp; i++) { HTS_Engine_set_duration_interpolation_weight(&engine, i, rate_interp[i]); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, i, rate_interp[i]); HTS_Engine_set_parameter_interpolation_weight(&engine, 1, i, rate_interp[i]); if (num_ms_lpf > 0 || num_ts_lpf > 0) HTS_Engine_set_parameter_interpolation_weight(&engine, 2, i, rate_interp[i]); } if (num_interp == num_ms_gvm) for (i = 0; i < num_interp; i++) HTS_Engine_set_gv_interpolation_weight(&engine, 0, i, rate_interp[i]); if (num_interp == num_ms_gvl) for (i = 0; i < num_interp; i++) HTS_Engine_set_gv_interpolation_weight(&engine, 1, i, rate_interp[i]); if (num_interp == num_ms_gvf && (num_ms_lpf > 0 || num_ts_lpf > 0)) for (i = 0; i < num_interp; i++) HTS_Engine_set_gv_interpolation_weight(&engine, 2, i, rate_interp[i]); /* synthesis */ guifp=Getfp(guifn, "r"); while (getline(&labfn,&labfn_size,guifp) != -1) { chomp(labfn); HTS_Engine_load_label_from_fn(&engine, 
labfn); /* load label file */ if (phoneme_alignment) /* modify label */ HTS_Label_set_frame_specified_flag(&engine.label, TRUE); if (speech_speed != 1.0) /* modify label */ HTS_Label_set_speech_speed(&engine.label, speech_speed); HTS_Engine_create_sstream(&engine); /* parse label and determine state duration */ if (half_tone != 0.0) { /* modify f0 */ for (i = 0; i < HTS_SStreamSet_get_total_state(&engine.sss); i++) { f = HTS_SStreamSet_get_mean(&engine.sss, 1, i, 0); f += half_tone * log(2.0) / 12; if (f < log(10.0)) f = log(10.0); HTS_SStreamSet_set_mean(&engine.sss, 1, i, 0, f); } } HTS_Engine_create_pstream(&engine); /* generate speech parameter vector sequence */ HTS_Engine_create_gstream(&engine); /* synthesize speech */ /* output */ if (rawfp) HTS_Engine_save_generated_speech(&engine, rawfp); /* free */ HTS_Engine_refresh(&engine); } if (guifp != NULL) fclose(guifp); free(labfn); /* free memory */ HTS_Engine_clear(&engine); free(rate_interp); free(fn_ws_mgc); free(fn_ws_lf0); free(fn_ws_lpf); free(fn_ms_mgc); free(fn_ms_lf0); free(fn_ms_lpf); free(fn_ms_dur); free(fn_ts_mgc); free(fn_ts_lf0); free(fn_ts_lpf); free(fn_ts_dur); free(fn_ms_gvm); free(fn_ms_gvl); free(fn_ms_gvf); free(fn_ts_gvm); free(fn_ts_gvl); free(fn_ts_gvf); /* close files */ if (rawfp != NULL) fclose(rawfp); return 0; }