static int setup(struct app *app) { #ifdef HTS_MELP #define NR_STREAMS 3 #else #define NR_STREAMS 2 #endif /* HTS_MELP */ double gv_weight[] = { app->gv_weight_mgc, app->gv_weight_lf0, #ifdef HTS_MELP app->gv_weight_lpf #endif /* HTS_MELP */ }; int i; app->play_h = play_init(&app->play_info, "default", SND_PCM_FORMAT_S16_LE, 1, app->sampling_rate, 500000, 8); Mecab_initialize(&app->mecab); if (Mecab_load(&app->mecab, app->dn_mecab) != TRUE) return -1; NJD_initialize(&app->njd); JPCommon_initialize(&app->jpcommon); HTS_Engine_initialize(&app->engine); if (HTS_Engine_load(&app->engine, &app->fn_voice, 1) != TRUE) return -1; HTS_Engine_set_sampling_frequency(&app->engine, (size_t)app->sampling_rate); if (app->fperiod >= 0) HTS_Engine_set_fperiod(&app->engine, app->fperiod); if (app->alpha >= 0.0) HTS_Engine_set_alpha(&app->engine, app->alpha); if (app->beta >= 0.0) HTS_Engine_set_beta(&app->engine, app->beta); if (app->half_tone >= 0.0) HTS_Engine_add_half_tone(&app->engine, app->half_tone); if (app->audio_buff_size > 0) HTS_Engine_set_audio_buff_size(&app->engine, app->audio_buff_size); if (app->uv_threshold >= 0.0) HTS_Engine_set_msd_threshold(&app->engine, 1, app->uv_threshold); if (app->speed >= 0.0) HTS_Engine_set_speed(&app->engine, app->speed); for (i = 0; i < NR_STREAMS; i++) if (gv_weight[i] >= 0.0) HTS_Engine_set_gv_weight(&app->engine, i, gv_weight[i]); return 0; }
int main(int argc, char* argv[]) { size_t num_voices; char **fn_voices; char* in_fname; char* output_fname; FILE * outfp; char* dur_fname; FILE * durfp; bool print_label = false; bool print_utt = false; bool write_raw = false; bool write_durlabel = false; CFSAString LexFileName, LexDFileName; HTS_Engine engine; double speed = 1.1; size_t fr = 48000; size_t fp = 240; float alpha = 0.55; float beta = 0.0; float ht = 2.0; float th = 0.5; float gvw1 = 1.0; float gvw2 = 1.2; FSCInit(); fn_voices = (char **) malloc(argc * sizeof (char *)); if (argc < 11) { fprintf(stderr, "Viga: liiga vähe parameetreid\n\n"); PrintUsage(); } for (int i = 0; i < argc; i++) { if (CFSAString("-lex") == argv[i]) { if (i + 1 < argc) { LexFileName = argv[++i]; } else { return PrintUsage(); } } if (CFSAString("-lexd") == argv[i]) { if (i + 1 < argc) { LexDFileName = argv[++i]; } else { return PrintUsage(); } } if (CFSAString("-m") == argv[i]) { if (i + 1 < argc) { fn_voices[0] = argv[i + 1]; } else { fprintf(stderr, "Viga: puudub *.htsvoice fail\n"); PrintUsage(); exit(0); } } if (CFSAString("-o") == argv[i]) { if (i + 1 < argc) { output_fname = argv[i + 1]; cfileexists(output_fname); } else { fprintf(stderr, "Viga: puudb väljundfaili nimi\n"); PrintUsage(); exit(0); } } if (CFSAString("-f") == argv[i]) { if (i + 1 < argc) { in_fname = argv[i + 1]; } else { fprintf(stderr, "Viga: puudb sisendfaili nimi\n"); PrintUsage(); exit(0); } } if (CFSAString("-s") == argv[i]) { if (i + 1 < argc) { samplerate(fr, fp, alpha, atoi(argv[i + 1])); } } if (CFSAString("-r") == argv[i]) { if (i + 1 < argc) { speed = atof(argv[i + 1]); } } if (CFSAString("-ht") == argv[i]) { if (i + 1 < argc) { ht = atof(argv[i + 1]); } } if (CFSAString("-gvw1") == argv[i]) { if (i + 1 < argc) { gvw1 = atof(argv[i + 1]); } } if (CFSAString("-gvw2") == argv[i]) { if (i + 1 < argc) { gvw2 = atof(argv[i + 1]); } } if (CFSAString("-debug") == argv[i]) { print_label = true; } if (CFSAString("-utt") == argv[i]) { print_utt = true; } if 
(CFSAString("-raw") == argv[i]) { write_raw = true; } if (CFSAString("-dur") == argv[i]) { if (i + 1 < argc) { dur_fname = argv[i + 1]; cfileexists(dur_fname); write_durlabel = true; } else { fprintf(stderr, "Viga: puudb kestustefaili nimi\n"); PrintUsage(); exit(0); } } } Linguistic.Open(LexFileName); Disambiguator.Open(LexDFileName); CFSWString text; ReadUTF8Text(text, in_fname); HTS_Engine_initialize(&engine); if (HTS_Engine_load(&engine, fn_voices, 1) != TRUE) { fprintf(stderr, "Viga: puudub *.htsvoice. %p\n", fn_voices[0]); free(fn_voices); HTS_Engine_clear(&engine); exit(1); } free(fn_voices); HTS_Engine_set_sampling_frequency(&engine, (size_t) fr); HTS_Engine_set_phoneme_alignment_flag(&engine, FALSE); HTS_Engine_set_fperiod(&engine, (size_t) fp); HTS_Engine_set_alpha(&engine, alpha); HTS_Engine_set_beta(&engine, beta); HTS_Engine_set_speed(&engine, speed); HTS_Engine_add_half_tone(&engine, ht); HTS_Engine_set_msd_threshold(&engine, 1, th); /* HTS_Engine_set_duration_interpolation_weight(&engine, 1, diw); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 0, piw1); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 1, piw2); HTS_Engine_set_gv_interpolation_weight(&engine, 0, 0, giw1); HTS_Engine_set_gv_interpolation_weight(&engine, 0, 1, giw2); */ HTS_Engine_set_gv_weight(&engine, 0, gvw1); HTS_Engine_set_gv_weight(&engine, 1, gvw2); text = DealWithText(text); CFSArray<CFSWString> res = do_utterances(text); INTPTR data_size = 0; outfp = fopen(output_fname, "wb"); if (write_durlabel) durfp = fopen(dur_fname, "w"); if (!write_raw) HTS_Engine_write_header(&engine, outfp, 1); for (INTPTR i = 0; i < res.GetSize(); i++) { CFSArray<CFSWString> label = do_all(res[i], print_label, print_utt); std::vector<std::string> v; v = to_vector(label); std::vector<char*> vc; fill_char_vector(v, vc); size_t n_lines = vc.size(); if (HTS_Engine_synthesize_from_strings(&engine, &vc[0], n_lines) != TRUE) { fprintf(stderr, "Viga: süntees ebaonnestus.\n"); 
HTS_Engine_clear(&engine); exit(1); } clean_char_vector(vc); data_size += HTS_Engine_engine_speech_size(&engine); if (write_durlabel) HTS_Engine_save_durlabel(&engine, durfp); HTS_Engine_save_generated_speech(&engine, outfp); HTS_Engine_refresh(&engine); } //synth loop if (!write_raw) HTS_Engine_write_header(&engine, outfp, data_size); if (write_durlabel) fclose(durfp); fclose(outfp); HTS_Engine_clear(&engine); Linguistic.Close(); FSCTerminate(); return 0; }
// Forward the stored `rate` to the HTS engine as its speed.
// When `rate` compares equal to 1 the engine is left untouched.
void std_hts_engine_impl::set_speed()
{
  if (!(rate == 1)) {
    HTS_Engine_set_speed(engine.get(), rate);
  }
}
/*
 * Flite_HTS_Engine_set_speed: set the speech speed.
 *
 * f: wrapper whose embedded HTS engine receives the setting
 * d: speed value, handed unchanged to HTS_Engine_set_speed()
 */
void Flite_HTS_Engine_set_speed(Flite_HTS_Engine * f, double d)
{
   HTS_Engine *hts = &f->engine;

   HTS_Engine_set_speed(hts, d);
}
/*
 * Open_JTalk_set_speed: hand the speed value `f` unchanged to
 * HTS_Engine_set_speed() for the HTS engine embedded in `open_jtalk`.
 */
static void Open_JTalk_set_speed(Open_JTalk * open_jtalk, double f)
{
   HTS_Engine *hts = &open_jtalk->engine;

   HTS_Engine_set_speed(hts, f);
}
int htsSynthesize(int argc, char **argv) { int i; double f; /* hts_engine API */ HTS_Engine engine; /* HTS voices */ size_t num_voices; char **fn_voices; /* input label file name */ char *labfn = NULL; /* output file pointers */ FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL; /* interpolation weights */ size_t num_interpolation_weights; /* output usage */ if (argc <= 1) usage(); /* initialize hts_engine API */ HTS_Engine_initialize(&engine); /* get HTS voice file names */ num_voices = 0; fn_voices = (char **) malloc(argc * sizeof(char *)); for (i = 0; i < argc; i++) { if (argv[i][0] == '-' && argv[i][1] == 'm') fn_voices[num_voices++] = argv[++i]; if (argv[i][0] == '-' && argv[i][1] == 'h') usage(); } if (num_voices == 0) { fprintf(stderr, "Error: HTS voice must be specified.\n"); free(fn_voices); return (-1); } /* load HTS voices */ if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) { fprintf(stderr, "Error: HTS voices cannot be loaded.\n"); free(fn_voices); HTS_Engine_clear(&engine); return (-1); } free(fn_voices); /* get options */ while (--argc) { if (**++argv == '-') { switch (*(*argv + 1)) { case 'v': switch (*(*argv + 2)) { case 'p': HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE); break; default: fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } break; case 'o': switch (*(*argv + 2)) { case 'w': wavfp = fopen(*++argv, "wb"); break; case 'r': rawfp = fopen(*++argv, "wb"); break; case 'd': durfp = fopen(*++argv, "wt"); break; case 'm': mgcfp = fopen(*++argv, "wb"); break; case 'f': case 'p': lf0fp = fopen(*++argv, "wb"); break; case 'l': lpffp = fopen(*++argv, "wb"); break; case 't': tracefp = fopen(*++argv, "wt"); break; default: fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'h': usage(); break; case 'm': argv++; /* HTS voices were already 
loaded */ --argc; break; case 's': HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv)); --argc; break; case 'p': HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv)); --argc; break; case 'a': HTS_Engine_set_alpha(&engine, atof(*++argv)); --argc; break; case 'b': HTS_Engine_set_beta(&engine, atof(*++argv)); --argc; break; case 'r': HTS_Engine_set_speed(&engine, atof(*++argv)); --argc; break; case 'f': switch (*(*argv + 2)) { case 'm': HTS_Engine_add_half_tone(&engine, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'u': HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv)); --argc; break; case 'i': num_interpolation_weights = atoi(*++argv); argc--; if (num_interpolation_weights != num_voices) { HTS_Engine_clear(&engine); return(-1); } for (i = 0; i < (int) num_interpolation_weights; i++) { f = atof(*++argv); argc--; HTS_Engine_set_duration_interpolation_weight(&engine, i, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f); } break; case 'j': switch (*(*argv + 2)) { case 'm': HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv)); break; case 'f': case 'p': HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return(-1); } --argc; break; case 'g': HTS_Engine_set_volume(&engine, atof(*++argv)); --argc; break; case 'z': HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv)); --argc; break; default: fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1)); HTS_Engine_clear(&engine); return(-1); } } else { labfn = *argv; } } /* synthesize */ if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) { fprintf(stderr, "Error: 
waveform cannot be synthesized.\n"); HTS_Engine_clear(&engine); return(-1); } /* output */ if (tracefp != NULL) HTS_Engine_save_information(&engine, tracefp); if (durfp != NULL) HTS_Engine_save_label(&engine, durfp); if (rawfp) HTS_Engine_save_generated_speech(&engine, rawfp); if (wavfp) HTS_Engine_save_riff(&engine, wavfp); if (mgcfp) HTS_Engine_save_generated_parameter(&engine, 0, mgcfp); if (lf0fp) HTS_Engine_save_generated_parameter(&engine, 1, lf0fp); if (lpffp) HTS_Engine_save_generated_parameter(&engine, 2, lpffp); /* reset */ HTS_Engine_refresh(&engine); /* free memory */ HTS_Engine_clear(&engine); /* close files */ if (durfp != NULL) fclose(durfp); if (mgcfp != NULL) fclose(mgcfp); if (lf0fp != NULL) fclose(lf0fp); if (lpffp != NULL) fclose(lpffp); if (wavfp != NULL) fclose(wavfp); if (rawfp != NULL) fclose(rawfp); if (tracefp != NULL) fclose(tracefp); return 0; }