/*
 * Flite_HTS_Engine_synthesize: synthesize speech
 *
 * Runs Flite text analysis on txt, creates one label string for every item
 * of the utterance's "Segment" relation, feeds those labels to the HTS
 * engine and, when wav is not NULL, writes the result to that path as RIFF.
 *
 *   f   : engine wrapper; its `engine` member is used for synthesis
 *   txt : input text; NULL is rejected
 *   wav : output file path, or NULL to skip writing a file
 *
 * Returns TRUE only if synthesis succeeded and, when requested, the output
 * file could be opened. Returns FALSE on NULL arguments, failed text
 * analysis, an utterance with no segments, allocation failure, synthesis
 * failure, or an output file that cannot be opened.
 */
HTS_Boolean Flite_HTS_Engine_synthesize(Flite_HTS_Engine * f, const char *txt, const char *wav)
{
   int i;
   FILE *fp;
   cst_voice *v = NULL;
   cst_utterance *u = NULL;
   cst_item *s = NULL;
   char **label_data = NULL;
   int label_size = 0;
   HTS_Boolean result = FALSE;

   if (f == NULL || txt == NULL)
      return FALSE;

   /* text analysis part */
   v = REGISTER_VOX(NULL);
   if (v == NULL)
      return FALSE;
   u = flite_synth_text(txt, v);
   if (u == NULL) {
      UNREGISTER_VOX(v);
      return FALSE;
   }

   /* count the segments so the label table can be sized up front */
   for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
      label_size++;
   if (label_size <= 0)
      goto cleanup;

   /* calloc leaves every slot NULL, so cleanup can free a partly built table */
   label_data = (char **) calloc(label_size, sizeof(char *));
   if (label_data == NULL)
      goto cleanup;
   for (i = 0, s = relation_head(utt_relation(u, "Segment")); s && i < label_size; s = item_next(s), i++) {
      label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
      if (label_data[i] == NULL)
         goto cleanup;
      Flite_HTS_Engine_create_label(f, s, label_data[i]);
   }

   /* speech synthesis part */
   if (HTS_Engine_synthesize_from_strings(&f->engine, label_data, label_size) == TRUE) {
      result = TRUE;
      if (wav != NULL) {
         fp = fopen(wav, "wb");
         if (fp == NULL) {
            /* output path not writable: report failure instead of passing NULL on */
            result = FALSE;
         } else {
            HTS_Engine_save_riff(&f->engine, fp);
            fclose(fp);
         }
      }
   }
   HTS_Engine_refresh(&f->engine);

 cleanup:
   if (label_data != NULL) {
      for (i = 0; i < label_size; i++)
         free(label_data[i]);      /* free(NULL) is a no-op for unfilled slots */
      free(label_data);
   }
   delete_utterance(u);
   UNREGISTER_VOX(v);

   return result;
}
static int synthesize(struct app *app, char *txt) { char buff[MAXBUFLEN]; int label_size; int r = -1; text2mecab(buff, txt); Mecab_analysis(&app->mecab, buff); mecab2njd(&app->njd, Mecab_get_feature(&app->mecab), Mecab_get_size(&app->mecab)); njd_set_pronunciation(&app->njd); njd_set_digit(&app->njd); njd_set_accent_phrase(&app->njd); njd_set_accent_type(&app->njd); njd_set_unvoiced_vowel(&app->njd); njd_set_long_vowel(&app->njd); njd2jpcommon(&app->jpcommon, &app->njd); JPCommon_make_label(&app->jpcommon); label_size = JPCommon_get_label_size(&app->jpcommon); if (label_size > 2) { if (HTS_Engine_synthesize_from_strings( &app->engine, JPCommon_get_label_feature(&app->jpcommon), label_size) == TRUE) { unsigned int pcm_len; r = 0; /* success */ pcm_len = HTS_Engine_get_generated_speech_size( &app->engine); app->pcm = malloc(pcm_len * sizeof(short)); HTS_Engine_get_generated_speech(&app->engine, app->pcm); play_write(app->play_h, app->pcm, pcm_len * sizeof(short)); } if (app->logfp) { fprintf(app->logfp, "[Text analysis result]\n"); NJD_fprint(&app->njd, app->logfp); fprintf(app->logfp, "\n[Output label]\n"); HTS_Engine_save_label(&app->engine, app->logfp); fprintf(app->logfp, "\n"); HTS_Engine_save_information(&app->engine, app->logfp); } HTS_Engine_refresh(&app->engine); } JPCommon_refresh(&app->jpcommon); NJD_refresh(&app->njd); Mecab_refresh(&app->mecab); return r; }
/*
 * Open_JTalk_synthesis: analyse txt, synthesize it, and optionally dump the
 * waveform and a log.
 *
 *   open_jtalk : holder of the mecab / njd / jpcommon / engine state
 *   txt        : input text
 *   wavfp      : if not NULL, receives the RIFF output
 *   logfp      : if not NULL, receives the analysis result, the output
 *                labels and the engine information
 *
 * Synthesis is only attempted when more than two labels were produced.
 * Returns 1 when HTS_Engine_synthesize_from_strings reported TRUE, else 0.
 * All state is refreshed before returning.
 */
static int Open_JTalk_synthesis(Open_JTalk * open_jtalk, const char *txt, FILE * wavfp, FILE * logfp)
{
   char mecab_input[MAXBUFLEN];
   int label_count;
   int ok = 0;

   /* front end: text -> mecab -> njd -> jpcommon labels */
   text2mecab(mecab_input, txt);
   Mecab_analysis(&open_jtalk->mecab, mecab_input);
   mecab2njd(&open_jtalk->njd,
             Mecab_get_feature(&open_jtalk->mecab),
             Mecab_get_size(&open_jtalk->mecab));

   njd_set_pronunciation(&open_jtalk->njd);
   njd_set_digit(&open_jtalk->njd);
   njd_set_accent_phrase(&open_jtalk->njd);
   njd_set_accent_type(&open_jtalk->njd);
   njd_set_unvoiced_vowel(&open_jtalk->njd);
   njd_set_long_vowel(&open_jtalk->njd);

   njd2jpcommon(&open_jtalk->jpcommon, &open_jtalk->njd);
   JPCommon_make_label(&open_jtalk->jpcommon);

   /* back end: only run the engine when there are more than two labels */
   label_count = JPCommon_get_label_size(&open_jtalk->jpcommon);
   if (label_count > 2) {
      ok = (HTS_Engine_synthesize_from_strings(&open_jtalk->engine,
                                               JPCommon_get_label_feature(&open_jtalk->jpcommon),
                                               label_count) == TRUE) ? 1 : 0;

      if (wavfp != NULL) {
         HTS_Engine_save_riff(&open_jtalk->engine, wavfp);
      }

      if (logfp != NULL) {
         fprintf(logfp, "[Text analysis result]\n");
         NJD_fprint(&open_jtalk->njd, logfp);
         fprintf(logfp, "\n[Output label]\n");
         HTS_Engine_save_label(&open_jtalk->engine, logfp);
         fprintf(logfp, "\n");
         HTS_Engine_save_information(&open_jtalk->engine, logfp);
      }

      HTS_Engine_refresh(&open_jtalk->engine);
   }

   /* reset per-utterance state */
   JPCommon_refresh(&open_jtalk->jpcommon);
   NJD_refresh(&open_jtalk->njd);
   Mecab_refresh(&open_jtalk->mecab);

   return ok;
}
int main(int argc, char* argv[]) { size_t num_voices; char **fn_voices; char* in_fname; char* output_fname; FILE * outfp; char* dur_fname; FILE * durfp; bool print_label = false; bool print_utt = false; bool write_raw = false; bool write_durlabel = false; CFSAString LexFileName, LexDFileName; HTS_Engine engine; double speed = 1.1; size_t fr = 48000; size_t fp = 240; float alpha = 0.55; float beta = 0.0; float ht = 2.0; float th = 0.5; float gvw1 = 1.0; float gvw2 = 1.2; FSCInit(); fn_voices = (char **) malloc(argc * sizeof (char *)); if (argc < 11) { fprintf(stderr, "Viga: liiga vähe parameetreid\n\n"); PrintUsage(); } for (int i = 0; i < argc; i++) { if (CFSAString("-lex") == argv[i]) { if (i + 1 < argc) { LexFileName = argv[++i]; } else { return PrintUsage(); } } if (CFSAString("-lexd") == argv[i]) { if (i + 1 < argc) { LexDFileName = argv[++i]; } else { return PrintUsage(); } } if (CFSAString("-m") == argv[i]) { if (i + 1 < argc) { fn_voices[0] = argv[i + 1]; } else { fprintf(stderr, "Viga: puudub *.htsvoice fail\n"); PrintUsage(); exit(0); } } if (CFSAString("-o") == argv[i]) { if (i + 1 < argc) { output_fname = argv[i + 1]; cfileexists(output_fname); } else { fprintf(stderr, "Viga: puudb väljundfaili nimi\n"); PrintUsage(); exit(0); } } if (CFSAString("-f") == argv[i]) { if (i + 1 < argc) { in_fname = argv[i + 1]; } else { fprintf(stderr, "Viga: puudb sisendfaili nimi\n"); PrintUsage(); exit(0); } } if (CFSAString("-s") == argv[i]) { if (i + 1 < argc) { samplerate(fr, fp, alpha, atoi(argv[i + 1])); } } if (CFSAString("-r") == argv[i]) { if (i + 1 < argc) { speed = atof(argv[i + 1]); } } if (CFSAString("-ht") == argv[i]) { if (i + 1 < argc) { ht = atof(argv[i + 1]); } } if (CFSAString("-gvw1") == argv[i]) { if (i + 1 < argc) { gvw1 = atof(argv[i + 1]); } } if (CFSAString("-gvw2") == argv[i]) { if (i + 1 < argc) { gvw2 = atof(argv[i + 1]); } } if (CFSAString("-debug") == argv[i]) { print_label = true; } if (CFSAString("-utt") == argv[i]) { print_utt = true; } if 
(CFSAString("-raw") == argv[i]) { write_raw = true; } if (CFSAString("-dur") == argv[i]) { if (i + 1 < argc) { dur_fname = argv[i + 1]; cfileexists(dur_fname); write_durlabel = true; } else { fprintf(stderr, "Viga: puudb kestustefaili nimi\n"); PrintUsage(); exit(0); } } } Linguistic.Open(LexFileName); Disambiguator.Open(LexDFileName); CFSWString text; ReadUTF8Text(text, in_fname); HTS_Engine_initialize(&engine); if (HTS_Engine_load(&engine, fn_voices, 1) != TRUE) { fprintf(stderr, "Viga: puudub *.htsvoice. %p\n", fn_voices[0]); free(fn_voices); HTS_Engine_clear(&engine); exit(1); } free(fn_voices); HTS_Engine_set_sampling_frequency(&engine, (size_t) fr); HTS_Engine_set_phoneme_alignment_flag(&engine, FALSE); HTS_Engine_set_fperiod(&engine, (size_t) fp); HTS_Engine_set_alpha(&engine, alpha); HTS_Engine_set_beta(&engine, beta); HTS_Engine_set_speed(&engine, speed); HTS_Engine_add_half_tone(&engine, ht); HTS_Engine_set_msd_threshold(&engine, 1, th); /* HTS_Engine_set_duration_interpolation_weight(&engine, 1, diw); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 0, piw1); HTS_Engine_set_parameter_interpolation_weight(&engine, 0, 1, piw2); HTS_Engine_set_gv_interpolation_weight(&engine, 0, 0, giw1); HTS_Engine_set_gv_interpolation_weight(&engine, 0, 1, giw2); */ HTS_Engine_set_gv_weight(&engine, 0, gvw1); HTS_Engine_set_gv_weight(&engine, 1, gvw2); text = DealWithText(text); CFSArray<CFSWString> res = do_utterances(text); INTPTR data_size = 0; outfp = fopen(output_fname, "wb"); if (write_durlabel) durfp = fopen(dur_fname, "w"); if (!write_raw) HTS_Engine_write_header(&engine, outfp, 1); for (INTPTR i = 0; i < res.GetSize(); i++) { CFSArray<CFSWString> label = do_all(res[i], print_label, print_utt); std::vector<std::string> v; v = to_vector(label); std::vector<char*> vc; fill_char_vector(v, vc); size_t n_lines = vc.size(); if (HTS_Engine_synthesize_from_strings(&engine, &vc[0], n_lines) != TRUE) { fprintf(stderr, "Viga: süntees ebaonnestus.\n"); 
HTS_Engine_clear(&engine); exit(1); } clean_char_vector(vc); data_size += HTS_Engine_engine_speech_size(&engine); if (write_durlabel) HTS_Engine_save_durlabel(&engine, durfp); HTS_Engine_save_generated_speech(&engine, outfp); HTS_Engine_refresh(&engine); } //synth loop if (!write_raw) HTS_Engine_write_header(&engine, outfp, data_size); if (write_durlabel) fclose(durfp); fclose(outfp); HTS_Engine_clear(&engine); Linguistic.Close(); FSCTerminate(); return 0; }