void TextToSpeech::synthesis(char *txt, FILE * wavfp) { char buff[MAXBUFLEN]; text2mecab(buff, txt); Mecab_analysis(&open_jtalk_.mecab, buff); mecab2njd(&open_jtalk_.njd, Mecab_get_feature(&open_jtalk_.mecab), Mecab_get_size(&open_jtalk_.mecab)); njd_set_pronunciation(&open_jtalk_.njd); njd_set_digit(&open_jtalk_.njd); njd_set_accent_phrase(&open_jtalk_.njd); njd_set_accent_type(&open_jtalk_.njd); njd_set_unvoiced_vowel(&open_jtalk_.njd); njd_set_long_vowel(&open_jtalk_.njd); njd2jpcommon(&open_jtalk_.jpcommon, &open_jtalk_.njd); JPCommon_make_label(&open_jtalk_.jpcommon); if (JPCommon_get_label_size(&open_jtalk_.jpcommon) > 2) { HTS_Engine_load_label_from_string_list( &open_jtalk_.engine, JPCommon_get_label_feature(&open_jtalk_.jpcommon), JPCommon_get_label_size(&open_jtalk_.jpcommon) ); HTS_Engine_create_sstream(&open_jtalk_.engine); HTS_Engine_create_pstream(&open_jtalk_.engine); HTS_Engine_create_gstream(&open_jtalk_.engine); if (wavfp != NULL) HTS_Engine_save_riff(&open_jtalk_.engine, wavfp); HTS_Engine_refresh(&open_jtalk_.engine); } JPCommon_refresh(&open_jtalk_.jpcommon); NJD_refresh(&open_jtalk_.njd); Mecab_refresh(&open_jtalk_.mecab); }
/*
 * OpenJTalk_synthesis_towav: synthesize text and write the result to the file
 * named wavfilename using HTS_Engine_save_riff.
 *
 *   openjtalk    pointer to the handle pointer; neither level may be NULL
 *   text         text handed to text2mecab
 *   wavfilename  path opened with mode "wb" (created or truncated)
 *
 * Returns 0 when any argument is NULL or when the output file cannot be
 * opened; in the latter case a message is written to (*openjtalk)->errorout.
 * Returns 1 otherwise. Note that 1 is also returned when two or fewer labels
 * were produced: the engine stage is skipped and the file is left empty.
 *
 * NOTE(review): the sprintf into errorout and the text2mecab call are both
 * unbounded; the capacities of errorout and of the converted text are not
 * visible from here - confirm they cannot overflow.
 */
int OpenJTalk_synthesis_towav(OpenJTalk **openjtalk, const char *text, const char *wavfilename)
{
   char buff[MAXBUFLEN];
   FILE *wavfp;

   /* refuse NULL arguments instead of dereferencing them or passing them to fopen */
   if (openjtalk == NULL || *openjtalk == NULL || text == NULL || wavfilename == NULL)
      return 0;

   wavfp = fopen(wavfilename, "wb");
   if (!wavfp) {
      sprintf((*openjtalk)->errorout, "can not open %s.", wavfilename);
      return 0;
   }

   /* text -> Mecab -> NJD */
   text2mecab(buff, (char *) text);
   Mecab_analysis((*openjtalk)->mecab, buff);
   mecab2njd(&(*openjtalk)->njd,
             Mecab_get_feature((*openjtalk)->mecab),
             Mecab_get_size((*openjtalk)->mecab));
   njd_set_pronunciation(&(*openjtalk)->njd);
   njd_set_digit(&(*openjtalk)->njd);
   njd_set_accent_phrase(&(*openjtalk)->njd);
   njd_set_accent_type(&(*openjtalk)->njd);
   njd_set_unvoiced_vowel(&(*openjtalk)->njd);
   njd_set_long_vowel(&(*openjtalk)->njd);

   /* NJD -> JPCommon labels */
   njd2jpcommon(&(*openjtalk)->jpcommon, &(*openjtalk)->njd);
   JPCommon_make_label(&(*openjtalk)->jpcommon);

   /* labels -> speech; skipped when two or fewer labels were produced */
   if (JPCommon_get_label_size(&(*openjtalk)->jpcommon) > 2) {
      HTS_Engine_load_label_from_string_list(&(*openjtalk)->engine,
                                             JPCommon_get_label_feature(&(*openjtalk)->jpcommon),
                                             JPCommon_get_label_size(&(*openjtalk)->jpcommon));
      HTS_Engine_create_sstream(&(*openjtalk)->engine);
      HTS_Engine_create_pstream(&(*openjtalk)->engine);
      HTS_Engine_create_gstream(&(*openjtalk)->engine);
      HTS_Engine_save_riff(&(*openjtalk)->engine, wavfp);
      HTS_Engine_refresh(&(*openjtalk)->engine);
   }

   /* reset per-utterance state */
   JPCommon_refresh(&(*openjtalk)->jpcommon);
   NJD_refresh(&(*openjtalk)->njd);
   Mecab_refresh((*openjtalk)->mecab);

   fclose(wavfp);
   return 1;
}
/*
 * Flite_HTS_Engine_synthesize: synthesize speech
 *
 *   f    engine wrapper; f->engine is used for synthesis
 *   txt  text handed to flite_synth_text
 *   wav  path of the RIFF file to write, or NULL to write nothing
 *
 * Returns TRUE when the text was analysed, passed to
 * HTS_Engine_synthesize_from_strings and (if requested) the output file was
 * written. Returns FALSE when f or txt is NULL, the voice cannot be
 * registered, analysis yields no "Segment" items, an allocation fails, or the
 * output file cannot be opened. The voice, the utterance and the label
 * buffers are released on every path.
 */
HTS_Boolean Flite_HTS_Engine_synthesize(Flite_HTS_Engine * f, const char *txt, const char *wav)
{
   int i;
   FILE *fp;
   cst_voice *v = NULL;
   cst_utterance *u = NULL;
   cst_item *s = NULL;
   char **label_data = NULL;
   int label_size = 0;
   HTS_Boolean result = FALSE;

   if (f == NULL || txt == NULL)
      return FALSE;

   /* text analysis part */
   v = REGISTER_VOX(NULL);
   if (v == NULL)
      return FALSE;
   u = flite_synth_text(txt, v);
   if (u == NULL)
      goto cleanup;
   for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
      label_size++;
   if (label_size <= 0)
      goto cleanup;

   /* one MAXBUFLEN-sized label string per segment; calloc leaves unused
      slots NULL so a partial allocation can be freed uniformly */
   label_data = (char **) calloc(label_size, sizeof(char *));
   if (label_data == NULL)
      goto cleanup;
   for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) {
      label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
      if (label_data[i] == NULL)
         goto cleanup;
      Flite_HTS_Engine_create_label(f, s, label_data[i]);
   }

   /* speech synthesis part */
   HTS_Engine_synthesize_from_strings(&f->engine, label_data, label_size);
   result = TRUE;
   if (wav != NULL) {
      fp = fopen(wav, "wb");
      if (fp == NULL) {
         /* previously a NULL stream was handed to save_riff and fclose */
         result = FALSE;
      } else {
         HTS_Engine_save_riff(&f->engine, fp);
         fclose(fp);
      }
   }
   HTS_Engine_refresh(&f->engine);

 cleanup:
   if (label_data != NULL) {
      for (i = 0; i < label_size; i++)
         free(label_data[i]);
      free(label_data);
   }
   if (u != NULL)
      delete_utterance(u);
   UNREGISTER_VOX(v);

   return result;
}
/*
 * Flite_HTS_Engine_synthesis: speech synthesis
 *
 *   f      engine wrapper; f->engine is used for synthesis
 *   txt    text handed to flite_synth_text
 *   wavfp  already-open output stream, or NULL to skip writing; it is not
 *          closed here
 *
 * Returns without synthesizing when f or txt is NULL, the voice cannot be
 * registered, analysis yields no "Segment" items, or an allocation fails.
 * The voice, the utterance and the label buffers are released on every path
 * (the early exits used to leak the voice and the utterance).
 */
void Flite_HTS_Engine_synthesis(Flite_HTS_Engine * f, char *txt, FILE * wavfp)
{
   int i;
   cst_voice *v = NULL;
   cst_utterance *u = NULL;
   cst_item *s = NULL;
   char **label_data = NULL;
   int label_size = 0;

   if (f == NULL || txt == NULL)
      return;

   /* text analysis part */
   v = REGISTER_VOX(NULL);
   if (v == NULL)
      return;
   u = flite_synth_text(txt, v);
   if (u == NULL)
      goto cleanup;
   for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
      label_size++;
   if (label_size <= 0)
      goto cleanup;

   /* one MAXBUFLEN-sized label string per segment; calloc leaves unused
      slots NULL so a partial allocation can be freed uniformly */
   label_data = (char **) calloc(label_size, sizeof(char *));
   if (label_data == NULL)
      goto cleanup;
   for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) {
      label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
      if (label_data[i] == NULL)
         goto cleanup;
      Flite_HTS_Engine_create_label(f, s, label_data[i]);
   }

   /* speech synthesis part */
   HTS_Engine_load_label_from_string_list(&f->engine, label_data, label_size);
   HTS_Engine_create_sstream(&f->engine);
   HTS_Engine_create_pstream(&f->engine);
   HTS_Engine_create_gstream(&f->engine);
   if (wavfp != NULL)
      HTS_Engine_save_riff(&f->engine, wavfp);
   HTS_Engine_refresh(&f->engine);

 cleanup:
   if (label_data != NULL) {
      for (i = 0; i < label_size; i++)
         free(label_data[i]);
      free(label_data);
   }
   if (u != NULL)
      delete_utterance(u);
   UNREGISTER_VOX(v);
}
static int Open_JTalk_synthesis(Open_JTalk * open_jtalk, const char *txt, FILE * wavfp, FILE * logfp) { int result = 0; char buff[MAXBUFLEN]; text2mecab(buff, txt); Mecab_analysis(&open_jtalk->mecab, buff); mecab2njd(&open_jtalk->njd, Mecab_get_feature(&open_jtalk->mecab), Mecab_get_size(&open_jtalk->mecab)); njd_set_pronunciation(&open_jtalk->njd); njd_set_digit(&open_jtalk->njd); njd_set_accent_phrase(&open_jtalk->njd); njd_set_accent_type(&open_jtalk->njd); njd_set_unvoiced_vowel(&open_jtalk->njd); njd_set_long_vowel(&open_jtalk->njd); njd2jpcommon(&open_jtalk->jpcommon, &open_jtalk->njd); JPCommon_make_label(&open_jtalk->jpcommon); if (JPCommon_get_label_size(&open_jtalk->jpcommon) > 2) { if (HTS_Engine_synthesize_from_strings (&open_jtalk->engine, JPCommon_get_label_feature(&open_jtalk->jpcommon), JPCommon_get_label_size(&open_jtalk->jpcommon)) == TRUE) result = 1; if (wavfp != NULL) HTS_Engine_save_riff(&open_jtalk->engine, wavfp); if (logfp != NULL) { fprintf(logfp, "[Text analysis result]\n"); NJD_fprint(&open_jtalk->njd, logfp); fprintf(logfp, "\n[Output label]\n"); HTS_Engine_save_label(&open_jtalk->engine, logfp); fprintf(logfp, "\n"); HTS_Engine_save_information(&open_jtalk->engine, logfp); } HTS_Engine_refresh(&open_jtalk->engine); } JPCommon_refresh(&open_jtalk->jpcommon); NJD_refresh(&open_jtalk->njd); Mecab_refresh(&open_jtalk->mecab); return result; }
int htsSynthesize(int argc, char **argv) { int i; double f; /* hts_engine API */ HTS_Engine engine; /* HTS voices */ size_t num_voices; char **fn_voices; /* input label file name */ char *labfn = NULL; /* output file pointers */ FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL; /* interpolation weights */ size_t num_interpolation_weights; /* output usage */ if (argc <= 1) usage(); /* initialize hts_engine API */ HTS_Engine_initialize(&engine); /* get HTS voice file names */ num_voices = 0; fn_voices = (char **) malloc(argc * sizeof(char *)); for (i = 0; i < argc; i++) { if (argv[i][0] == '-' && argv[i][1] == 'm') fn_voices[num_voices++] = argv[++i]; if (argv[i][0] == '-' && argv[i][1] == 'h') usage(); } if (num_voices == 0) { fprintf(stderr, "Error: HTS voice must be specified.\n"); free(fn_voices); return (-1); } /* load HTS voices */ if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) { fprintf(stderr, "Error: HTS voices cannot be loaded.\n"); free(fn_voices); HTS_Engine_clear(&engine); return (-1); } free(fn_voices); /* get options */ while (--argc) { if (**++argv == '-') { switch (*(*argv + 1)) { case 'v': switch (*(*argv + 2)) { case 'p': HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE); break; default: fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } break; case 'o': switch (*(*argv + 2)) { case 'w': wavfp = fopen(*++argv, "wb"); break; case 'r': rawfp = fopen(*++argv, "wb"); break; case 'd': durfp = fopen(*++argv, "wt"); break; case 'm': mgcfp = fopen(*++argv, "wb"); break; case 'f': case 'p': lf0fp = fopen(*++argv, "wb"); break; case 'l': lpffp = fopen(*++argv, "wb"); break; case 't': tracefp = fopen(*++argv, "wt"); break; default: fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'h': usage(); break; case 'm': argv++; /* HTS voices were already 
loaded */ --argc; break; case 's': HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv)); --argc; break; case 'p': HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv)); --argc; break; case 'a': HTS_Engine_set_alpha(&engine, atof(*++argv)); --argc; break; case 'b': HTS_Engine_set_beta(&engine, atof(*++argv)); --argc; break; case 'r': HTS_Engine_set_speed(&engine, atof(*++argv)); --argc; break; case 'f': switch (*(*argv + 2)) { case 'm': HTS_Engine_add_half_tone(&engine, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'u': HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv)); --argc; break; case 'i': num_interpolation_weights = atoi(*++argv); argc--; if (num_interpolation_weights != num_voices) { HTS_Engine_clear(&engine); return(-1); } for (i = 0; i < (int) num_interpolation_weights; i++) { f = atof(*++argv); argc--; HTS_Engine_set_duration_interpolation_weight(&engine, i, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f); } break; case 'j': switch (*(*argv + 2)) { case 'm': HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv)); break; case 'f': case 'p': HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return(-1); } --argc; break; case 'g': HTS_Engine_set_volume(&engine, atof(*++argv)); --argc; break; case 'z': HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv)); --argc; break; default: fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1)); HTS_Engine_clear(&engine); return(-1); } } else { labfn = *argv; } } /* synthesize */ if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) { fprintf(stderr, "Error: 
waveform cannot be synthesized.\n"); HTS_Engine_clear(&engine); return(-1); } /* output */ if (tracefp != NULL) HTS_Engine_save_information(&engine, tracefp); if (durfp != NULL) HTS_Engine_save_label(&engine, durfp); if (rawfp) HTS_Engine_save_generated_speech(&engine, rawfp); if (wavfp) HTS_Engine_save_riff(&engine, wavfp); if (mgcfp) HTS_Engine_save_generated_parameter(&engine, 0, mgcfp); if (lf0fp) HTS_Engine_save_generated_parameter(&engine, 1, lf0fp); if (lpffp) HTS_Engine_save_generated_parameter(&engine, 2, lpffp); /* reset */ HTS_Engine_refresh(&engine); /* free memory */ HTS_Engine_clear(&engine); /* close files */ if (durfp != NULL) fclose(durfp); if (mgcfp != NULL) fclose(mgcfp); if (lf0fp != NULL) fclose(lf0fp); if (lpffp != NULL) fclose(lpffp); if (wavfp != NULL) fclose(wavfp); if (rawfp != NULL) fclose(rawfp); if (tracefp != NULL) fclose(tracefp); return 0; }