static int synthesize(struct app *app, char *txt) { char buff[MAXBUFLEN]; int label_size; int r = -1; text2mecab(buff, txt); Mecab_analysis(&app->mecab, buff); mecab2njd(&app->njd, Mecab_get_feature(&app->mecab), Mecab_get_size(&app->mecab)); njd_set_pronunciation(&app->njd); njd_set_digit(&app->njd); njd_set_accent_phrase(&app->njd); njd_set_accent_type(&app->njd); njd_set_unvoiced_vowel(&app->njd); njd_set_long_vowel(&app->njd); njd2jpcommon(&app->jpcommon, &app->njd); JPCommon_make_label(&app->jpcommon); label_size = JPCommon_get_label_size(&app->jpcommon); if (label_size > 2) { if (HTS_Engine_synthesize_from_strings( &app->engine, JPCommon_get_label_feature(&app->jpcommon), label_size) == TRUE) { unsigned int pcm_len; r = 0; /* success */ pcm_len = HTS_Engine_get_generated_speech_size( &app->engine); app->pcm = malloc(pcm_len * sizeof(short)); HTS_Engine_get_generated_speech(&app->engine, app->pcm); play_write(app->play_h, app->pcm, pcm_len * sizeof(short)); } if (app->logfp) { fprintf(app->logfp, "[Text analysis result]\n"); NJD_fprint(&app->njd, app->logfp); fprintf(app->logfp, "\n[Output label]\n"); HTS_Engine_save_label(&app->engine, app->logfp); fprintf(app->logfp, "\n"); HTS_Engine_save_information(&app->engine, app->logfp); } HTS_Engine_refresh(&app->engine); } JPCommon_refresh(&app->jpcommon); NJD_refresh(&app->njd); Mecab_refresh(&app->mecab); return r; }
/*
 * Open_JTalk_synthesis: analyse `txt`, build labels and synthesize speech.
 *
 * open_jtalk - holds the mecab, njd, jpcommon and engine states used below.
 * txt        - input text handed to text2mecab().
 * wavfp      - when non-NULL, receives the RIFF output of the engine.
 * logfp      - when non-NULL, receives the text analysis result, the output
 *              labels and the engine information.
 *
 * Returns 1 when HTS_Engine_synthesize_from_strings() reports TRUE and 0
 * otherwise (including the case where no more than two labels were made).
 * All analysis states are refreshed before returning.
 */
static int Open_JTalk_synthesis(Open_JTalk * open_jtalk, const char *txt, FILE * wavfp, FILE * logfp)
{
    /* NOTE(review): text2mecab() is given no buffer size here; confirm that
     * callers bound the length of txt. */
    char mecab_text[MAXBUFLEN];
    int synthesized = 0;

    /* text -> MeCab features -> NJD */
    text2mecab(mecab_text, txt);
    Mecab_analysis(&open_jtalk->mecab, mecab_text);
    mecab2njd(&open_jtalk->njd,
              Mecab_get_feature(&open_jtalk->mecab),
              Mecab_get_size(&open_jtalk->mecab));

    /* NJD post-processing passes, in the required order */
    njd_set_pronunciation(&open_jtalk->njd);
    njd_set_digit(&open_jtalk->njd);
    njd_set_accent_phrase(&open_jtalk->njd);
    njd_set_accent_type(&open_jtalk->njd);
    njd_set_unvoiced_vowel(&open_jtalk->njd);
    njd_set_long_vowel(&open_jtalk->njd);

    /* NJD -> JPCommon labels */
    njd2jpcommon(&open_jtalk->jpcommon, &open_jtalk->njd);
    JPCommon_make_label(&open_jtalk->jpcommon);

    /* With two labels or fewer the engine is not touched at all. */
    if (JPCommon_get_label_size(&open_jtalk->jpcommon) <= 2)
        goto release;

    synthesized =
        (HTS_Engine_synthesize_from_strings(&open_jtalk->engine,
                                            JPCommon_get_label_feature(&open_jtalk->jpcommon),
                                            JPCommon_get_label_size(&open_jtalk->jpcommon)) == TRUE)
        ? 1 : 0;

    /* Outputs are written whether or not synthesis reported success. */
    if (wavfp)
        HTS_Engine_save_riff(&open_jtalk->engine, wavfp);

    if (logfp) {
        fprintf(logfp, "[Text analysis result]\n");
        NJD_fprint(&open_jtalk->njd, logfp);
        fprintf(logfp, "\n[Output label]\n");
        HTS_Engine_save_label(&open_jtalk->engine, logfp);
        fprintf(logfp, "\n");
        HTS_Engine_save_information(&open_jtalk->engine, logfp);
    }

    HTS_Engine_refresh(&open_jtalk->engine);

release:
    JPCommon_refresh(&open_jtalk->jpcommon);
    NJD_refresh(&open_jtalk->njd);
    Mecab_refresh(&open_jtalk->mecab);

    return synthesized;
}
int htsSynthesize(int argc, char **argv) { int i; double f; /* hts_engine API */ HTS_Engine engine; /* HTS voices */ size_t num_voices; char **fn_voices; /* input label file name */ char *labfn = NULL; /* output file pointers */ FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL; /* interpolation weights */ size_t num_interpolation_weights; /* output usage */ if (argc <= 1) usage(); /* initialize hts_engine API */ HTS_Engine_initialize(&engine); /* get HTS voice file names */ num_voices = 0; fn_voices = (char **) malloc(argc * sizeof(char *)); for (i = 0; i < argc; i++) { if (argv[i][0] == '-' && argv[i][1] == 'm') fn_voices[num_voices++] = argv[++i]; if (argv[i][0] == '-' && argv[i][1] == 'h') usage(); } if (num_voices == 0) { fprintf(stderr, "Error: HTS voice must be specified.\n"); free(fn_voices); return (-1); } /* load HTS voices */ if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) { fprintf(stderr, "Error: HTS voices cannot be loaded.\n"); free(fn_voices); HTS_Engine_clear(&engine); return (-1); } free(fn_voices); /* get options */ while (--argc) { if (**++argv == '-') { switch (*(*argv + 1)) { case 'v': switch (*(*argv + 2)) { case 'p': HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE); break; default: fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } break; case 'o': switch (*(*argv + 2)) { case 'w': wavfp = fopen(*++argv, "wb"); break; case 'r': rawfp = fopen(*++argv, "wb"); break; case 'd': durfp = fopen(*++argv, "wt"); break; case 'm': mgcfp = fopen(*++argv, "wb"); break; case 'f': case 'p': lf0fp = fopen(*++argv, "wb"); break; case 'l': lpffp = fopen(*++argv, "wb"); break; case 't': tracefp = fopen(*++argv, "wt"); break; default: fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'h': usage(); break; case 'm': argv++; /* HTS voices were already 
loaded */ --argc; break; case 's': HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv)); --argc; break; case 'p': HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv)); --argc; break; case 'a': HTS_Engine_set_alpha(&engine, atof(*++argv)); --argc; break; case 'b': HTS_Engine_set_beta(&engine, atof(*++argv)); --argc; break; case 'r': HTS_Engine_set_speed(&engine, atof(*++argv)); --argc; break; case 'f': switch (*(*argv + 2)) { case 'm': HTS_Engine_add_half_tone(&engine, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return (-1); } --argc; break; case 'u': HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv)); --argc; break; case 'i': num_interpolation_weights = atoi(*++argv); argc--; if (num_interpolation_weights != num_voices) { HTS_Engine_clear(&engine); return(-1); } for (i = 0; i < (int) num_interpolation_weights; i++) { f = atof(*++argv); argc--; HTS_Engine_set_duration_interpolation_weight(&engine, i, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f); HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f); } break; case 'j': switch (*(*argv + 2)) { case 'm': HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv)); break; case 'f': case 'p': HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv)); break; default: fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2)); HTS_Engine_clear(&engine); return(-1); } --argc; break; case 'g': HTS_Engine_set_volume(&engine, atof(*++argv)); --argc; break; case 'z': HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv)); --argc; break; default: fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1)); HTS_Engine_clear(&engine); return(-1); } } else { labfn = *argv; } } /* synthesize */ if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) { fprintf(stderr, "Error: 
waveform cannot be synthesized.\n"); HTS_Engine_clear(&engine); return(-1); } /* output */ if (tracefp != NULL) HTS_Engine_save_information(&engine, tracefp); if (durfp != NULL) HTS_Engine_save_label(&engine, durfp); if (rawfp) HTS_Engine_save_generated_speech(&engine, rawfp); if (wavfp) HTS_Engine_save_riff(&engine, wavfp); if (mgcfp) HTS_Engine_save_generated_parameter(&engine, 0, mgcfp); if (lf0fp) HTS_Engine_save_generated_parameter(&engine, 1, lf0fp); if (lpffp) HTS_Engine_save_generated_parameter(&engine, 2, lpffp); /* reset */ HTS_Engine_refresh(&engine); /* free memory */ HTS_Engine_clear(&engine); /* close files */ if (durfp != NULL) fclose(durfp); if (mgcfp != NULL) fclose(mgcfp); if (lf0fp != NULL) fclose(lf0fp); if (lpffp != NULL) fclose(lpffp); if (wavfp != NULL) fclose(wavfp); if (rawfp != NULL) fclose(rawfp); if (tracefp != NULL) fclose(tracefp); return 0; }