Exemplo n.º 1
0
void TextToSpeech::synthesis(char *txt, FILE * wavfp)
{
	char buff[MAXBUFLEN];

	text2mecab(buff, txt);
	Mecab_analysis(&open_jtalk_.mecab, buff);
	mecab2njd(&open_jtalk_.njd, Mecab_get_feature(&open_jtalk_.mecab), Mecab_get_size(&open_jtalk_.mecab));
	njd_set_pronunciation(&open_jtalk_.njd);
	njd_set_digit(&open_jtalk_.njd);
	njd_set_accent_phrase(&open_jtalk_.njd);
	njd_set_accent_type(&open_jtalk_.njd);
	njd_set_unvoiced_vowel(&open_jtalk_.njd);
	njd_set_long_vowel(&open_jtalk_.njd);
	njd2jpcommon(&open_jtalk_.jpcommon, &open_jtalk_.njd);
	JPCommon_make_label(&open_jtalk_.jpcommon);
	if (JPCommon_get_label_size(&open_jtalk_.jpcommon) > 2) {
		HTS_Engine_load_label_from_string_list(
			&open_jtalk_.engine,
			JPCommon_get_label_feature(&open_jtalk_.jpcommon),
			JPCommon_get_label_size(&open_jtalk_.jpcommon)
		);
		HTS_Engine_create_sstream(&open_jtalk_.engine);
		HTS_Engine_create_pstream(&open_jtalk_.engine);
		HTS_Engine_create_gstream(&open_jtalk_.engine);
		if (wavfp != NULL)
			HTS_Engine_save_riff(&open_jtalk_.engine, wavfp);
		HTS_Engine_refresh(&open_jtalk_.engine);
	}
	JPCommon_refresh(&open_jtalk_.jpcommon);
	NJD_refresh(&open_jtalk_.njd);
	Mecab_refresh(&open_jtalk_.mecab);
}
Exemplo n.º 2
0
/* OpenJTalk_synthesis_towav: synthesize text and save it to wavfilename.
 *
 * openjtalk   : address of the handle; *openjtalk supplies the mecab, njd,
 *               jpcommon and engine state plus the errorout message buffer
 * text        : text to synthesize
 * wavfilename : output path, opened with mode "wb"
 *
 * Returns 1 on success and 0 when an argument is NULL, the output file
 * cannot be opened, or closing the output file fails.  For the two file
 * errors a message is written to (*openjtalk)->errorout.
 * Note: when JPCommon yields two labels or fewer nothing is written to the
 * file, yet the function still returns 1.
 */
int OpenJTalk_synthesis_towav(OpenJTalk** openjtalk,const char* text, const char* wavfilename)
{
   char buff[MAXBUFLEN];
   FILE * wavfp;

   /* reject NULL arguments before the output file is created */
   if (openjtalk == NULL || *openjtalk == NULL || text == NULL || wavfilename == NULL)
      return 0;

   wavfp = fopen(wavfilename,"wb");
   if (!wavfp)
   {
       /* NOTE(review): sprintf is unbounded and the capacity of errorout is
          not visible here - confirm it can hold a full path. */
       sprintf((*openjtalk)->errorout,"can not open %s.",wavfilename);
       return 0;
   }

   /* text -> Mecab features -> NJD -> JPCommon labels */
   text2mecab(buff, (char*)text);
   Mecab_analysis((*openjtalk)->mecab, buff);
   mecab2njd(&(*openjtalk)->njd, Mecab_get_feature((*openjtalk)->mecab),
             Mecab_get_size((*openjtalk)->mecab));
   njd_set_pronunciation(&(*openjtalk)->njd);
   njd_set_digit(&(*openjtalk)->njd);
   njd_set_accent_phrase(&(*openjtalk)->njd);
   njd_set_accent_type(&(*openjtalk)->njd);
   njd_set_unvoiced_vowel(&(*openjtalk)->njd);
   njd_set_long_vowel(&(*openjtalk)->njd);
   njd2jpcommon(&(*openjtalk)->jpcommon, &(*openjtalk)->njd);
   JPCommon_make_label(&(*openjtalk)->jpcommon);

   /* the engine is only driven when more than two labels exist */
   if (JPCommon_get_label_size(&(*openjtalk)->jpcommon) > 2) {
      HTS_Engine_load_label_from_string_list(&(*openjtalk)->engine,
                                             JPCommon_get_label_feature(&(*openjtalk)->jpcommon),
                                             JPCommon_get_label_size(&(*openjtalk)->jpcommon));
      HTS_Engine_create_sstream(&(*openjtalk)->engine);
      HTS_Engine_create_pstream(&(*openjtalk)->engine);
      HTS_Engine_create_gstream(&(*openjtalk)->engine);

      HTS_Engine_save_riff(&(*openjtalk)->engine, wavfp);

      HTS_Engine_refresh(&(*openjtalk)->engine);
   }
   JPCommon_refresh(&(*openjtalk)->jpcommon);
   NJD_refresh(&(*openjtalk)->njd);
   Mecab_refresh((*openjtalk)->mecab);

   /* fclose flushes buffered output; a failure means the file is incomplete */
   if (fclose(wavfp) != 0)
   {
       sprintf((*openjtalk)->errorout,"can not write %s.",wavfilename);
       return 0;
   }
   return 1;
}
Exemplo n.º 3
0
/* Flite_HTS_Engine_synthesize: synthesize speech
 *
 * f   : engine wrapper; f->engine is driven and refreshed
 * txt : text to synthesize (NULL is rejected)
 * wav : output path opened with "wb", or NULL to skip saving
 *
 * Builds one label string per item of the utterance's "Segment" relation,
 * hands the labels to HTS_Engine_synthesize_from_strings and, when wav is
 * given, saves the result as RIFF.
 *
 * Returns TRUE on success.  Returns FALSE when txt is NULL, the voice or
 * utterance cannot be created, there are no segments, memory allocation
 * fails, synthesis fails, or the output file cannot be opened.  Every
 * resource acquired here is released on all paths.
 */
HTS_Boolean Flite_HTS_Engine_synthesize(Flite_HTS_Engine * f, const char *txt, const char *wav)
{
   int i;
   FILE *fp;
   cst_voice *v = NULL;
   cst_utterance *u = NULL;
   cst_item *s = NULL;
   char **label_data = NULL;
   int label_size = 0;
   HTS_Boolean result = FALSE;

   if (txt == NULL)
      return FALSE;

   /* text analysis part */
   v = REGISTER_VOX(NULL);
   if (v == NULL)
      return FALSE;
   u = flite_synth_text(txt, v);
   if (u == NULL)
      goto cleanup;
   for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
      label_size++;
   if (label_size <= 0)
      goto cleanup;

   /* calloc zero-fills the table, so cleanup may free every slot even
      after a partial allocation */
   label_data = (char **) calloc(label_size, sizeof(char *));
   if (label_data == NULL)
      goto cleanup;
   for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) {
      label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
      if (label_data[i] == NULL)
         goto cleanup;
      Flite_HTS_Engine_create_label(f, s, label_data[i]);
   }

   /* speech synthesis part */
   if (HTS_Engine_synthesize_from_strings(&f->engine, label_data, label_size) == TRUE) {
      result = TRUE;
      if (wav != NULL) {
         fp = fopen(wav, "wb");
         if (fp == NULL) {
            /* never pass a NULL stream to save_riff / fclose */
            result = FALSE;
         } else {
            HTS_Engine_save_riff(&f->engine, fp);
            fclose(fp);
         }
      }
   }
   HTS_Engine_refresh(&f->engine);

 cleanup:
   if (label_data != NULL) {
      for (i = 0; i < label_size; i++)
         free(label_data[i]);
      free(label_data);
   }
   if (u != NULL)
      delete_utterance(u);
   UNREGISTER_VOX(v);

   return result;
}
/* Flite_HTS_Engine_synthesis: speech synthesis
 *
 * f     : engine wrapper; f->engine is driven and refreshed
 * txt   : text to synthesize (NULL is ignored)
 * wavfp : open stream that receives the RIFF output, or NULL to skip saving
 *
 * Builds one label string per item of the utterance's "Segment" relation,
 * loads the labels into the engine, creates the s/p/g streams and saves the
 * result when wavfp is non-NULL.  Returns silently when the voice or the
 * utterance cannot be created, when there are no segments, or when memory
 * allocation fails; the voice, utterance and label table are released on
 * every path.
 */
void Flite_HTS_Engine_synthesis(Flite_HTS_Engine * f, char *txt, FILE * wavfp)
{
    int i;
    cst_voice *v = NULL;
    cst_utterance *u = NULL;
    cst_item *s = NULL;
    char **label_data = NULL;
    int label_size = 0;

    if (txt == NULL)
        return;

    /* text analysis part */
    v = REGISTER_VOX(NULL);
    if (v == NULL)
        return;
    u = flite_synth_text(txt, v);
    if (u == NULL)
        goto cleanup;
    for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
        label_size++;
    if (label_size <= 0)
        goto cleanup;

    /* calloc zero-fills the table, so cleanup may free every slot even
       after a partial allocation */
    label_data = (char **) calloc(label_size, sizeof(char *));
    if (label_data == NULL)
        goto cleanup;
    for (i = 0, s = relation_head(utt_relation(u, "Segment")); s;
            s = item_next(s), i++) {
        label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
        if (label_data[i] == NULL)
            goto cleanup;
        Flite_HTS_Engine_create_label(f, s, label_data[i]);
    }

    /* speech synthesis part */
    HTS_Engine_load_label_from_string_list(&f->engine, label_data, label_size);
    HTS_Engine_create_sstream(&f->engine);
    HTS_Engine_create_pstream(&f->engine);
    HTS_Engine_create_gstream(&f->engine);
    if (wavfp != NULL)
        HTS_Engine_save_riff(&f->engine, wavfp);

    HTS_Engine_refresh(&f->engine);

cleanup:
    if (label_data != NULL) {
        for (i = 0; i < label_size; i++)
            free(label_data[i]);
        free(label_data);
    }
    if (u != NULL)
        delete_utterance(u);
    UNREGISTER_VOX(v);
}
Exemplo n.º 5
0
/* Open_JTalk_synthesis: run txt through the text2mecab -> Mecab -> NJD ->
 * JPCommon label pipeline and, when more than two labels result, hand them
 * to HTS_Engine_synthesize_from_strings.
 *
 * wavfp : receives the RIFF output when non-NULL
 * logfp : receives the NJD dump, the output labels and the engine
 *         information when non-NULL
 *
 * Returns 1 only if the engine call reported TRUE, otherwise 0.  JPCommon,
 * NJD and Mecab are refreshed before returning in every case.
 */
static int Open_JTalk_synthesis(Open_JTalk * open_jtalk, const char *txt, FILE * wavfp,
                                FILE * logfp)
{
   char mecab_text[MAXBUFLEN];
   int label_count;
   int ok = 0;

   /* text -> Mecab features -> NJD */
   text2mecab(mecab_text, txt);
   Mecab_analysis(&open_jtalk->mecab, mecab_text);
   mecab2njd(&open_jtalk->njd,
             Mecab_get_feature(&open_jtalk->mecab),
             Mecab_get_size(&open_jtalk->mecab));

   /* NJD passes, applied in this fixed order */
   njd_set_pronunciation(&open_jtalk->njd);
   njd_set_digit(&open_jtalk->njd);
   njd_set_accent_phrase(&open_jtalk->njd);
   njd_set_accent_type(&open_jtalk->njd);
   njd_set_unvoiced_vowel(&open_jtalk->njd);
   njd_set_long_vowel(&open_jtalk->njd);

   /* NJD -> JPCommon labels */
   njd2jpcommon(&open_jtalk->jpcommon, &open_jtalk->njd);
   JPCommon_make_label(&open_jtalk->jpcommon);

   label_count = JPCommon_get_label_size(&open_jtalk->jpcommon);
   if (label_count > 2) {
      ok = (HTS_Engine_synthesize_from_strings(&open_jtalk->engine,
                                               JPCommon_get_label_feature(&open_jtalk->jpcommon),
                                               label_count) == TRUE) ? 1 : 0;

      /* outputs are written whether or not the engine reported success */
      if (wavfp != NULL) {
         HTS_Engine_save_riff(&open_jtalk->engine, wavfp);
      }
      if (logfp != NULL) {
         fprintf(logfp, "[Text analysis result]\n");
         NJD_fprint(&open_jtalk->njd, logfp);
         fprintf(logfp, "\n[Output label]\n");
         HTS_Engine_save_label(&open_jtalk->engine, logfp);
         fprintf(logfp, "\n");
         HTS_Engine_save_information(&open_jtalk->engine, logfp);
      }
      HTS_Engine_refresh(&open_jtalk->engine);
   }

   /* reset the per-utterance state */
   JPCommon_refresh(&open_jtalk->jpcommon);
   NJD_refresh(&open_jtalk->njd);
   Mecab_refresh(&open_jtalk->mecab);

   return ok;
}
Exemplo n.º 6
0
/* hts_next_value: step to the argument that follows the current option.
 * argc/argv are the caller's parsing cursor; *argc counts the current
 * argument plus everything after it.  Returns the next argument and
 * advances the cursor, or returns NULL and leaves the cursor untouched
 * when the current argument is the last one. */
static char *hts_next_value(int *argc, char ***argv)
{
	if (*argc <= 1)
		return NULL;
	--(*argc);
	++(*argv);
	return **argv;
}

/* htsSynthesize: command-line style driver for the hts_engine API.
 *
 * argc/argv : argument vector; argv[0] is skipped by the option parser.
 *             "-m <voice>" options are collected and loaded first, the
 *             remaining options configure the engine or name output files
 *             ("-o?"), and any argument not starting with '-' is taken as
 *             the input label file name (the last one wins).
 *
 * Returns 0 after a successful synthesis and -1 on any error: no voice
 * given, voices cannot be loaded, an invalid option, an option whose value
 * is missing, an "-i" count that differs from the number of voices, no
 * label file, or a failed synthesis.  Once option parsing has started,
 * every exit path clears the engine and closes the output files that were
 * opened.
 */
int htsSynthesize(int argc, char **argv)
{
	int i;
	int status = -1;
	double f;

	/* option parsing scratch */
	const char *option = NULL;
	char *value = NULL;
	FILE **slot = NULL;
	const char *mode = NULL;

	/* hts_engine API */
	HTS_Engine engine;

	/* HTS voices */
	size_t num_voices;
	char **fn_voices;

	/* input label file name */
	char *labfn = NULL;

	/* output file pointers */
	FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL;

	/* interpolation weights */
	size_t num_interpolation_weights;

	/* output usage */
	if (argc <= 1)
		usage();

	/* initialize hts_engine API */
	HTS_Engine_initialize(&engine);

	/* get HTS voice file names */
	num_voices = 0;
	fn_voices = (char **) malloc((size_t) (argc > 0 ? argc : 1) * sizeof(char *));
	if (fn_voices == NULL)
	{
		fprintf(stderr, "Error: cannot allocate memory for HTS voice list.\n");
		return (-1);
	}
	for (i = 0; i < argc; i++)
	{
		if (argv[i][0] != '-')
			continue;
		if (argv[i][1] == 'm')
		{
			/* a trailing "-m" has no file name; argv[argc] is NULL */
			if (i + 1 >= argc)
			{
				fprintf(stderr, "Error: option '-m' requires an HTS voice file name.\n");
				free(fn_voices);
				return (-1);
			}
			/* the file name is consumed here and not re-read as an option */
			fn_voices[num_voices++] = argv[++i];
		}
		else if (argv[i][1] == 'h')
		{
			usage();
		}
	}
	if (num_voices == 0)
	{
		fprintf(stderr, "Error: HTS voice must be specified.\n");
		free(fn_voices);
		return (-1);
	}

	/* load HTS voices */
	if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE)
	{
		fprintf(stderr, "Error: HTS voices cannot be loaded.\n");
		free(fn_voices);
		HTS_Engine_clear(&engine);
		return (-1);
	}
	free(fn_voices);

	/* get options */
	while (--argc)
	{
		if (**++argv == '-')
		{
			option = *argv; /* kept for the missing-value message */
			switch (*(*argv + 1))
			{
			case 'v':
				switch (*(*argv + 2))
				{
				case 'p':
					HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE);
					break;
				default:
					fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2));
					goto cleanup;
				}
				break;
			case 'o':
				/* pick the stream and open mode first, consume the file name after */
				mode = "wb";
				switch (*(*argv + 2))
				{
				case 'w':
					slot = &wavfp;
					break;
				case 'r':
					slot = &rawfp;
					break;
				case 'd':
					slot = &durfp;
					mode = "wt";
					break;
				case 'm':
					slot = &mgcfp;
					break;
				case 'f':
				case 'p':
					slot = &lf0fp;
					break;
				case 'l':
					slot = &lpffp;
					break;
				case 't':
					slot = &tracefp;
					mode = "wt";
					break;
				default:
					fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2));
					goto cleanup;
				}
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				/* option repeated: close the earlier stream instead of leaking it */
				if (*slot != NULL)
					fclose(*slot);
				/* a failed fopen leaves the pointer NULL and the matching
				   output step below is skipped */
				*slot = fopen(value, mode);
				break;
			case 'h':
				usage();
				break;
			case 'm':
				/* HTS voices were already loaded; just skip the file name */
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				break;
			case 's':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(value));
				break;
			case 'p':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_fperiod(&engine, (size_t) atoi(value));
				break;
			case 'a':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_alpha(&engine, atof(value));
				break;
			case 'b':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_beta(&engine, atof(value));
				break;
			case 'r':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_speed(&engine, atof(value));
				break;
			case 'f':
				switch (*(*argv + 2))
				{
				case 'm':
					break;
				default:
					fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2));
					goto cleanup;
				}
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_add_half_tone(&engine, atof(value));
				break;
			case 'u':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_msd_threshold(&engine, 1, atof(value));
				break;
			case 'i':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				num_interpolation_weights = atoi(value);
				/* the count must match the number of loaded voices */
				if (num_interpolation_weights != num_voices)
					goto cleanup;
				for (i = 0; i < (int) num_interpolation_weights; i++)
				{
					value = hts_next_value(&argc, &argv);
					if (value == NULL)
						goto missing_value;
					f = atof(value);
					HTS_Engine_set_duration_interpolation_weight(&engine, i, f);
					HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f);
					HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f);
					HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f);
					HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f);
				}
				break;
			case 'j':
				/* i is reused here as the stream index passed to set_gv_weight */
				switch (*(*argv + 2))
				{
				case 'm':
					i = 0;
					break;
				case 'f':
				case 'p':
					i = 1;
					break;
				default:
					fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2));
					goto cleanup;
				}
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_gv_weight(&engine, i, atof(value));
				break;
			case 'g':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_volume(&engine, atof(value));
				break;
			case 'z':
				value = hts_next_value(&argc, &argv);
				if (value == NULL)
					goto missing_value;
				HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(value));
				break;
			default:
				fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1));
				goto cleanup;
			}
		}
		else
		{
			labfn = *argv;
		}
	}

	/* a label file is mandatory; do not hand NULL to the engine */
	if (labfn == NULL)
	{
		fprintf(stderr, "Error: label file must be specified.\n");
		goto cleanup;
	}

	/* synthesize */
	if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE)
	{
		fprintf(stderr, "Error: waveform cannot be synthesized.\n");
		goto cleanup;
	}

	/* output */
	if (tracefp != NULL)
		HTS_Engine_save_information(&engine, tracefp);
	if (durfp != NULL)
		HTS_Engine_save_label(&engine, durfp);
	if (rawfp)
		HTS_Engine_save_generated_speech(&engine, rawfp);
	if (wavfp)
		HTS_Engine_save_riff(&engine, wavfp);
	if (mgcfp)
		HTS_Engine_save_generated_parameter(&engine, 0, mgcfp);
	if (lf0fp)
		HTS_Engine_save_generated_parameter(&engine, 1, lf0fp);
	if (lpffp)
		HTS_Engine_save_generated_parameter(&engine, 2, lpffp);

	/* reset */
	HTS_Engine_refresh(&engine);
	status = 0;
	goto cleanup;

missing_value:
	fprintf(stderr, "Error: option '%s' requires a value.\n", option);

cleanup:
	/* free memory */
	HTS_Engine_clear(&engine);

	/* close files */
	if (durfp != NULL)
		fclose(durfp);
	if (mgcfp != NULL)
		fclose(mgcfp);
	if (lf0fp != NULL)
		fclose(lf0fp);
	if (lpffp != NULL)
		fclose(lpffp);
	if (wavfp != NULL)
		fclose(wavfp);
	if (rawfp != NULL)
		fclose(rawfp);
	if (tracefp != NULL)
		fclose(tracefp);

	return status;
}