Example #1
void TextToSpeech::synthesis(char *txt, FILE * wavfp)
{
	char buff[MAXBUFLEN];

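	/* text analysis part: raw text -> MeCab -> NJD -> full-context HTS labels */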
	text2mecab(buff, txt);
	Mecab_analysis(&open_jtalk_.mecab, buff);
	mecab2njd(&open_jtalk_.njd, Mecab_get_feature(&open_jtalk_.mecab), Mecab_get_size(&open_jtalk_.mecab));
	njd_set_pronunciation(&open_jtalk_.njd);
	njd_set_digit(&open_jtalk_.njd);
	njd_set_accent_phrase(&open_jtalk_.njd);
	njd_set_accent_type(&open_jtalk_.njd);
	njd_set_unvoiced_vowel(&open_jtalk_.njd);
	njd_set_long_vowel(&open_jtalk_.njd);
	njd2jpcommon(&open_jtalk_.jpcommon, &open_jtalk_.njd);
	JPCommon_make_label(&open_jtalk_.jpcommon);
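	/* speech synthesis part: skipped when only the two boundary silence labels were produced */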
	if (JPCommon_get_label_size(&open_jtalk_.jpcommon) > 2) {
		HTS_Engine_load_label_from_string_list(
			&open_jtalk_.engine,
			JPCommon_get_label_feature(&open_jtalk_.jpcommon),
			JPCommon_get_label_size(&open_jtalk_.jpcommon)
		);
		HTS_Engine_create_sstream(&open_jtalk_.engine);
		HTS_Engine_create_pstream(&open_jtalk_.engine);
		HTS_Engine_create_gstream(&open_jtalk_.engine);
		if (wavfp != NULL)
			HTS_Engine_save_riff(&open_jtalk_.engine, wavfp);
		HTS_Engine_refresh(&open_jtalk_.engine);
	}
	JPCommon_refresh(&open_jtalk_.jpcommon);
	NJD_refresh(&open_jtalk_.njd);
	Mecab_refresh(&open_jtalk_.mecab);
}
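A minimal, hypothetical caller for the method above (not part of the original example); it assumes `tts` wraps an Open JTalk engine that has already been loaded with a dictionary and an HTS voice elsewhere:

#include <cstdio>

// Hypothetical helper, assuming TextToSpeech is already initialized.
void save_wav(TextToSpeech &tts, char *text, const char *wav_path)
{
	FILE *wavfp = fopen(wav_path, "wb");  // RIFF/WAV output
	if (wavfp == NULL)
		return;
	tts.synthesis(text, wavfp);           // analyse the text and write the waveform
	fclose(wavfp);
}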
Example #2
void TextToSpeech::synthesis(const char *txt)
{
	char buff[MAXBUFLEN];

	text2mecab(buff, txt);
	Mecab_analysis(&open_jtalk_.mecab, buff);
	mecab2njd(&open_jtalk_.njd, Mecab_get_feature(&open_jtalk_.mecab), Mecab_get_size(&open_jtalk_.mecab));
	njd_set_pronunciation(&open_jtalk_.njd);
	njd_set_digit(&open_jtalk_.njd);
	njd_set_accent_phrase(&open_jtalk_.njd);
	njd_set_accent_type(&open_jtalk_.njd);
	njd_set_unvoiced_vowel(&open_jtalk_.njd);
	njd_set_long_vowel(&open_jtalk_.njd);
	njd2jpcommon(&open_jtalk_.jpcommon, &open_jtalk_.njd);
	JPCommon_make_label(&open_jtalk_.jpcommon);
	if (JPCommon_get_label_size(&open_jtalk_.jpcommon) > 2) {
		unsigned int pcm_len;
		HTS_Engine_load_label_from_string_list(
			&open_jtalk_.engine,
			JPCommon_get_label_feature(&open_jtalk_.jpcommon),
			JPCommon_get_label_size(&open_jtalk_.jpcommon)
		);
		HTS_Engine_create_sstream(&open_jtalk_.engine);
		HTS_Engine_create_pstream(&open_jtalk_.engine);
		HTS_Engine_create_gstream(&open_jtalk_.engine);
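		/* copy the generated PCM out of the engine and hand it to the playback handle */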
		pcm_len = HTS_Engine_get_generated_speech_size(&open_jtalk_.engine);
		pcm_ = new short[pcm_len];
		HTS_Engine_get_generated_speech(&open_jtalk_.engine, pcm_);
		play_write(play_h_, pcm_, pcm_len * sizeof(short));
		HTS_Engine_refresh(&open_jtalk_.engine);
	}
	JPCommon_refresh(&open_jtalk_.jpcommon);
	NJD_refresh(&open_jtalk_.njd);
	Mecab_refresh(&open_jtalk_.mecab);
}
Example #3
int OpenJTalk_synthesis_towav(OpenJTalk** openjtalk,const char* text, const char* wavfilename)
{
   char buff[MAXBUFLEN];
   FILE * wavfp;
   wavfp = fopen(wavfilename,"wb");
   if (!wavfp)
   {
       sprintf((*openjtalk)->errorout,"can not open %s.",wavfilename);
       return 0;
   }

   text2mecab(buff, (char*)text);
   Mecab_analysis((*openjtalk)->mecab, buff);
   mecab2njd(&(*openjtalk)->njd, Mecab_get_feature((*openjtalk)->mecab),
             Mecab_get_size((*openjtalk)->mecab));
   njd_set_pronunciation(&(*openjtalk)->njd);
   njd_set_digit(&(*openjtalk)->njd);
   njd_set_accent_phrase(&(*openjtalk)->njd);
   njd_set_accent_type(&(*openjtalk)->njd);
   njd_set_unvoiced_vowel(&(*openjtalk)->njd);
   njd_set_long_vowel(&(*openjtalk)->njd);
   njd2jpcommon(&(*openjtalk)->jpcommon, &(*openjtalk)->njd);
   JPCommon_make_label(&(*openjtalk)->jpcommon);
   if (JPCommon_get_label_size(&(*openjtalk)->jpcommon) > 2) {
      HTS_Engine_load_label_from_string_list(&(*openjtalk)->engine,
                                             JPCommon_get_label_feature(&(*openjtalk)->jpcommon),
                                             JPCommon_get_label_size(&(*openjtalk)->jpcommon));
      HTS_Engine_create_sstream(&(*openjtalk)->engine);
      HTS_Engine_create_pstream(&(*openjtalk)->engine);
      HTS_Engine_create_gstream(&(*openjtalk)->engine);

      HTS_Engine_save_riff(&(*openjtalk)->engine, wavfp);
/*
      if (wavfp != NULL)
         HTS_Engine_save_riff(&(*openjtalk)->engine, wavfp);
      if (logfp != NULL) {
         fprintf(logfp, "[Text analysis result]\n");
         NJD_fprint(&(*openjtalk)->njd, logfp);
         fprintf(logfp, "\n[Output label]\n");
         HTS_Engine_save_label(&(*openjtalk)->engine, logfp);
         fprintf(logfp, "\n");
         HTS_Engine_save_information(&(*openjtalk)->engine, logfp);
      }
*/
      HTS_Engine_refresh(&(*openjtalk)->engine);
   }
   JPCommon_refresh(&(*openjtalk)->jpcommon);
   NJD_refresh(&(*openjtalk)->njd);
   Mecab_refresh((*openjtalk)->mecab);

   fclose(wavfp);
   return 1;
}
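Example #4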
/* Flite_HTS_Engine_synthesis: speech synthesis */
void Flite_HTS_Engine_synthesis(Flite_HTS_Engine * f, char *txt, FILE * wavfp)
{
    int i;
    cst_voice *v = NULL;
    cst_utterance *u = NULL;
    cst_item *s = NULL;
    char **label_data = NULL;
    int label_size = 0;

    /* text analysis part */
    v = REGISTER_VOX(NULL);
    if (v == NULL)
        return;
    u = flite_synth_text(txt, v);
    if (u == NULL) {
        UNREGISTER_VOX(v);
        return;
    }
    for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
        label_size++;
    if (label_size <= 0) {
        /* nothing to synthesize; release the text-analysis resources */
        delete_utterance(u);
        UNREGISTER_VOX(v);
        return;
    }
    label_data = (char **) calloc(label_size, sizeof(char *));
    for (i = 0, s = relation_head(utt_relation(u, "Segment")); s;
            s = item_next(s), i++) {
        label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
        Flite_HTS_Engine_create_label(f, s, label_data[i]);
    }

    /* speech synthesis part */
    HTS_Engine_load_label_from_string_list(&f->engine, label_data, label_size);
    HTS_Engine_create_sstream(&f->engine);
    HTS_Engine_create_pstream(&f->engine);
    HTS_Engine_create_gstream(&f->engine);
    if (wavfp != NULL)
        HTS_Engine_save_riff(&f->engine, wavfp);

    HTS_Engine_refresh(&f->engine);

    for (i = 0; i < label_size; i++)
        free(label_data[i]);
    free(label_data);

    delete_utterance(u);
    UNREGISTER_VOX(v);
}
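Example #5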
void OpenJTalk_synthesis(OpenJTalk * open_jtalk, char *txt, FILE * wavfp, FILE * logfp)
{
   char buff[MAXBUFLEN];

   text2mecab(buff, txt);
   Mecab_analysis(&open_jtalk->mecab, buff);
   mecab2njd(&open_jtalk->njd, Mecab_get_feature(&open_jtalk->mecab),
             Mecab_get_size(&open_jtalk->mecab));
   njd_set_pronunciation(&open_jtalk->njd);
   njd_set_digit(&open_jtalk->njd);
   njd_set_accent_phrase(&open_jtalk->njd);
   njd_set_accent_type(&open_jtalk->njd);
   njd_set_unvoiced_vowel(&open_jtalk->njd);
   njd_set_long_vowel(&open_jtalk->njd);
   njd2jpcommon(&open_jtalk->jpcommon, &open_jtalk->njd);
   JPCommon_make_label(&open_jtalk->jpcommon);
   if (JPCommon_get_label_size(&open_jtalk->jpcommon) > 2) {
      HTS_Engine_load_label_from_string_list(&open_jtalk->engine,
                                             JPCommon_get_label_feature(&open_jtalk->jpcommon),
                                             JPCommon_get_label_size(&open_jtalk->jpcommon));
      HTS_Engine_create_sstream(&open_jtalk->engine);
      HTS_Engine_create_pstream(&open_jtalk->engine);
      HTS_Engine_create_gstream(&open_jtalk->engine);
      if (wavfp != NULL)
         HTS_Engine_save_riff(&open_jtalk->engine, wavfp);
      if (logfp != NULL) {
         fprintf(logfp, "[Text analysis result]\n");
         NJD_fprint(&open_jtalk->njd, logfp);
         fprintf(logfp, "\n[Output label]\n");
         HTS_Engine_save_label(&open_jtalk->engine, logfp);
         fprintf(logfp, "\n");
         HTS_Engine_save_information(&open_jtalk->engine, logfp);
      }
      HTS_Engine_refresh(&open_jtalk->engine);
   }
   JPCommon_refresh(&open_jtalk->jpcommon);
   NJD_refresh(&open_jtalk->njd);
   Mecab_refresh(&open_jtalk->mecab);
}
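A minimal, hypothetical caller for OpenJTalk_synthesis above (not from the original project); it assumes the OpenJTalk instance has already been initialized and loaded with a MeCab dictionary and an HTS voice elsewhere:

#include <stdio.h>

/* Hypothetical helper, assuming open_jtalk is already set up. */
int synthesize_with_log(OpenJTalk *open_jtalk, char *text, const char *wav_path)
{
   FILE *wavfp = fopen(wav_path, "wb");   /* RIFF/WAV output */
   if (wavfp == NULL)
      return 0;
   /* passing stderr as logfp also dumps the NJD analysis result and the labels */
   OpenJTalk_synthesis(open_jtalk, text, wavfp, stderr);
   fclose(wavfp);
   return 1;
}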
Example #6
static void Run(const SUttProcessor *self, SUtterance *utt,
				s_erc *error)
{
	SHTSEngineMESynthUttProc105 *HTSsynth = (SHTSEngineMESynthUttProc105*)self;
	SPlugin *audioPlugin;
	const SRelation *segmentRel;
	SAudio *audio = NULL;
	s_bool is_present;
	char **label_data = NULL;
	int label_size;
	const SItem *item;
	const SItem *itemItr;
	int counter;
	uint i;
	int frame;
	int state;
	const double rate = HTSsynth->engine.global.fperiod * 1e+7 / HTSsynth->engine.global.sampling_rate;
	int nstate;


	S_CLR_ERR(error);

	/* we require the segment relation */
	is_present = SUtteranceRelationIsPresent(utt, "Segment", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceRelationIsPresent\" failed"))
		goto quit_error;

	if (!is_present)
	{
		S_CTX_ERR(error, S_FAILURE,
				  "Run",
				  "Failed to find 'Segment' relation in utterance");
		goto quit_error;
	}

	segmentRel = SUtteranceGetRelation(utt, "Segment", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceGetRelation\" failed"))
		goto quit_error;

	item = SRelationHead(segmentRel, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SRelationHead\" failed"))
		goto quit_error;

	itemItr = item;
	label_size = 0;
	while (itemItr != NULL)
	{
		label_size++;
		itemItr = SItemNext(itemItr, error);
	}

	label_data = S_CALLOC(char*, label_size);

	itemItr = item;
	counter = 0;
	while (itemItr != NULL)
	{
		SObject *dFeat;
		const char *tmp;


		dFeat = SItemPathToFeatProc(itemItr, "hts_labels", error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemPathToFeatProc\" failed"))
			goto quit_error;

		if (dFeat == NULL)
		{
			S_CTX_ERR(error, S_FAILURE,
					  "Run",
					  "Failed to generate hts labels for segment item");
			goto quit_error;
		}

		tmp = SObjectGetString(dFeat, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SObjectGetString\" failed"))
			goto quit_error;

		label_data[counter++] = s_strdup(tmp, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"s_strdup\" failed"))
			goto quit_error;

		SItemSetObject((SItem*)itemItr, "hts_label", dFeat, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemSetObject\" failed"))
			goto quit_error;

		itemItr = SItemNext(itemItr, error);
	}

	/* speech synthesis part */
	HTS_Engine_load_label_from_string_list(&(HTSsynth->engine), label_data, label_size);
	check_and_change_rate_volume(HTSsynth, utt, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"check_and_change_rate_volume\" failed"))
		goto quit_error;

	HTS_Engine_create_sstream(&(HTSsynth->engine));
	check_and_change_tone(HTSsynth, utt, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"check_and_change_tone\" failed"))
		goto quit_error;

	HTS_Engine_create_pstream(&(HTSsynth->engine));

	if (HTSsynth->me == TRUE) /* mixed excitation */
	{
		HTS_Engine_create_gstream_me(&(HTSsynth->engine),
									 HTSsynth->me_num_filters, HTSsynth->me_filter_order,
									 HTSsynth->me_filter, HTSsynth->xp_sig, HTSsynth->xn_sig,
									 HTSsynth->hp, HTSsynth->hn,
									 HTSsynth->pd_filter, HTSsynth->pd_filter_order);
	}
	else
	{
		HTS_Engine_create_gstream(&(HTSsynth->engine));
	}

	nstate = HTS_Speect_ModelSet_get_nstate(&(HTSsynth->engine));
	itemItr = item;
	counter = 0;
	frame = 0;
	state = 0;
	while (itemItr != NULL)
	{
		int j;
		int duration = 0;
		float tmp;

		for (j = 0; j < nstate; j++)
			duration += HTS_Speect_SStreamSet_get_duration(&(HTSsynth->engine), state++);

		tmp = frame * rate;
		SItemSetFloat((SItem*)itemItr, "start", tmp/1e+7, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemSetFloat\" failed"))
			goto quit_error;

		tmp = (frame + duration) * rate;
		SItemSetFloat((SItem*)itemItr, "end", tmp/1e+7, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemSetFloat\" failed"))
			goto quit_error;

		frame += duration;
		itemItr = SItemNext(itemItr, error);
		counter++;
	}

	/* We need to give the utterance the audio plug-in. If we don't do
	 * this and the voice is deleted before the utterance, then the
	 * utterance can't do *anything* with the audio. Not even delete
	 * it (segfault). This should be fast because it is already
	 * loaded.
	 * Note that this happens before the audio is set. This is because
	 * utt features are a list implementation.
	 */
	audioPlugin = s_pm_load_plugin("audio.spi", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"s_pm_load_plugin\" failed"))
		goto quit_error;

	SUtteranceSetFeature(utt, "audio_plugin", S_OBJECT(audioPlugin), error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceSetFeature\" failed"))
	{
		S_DELETE(audioPlugin, "Run", error);
		goto quit_error;
	}

	/* create an audio object */
	audio = S_NEW(SAudio, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Failed to create new 'SAudio' object"))
		goto quit_error;

	/* set audio feature in utterance */
	SUtteranceSetFeature(utt, "audio", S_OBJECT(audio), error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceSetFeature\" failed"))
	{
		S_DELETE(audio, "Run", error);
		goto quit_error;
	}

	audio->sample_rate = HTSsynth->engine.global.sampling_rate;
	audio->num_samples = (uint32)HTS_Speect_GStreamSet_get_total_nsample(&(HTSsynth->engine));
	audio->samples = S_MALLOC(float, audio->num_samples);
	if (audio->samples == NULL)
	{
		S_FTL_ERR(error, S_MEMERROR,
				  "Run",
				  "Failed to allocate memory for 'float' object");
		goto quit_error;
	}

	/* write data */
	for (i = 0; i < audio->num_samples; i++)
		audio->samples[i] = (float)(HTS_Speect_GStreamSet_get_speech(&(HTSsynth->engine), i) * 1.0);

	for (counter = 0; counter < label_size; counter++)
		S_FREE(label_data[counter]);
	S_FREE(label_data);

	HTS_Engine_refresh(&(HTSsynth->engine));

	/* all OK here */
	return;

	/* error clean-up code */
quit_error:
	if (label_data != NULL)
	{
		for (counter = 0; counter < label_size; counter++)
		{
			if (label_data[counter] != NULL)
				S_FREE(label_data[counter]);
		}

		S_FREE(label_data);
	}

	return;
}