/* (stream[0] == spectrum && stream[1] == lf0) */
/*
 * HTS_GStreamSet_create: build the generated stream set and synthesize speech.
 *
 *   gss             destination set; gspeech (and gstream, unless HTS_EMBEDDED)
 *                   are allocated here
 *   pss             source of the generated parameters
 *   stage, use_log_gain, sampling_rate, fperiod, audio_buff_size
 *                   forwarded to HTS_Vocoder_initialize
 *   alpha, beta     forwarded to HTS_Vocoder_synthesize for every frame
 *
 * gss->total_nsample is fperiod * total_frame, and each frame writes its
 * output starting at gspeech[frame * fperiod].
 *
 * Without HTS_EMBEDDED the parameters are first copied into gss->gstream and
 * the vocoder reads from that copy; with HTS_EMBEDDED the vocoder reads
 * straight from pss and no copy is kept.
 */
void HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, int stage, HTS_Boolean use_log_gain, int sampling_rate, int fperiod, double alpha, double beta, int audio_buff_size)
{
   int frame;
#ifdef HTS_EMBEDDED
   double lf0;
#else
   int stream, dim;
   HTS_GStream *gst;
   double *row;
#endif                          /* HTS_EMBEDDED */
   int msd_frame;
   HTS_Vocoder v;

   /* the set must still be empty when we get here */
#ifdef HTS_EMBEDDED
   if (gss->gspeech)
#else
   if (gss->gstream || gss->gspeech)
#endif                          /* HTS_EMBEDDED */
      HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");

   /* sizes are taken from the parameter stream set */
   gss->nstream = HTS_PStreamSet_get_nstream(pss);
   gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
   gss->total_nsample = fperiod * gss->total_frame;

#ifndef HTS_EMBEDDED
   /* one [total_frame][static_length] table per stream */
   gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
   for (stream = 0; stream < gss->nstream; stream++) {
      gst = &gss->gstream[stream];
      gst->static_length = HTS_PStreamSet_get_static_length(pss, stream);
      gst->par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
      for (frame = 0; frame < gss->total_frame; frame++)
         gst->par[frame] = (double *) HTS_calloc(gst->static_length, sizeof(double));
   }
#endif                          /* !HTS_EMBEDDED */

   /* output waveform buffer */
   gss->gspeech = (short *) HTS_calloc(gss->total_nsample, sizeof(short));

#ifndef HTS_EMBEDDED
   /* copy the generated parameters into the per-stream tables */
   for (stream = 0; stream < gss->nstream; stream++) {
      gst = &gss->gstream[stream];
      if (HTS_PStreamSet_is_msd(pss, stream)) {
         /*
          * MSD stream: pss stores only the flagged frames, so it is read
          * with its own counter; unflagged frames are filled with LZERO.
          */
         msd_frame = 0;
         for (frame = 0; frame < gss->total_frame; frame++) {
            row = gst->par[frame];
            if (HTS_PStreamSet_get_msd_flag(pss, stream, frame)) {
               for (dim = 0; dim < gst->static_length; dim++)
                  row[dim] = HTS_PStreamSet_get_parameter(pss, stream, msd_frame, dim);
               msd_frame++;
            } else {
               for (dim = 0; dim < gst->static_length; dim++)
                  row[dim] = LZERO;
            }
         }
      } else {
         /* non-MSD stream: frames map one to one */
         for (frame = 0; frame < gss->total_frame; frame++) {
            row = gst->par[frame];
            for (dim = 0; dim < gst->static_length; dim++)
               row[dim] = HTS_PStreamSet_get_parameter(pss, stream, frame, dim);
         }
      }
   }
#endif                          /* !HTS_EMBEDDED */

   /* the vocoder below needs exactly spectrum + lf0, with a scalar lf0 */
   if (gss->nstream != 2)
      HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2.\n");
   if (HTS_PStreamSet_get_static_length(pss, 1) != 1)
      HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");

   /* synthesize speech waveform, one frame (fperiod samples) at a time */
#ifdef HTS_EMBEDDED
   HTS_Vocoder_initialize(&v, HTS_PStreamSet_get_static_length(pss, 0) - 1, stage, use_log_gain, sampling_rate, fperiod, audio_buff_size);
   msd_frame = 0;
   for (frame = 0; frame < gss->total_frame; frame++) {
      /* lf0 is read through the MSD counter; unflagged frames get LZERO */
      if (HTS_PStreamSet_get_msd_flag(pss, 1, frame)) {
         lf0 = HTS_PStreamSet_get_parameter(pss, 1, msd_frame, 0);
         msd_frame++;
      } else {
         lf0 = LZERO;
      }
      HTS_Vocoder_synthesize(&v, HTS_PStreamSet_get_static_length(pss, 0) - 1, lf0, HTS_PStreamSet_get_parameter_vector(pss, 0, frame), alpha, beta, gss->gspeech + frame * fperiod);
   }
#else
   gst = &gss->gstream[0];      /* spectrum stream */
   HTS_Vocoder_initialize(&v, gst->static_length - 1, stage, use_log_gain, sampling_rate, fperiod, audio_buff_size);
   for (frame = 0; frame < gss->total_frame; frame++)
      HTS_Vocoder_synthesize(&v, gst->static_length - 1, gss->gstream[1].par[frame][0], gst->par[frame], alpha, beta, gss->gspeech + frame * fperiod);
#endif                          /* HTS_EMBEDDED */

   HTS_Vocoder_clear(&v);
}
/* HTS_GStreamSet_create: generate speech */ HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio) { size_t i, j, k; size_t msd_frame; HTS_Vocoder v; size_t nlpf = 0; double *lpf = NULL; /* check */ if (gss->gstream || gss->gspeech) { HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n"); return FALSE; } /* initialize */ gss->nstream = HTS_PStreamSet_get_nstream(pss); gss->total_frame = HTS_PStreamSet_get_total_frame(pss); gss->total_nsample = fperiod * gss->total_frame; gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream)); for (i = 0; i < gss->nstream; i++) { gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i); gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *)); for (j = 0; j < gss->total_frame; j++) gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double)); } gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double)); /* copy generated parameter */ for (i = 0; i < gss->nstream; i++) { if (HTS_PStreamSet_is_msd(pss, i)) { /* for MSD */ for (j = 0, msd_frame = 0; j < gss->total_frame; j++) if (HTS_PStreamSet_get_msd_flag(pss, i, j)) { for (k = 0; k < gss->gstream[i].vector_length; k++) gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k); msd_frame++; } else for (k = 0; k < gss->gstream[i].vector_length; k++) gss->gstream[i].par[j][k] = HTS_NODATA; } else { /* for non MSD */ for (j = 0; j < gss->total_frame; j++) for (k = 0; k < gss->gstream[i].vector_length; k++) gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k); } } /* check */ if (gss->nstream != 2 && gss->nstream != 3) { HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n"); HTS_GStreamSet_clear(gss); return FALSE; 
} if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) { HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n"); HTS_GStreamSet_clear(gss); return FALSE; } if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) { HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers."); HTS_GStreamSet_clear(gss); return FALSE; } /* synthesize speech waveform */ HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod); if (gss->nstream >= 3) nlpf = gss->gstream[2].vector_length; for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) { j = i * fperiod; if (gss->nstream >= 3) lpf = &gss->gstream[2].par[i][0]; HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio); } HTS_Vocoder_clear(&v); if (audio) HTS_Audio_flush(audio); return TRUE; }