// Drains the MAGE frame queue and turns every queued frame into audio.
//
// The loop runs until either the output reports that it has been stopped or
// the queue has no more frames. For each frame it:
//   1. copies the spectral (mgc) and aperiodicity (bap) streams into the
//      member work buffers `mgc` and `ap`;
//   2. clamps positive aperiodicity values to 0 and maps each value x to
//      10^(x/10) (presumably a dB-to-linear conversion — confirm);
//   3. picks the log-F0 of the frame, or LZERO for an unvoiced frame, and for
//      voiced frames rescales F0 by the pitch factor taken from `lab`
//      (the rescaled F0 is never allowed to drop below 20);
//   4. removes the frame from the queue, runs the vocoder for one frame and
//      hands `frame_shift` samples, divided by 32768, to the output.
//
// lab: label whose get_pitch() supplies the multiplicative pitch factor; a
//      factor of exactly 1 leaves F0 untouched.
void mage_hts_engine_impl::generate_samples(hts_label& lab)
{
  const double pitch_factor = lab.get_pitch();
  MAGE::FrameQueue* const queue = mage->getFrameQueue();

  while (!output->is_stopped() && !queue->isEmpty())
  {
    MAGE::Frame* const frame = queue->get();

    // Pull the spectral and aperiodicity parameters out of the frame.
    std::copy(frame->streams[MAGE::mgcStreamIndex],
              frame->streams[MAGE::mgcStreamIndex] + mgc.size(),
              mgc.begin());
    std::copy(frame->streams[MAGE::bapStreamIndex],
              frame->streams[MAGE::bapStreamIndex] + ap.size(),
              ap.begin());

    // Cap each band at 0 before mapping it through 10^(x/10).
    const std::size_t band_count = ap.size();
    for (std::size_t band = 0; band != band_count; ++band)
    {
      double capped = ap[band];
      if (capped > 0)
        capped = 0;
      ap[band] = std::pow(10.0, capped / 10.0);
    }

    // Unvoiced frames carry the LZERO marker instead of a real log-F0.
    double lf0 = LZERO;
    if (frame->voiced)
    {
      lf0 = frame->streams[MAGE::lf0StreamIndex][0];
      if (pitch_factor != 1)
      {
        double f0 = std::exp(lf0) * pitch_factor;
        if (f0 < 20)
          f0 = 20;
        lf0 = std::log(f0);
      }
    }

    // Everything needed from the frame has been read; release it before
    // the vocoder runs.
    queue->pop();

    HTS_Vocoder_synthesize(vocoder.get(), mgc_order, lf0, &(mgc[0]), &(ap[0]),
                           &bpf, alpha, beta, 1, &(speech[0]), 0);

    // Scale by 1/32768 (presumably from 16-bit sample range to unit range —
    // confirm against the output's expected format).
    for (int n = 0; n < frame_shift; ++n)
    {
      speech[n] /= 32768.0;
    }

    output->process(&(speech[0]), frame_shift);
  }
}
/* HTS_GStreamSet_create: build the generated-stream set and synthesize speech.
 * (stream[0] == spectrum && stream[1] == lf0)
 *
 * gss            : destination set; must not already hold buffers
 * pss            : source of the generated parameters
 * stage, use_log_gain, sampling_rate, fperiod, audio_buff_size
 *                : forwarded unchanged to HTS_Vocoder_initialize
 * alpha, beta    : forwarded unchanged to HTS_Vocoder_synthesize
 *
 * The speech buffer holds fperiod samples for every frame. Without
 * HTS_EMBEDDED the parameters are first copied into per-stream tables
 * (frames of an MSD stream that carry no data are filled with LZERO) and
 * the vocoder reads from those tables. With HTS_EMBEDDED no tables are
 * built and the vocoder reads straight from pss.
 *
 * NOTE(review): the code following each HTS_error(1, ...) call appears to
 * rely on HTS_error not returning for error code 1 -- confirm. */
void HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, int stage, HTS_Boolean use_log_gain, int sampling_rate, int fperiod, double alpha, double beta, int audio_buff_size)
{
   HTS_Vocoder vocoder;
   int frame;
   int src_frame;               /* index of the next data-carrying MSD frame */
#ifdef HTS_EMBEDDED
   double lf0;
#else
   int stream, dim;
   HTS_GStream *gs;
   double *dst;
#endif                          /* HTS_EMBEDDED */

   /* refuse to run on a set that already owns buffers */
#ifdef HTS_EMBEDDED
   if (gss->gspeech)
#else
   if (gss->gstream || gss->gspeech)
#endif                          /* HTS_EMBEDDED */
      HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");

   /* sizes */
   gss->nstream = HTS_PStreamSet_get_nstream(pss);
   gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
   gss->total_nsample = fperiod * gss->total_frame;

#ifndef HTS_EMBEDDED
   /* one table of total_frame rows per stream */
   gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
   for (stream = 0; stream < gss->nstream; stream++) {
      gs = &gss->gstream[stream];
      gs->static_length = HTS_PStreamSet_get_static_length(pss, stream);
      gs->par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
      for (frame = 0; frame < gss->total_frame; frame++)
         gs->par[frame] = (double *) HTS_calloc(gs->static_length, sizeof(double));
   }
#endif                          /* !HTS_EMBEDDED */

   gss->gspeech = (short *) HTS_calloc(gss->total_nsample, sizeof(short));

#ifndef HTS_EMBEDDED
   /* fill the tables from the parameter stream set */
   for (stream = 0; stream < gss->nstream; stream++) {
      gs = &gss->gstream[stream];
      if (HTS_PStreamSet_is_msd(pss, stream)) {
         /* MSD stream: pss only stores the frames whose flag is set */
         for (frame = 0, src_frame = 0; frame < gss->total_frame; frame++) {
            dst = gs->par[frame];
            if (HTS_PStreamSet_get_msd_flag(pss, stream, frame)) {
               for (dim = 0; dim < gs->static_length; dim++)
                  dst[dim] = HTS_PStreamSet_get_parameter(pss, stream, src_frame, dim);
               src_frame++;
            } else {
               for (dim = 0; dim < gs->static_length; dim++)
                  dst[dim] = LZERO;
            }
         }
      } else {
         /* non-MSD stream: one source frame per output frame */
         for (frame = 0; frame < gss->total_frame; frame++) {
            dst = gs->par[frame];
            for (dim = 0; dim < gs->static_length; dim++)
               dst[dim] = HTS_PStreamSet_get_parameter(pss, stream, frame, dim);
         }
      }
   }
#endif                          /* !HTS_EMBEDDED */

   /* the vocoder below needs exactly a spectrum stream and a scalar lf0 stream */
   if (gss->nstream != 2)
      HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2.\n");
   if (HTS_PStreamSet_get_static_length(pss, 1) != 1)
      HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");

   /* synthesize speech waveform, fperiod samples per frame */
#ifdef HTS_EMBEDDED
   HTS_Vocoder_initialize(&vocoder, HTS_PStreamSet_get_static_length(pss, 0) - 1, stage, use_log_gain, sampling_rate, fperiod, audio_buff_size);
   for (frame = 0, src_frame = 0; frame < gss->total_frame; frame++) {
      if (HTS_PStreamSet_get_msd_flag(pss, 1, frame))
         lf0 = HTS_PStreamSet_get_parameter(pss, 1, src_frame++, 0);
      else
         lf0 = LZERO;
      HTS_Vocoder_synthesize(&vocoder, HTS_PStreamSet_get_static_length(pss, 0) - 1, lf0, HTS_PStreamSet_get_parameter_vector(pss, 0, frame), alpha, beta, gss->gspeech + frame * fperiod);
   }
#else
   gs = &gss->gstream[0];
   HTS_Vocoder_initialize(&vocoder, gs->static_length - 1, stage, use_log_gain, sampling_rate, fperiod, audio_buff_size);
   for (frame = 0; frame < gss->total_frame; frame++)
      HTS_Vocoder_synthesize(&vocoder, gs->static_length - 1, gss->gstream[1].par[frame][0], gs->par[frame], alpha, beta, gss->gspeech + frame * fperiod);
#endif                          /* HTS_EMBEDDED */

   HTS_Vocoder_clear(&vocoder);
}
/* HTS_GStreamSet_create: generate speech */ HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio) { size_t i, j, k; size_t msd_frame; HTS_Vocoder v; size_t nlpf = 0; double *lpf = NULL; /* check */ if (gss->gstream || gss->gspeech) { HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n"); return FALSE; } /* initialize */ gss->nstream = HTS_PStreamSet_get_nstream(pss); gss->total_frame = HTS_PStreamSet_get_total_frame(pss); gss->total_nsample = fperiod * gss->total_frame; gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream)); for (i = 0; i < gss->nstream; i++) { gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i); gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *)); for (j = 0; j < gss->total_frame; j++) gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double)); } gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double)); /* copy generated parameter */ for (i = 0; i < gss->nstream; i++) { if (HTS_PStreamSet_is_msd(pss, i)) { /* for MSD */ for (j = 0, msd_frame = 0; j < gss->total_frame; j++) if (HTS_PStreamSet_get_msd_flag(pss, i, j)) { for (k = 0; k < gss->gstream[i].vector_length; k++) gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k); msd_frame++; } else for (k = 0; k < gss->gstream[i].vector_length; k++) gss->gstream[i].par[j][k] = HTS_NODATA; } else { /* for non MSD */ for (j = 0; j < gss->total_frame; j++) for (k = 0; k < gss->gstream[i].vector_length; k++) gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k); } } /* check */ if (gss->nstream != 2 && gss->nstream != 3) { HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n"); HTS_GStreamSet_clear(gss); return FALSE; 
} if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) { HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n"); HTS_GStreamSet_clear(gss); return FALSE; } if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) { HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers."); HTS_GStreamSet_clear(gss); return FALSE; } /* synthesize speech waveform */ HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod); if (gss->nstream >= 3) nlpf = gss->gstream[2].vector_length; for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) { j = i * fperiod; if (gss->nstream >= 3) lpf = &gss->gstream[2].par[i][0]; HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio); } HTS_Vocoder_clear(&v); if (audio) HTS_Audio_flush(audio); return TRUE; }