C++ (Cpp) HTS_Vocoder_synthesize示例

示例#1

0

显示文件

文件： mage_hts_engine_impl.cpp 项目： Olga-Yakovleva/RHVoice

 // Drains the MAGE frame queue: every queued frame is passed through the
 // vocoder, the resulting frame_shift samples are divided by 32768 and
 // handed to the output. The loop ends when the queue is empty or the
 // output reports that it has been stopped.
 //
 // lab supplies the pitch multiplier applied to the F0 of voiced frames.
 void mage_hts_engine_impl::generate_samples(hts_label& lab)
 {
   const double pitch_factor=lab.get_pitch();
   MAGE::FrameQueue* queue=mage->getFrameQueue();
   while(!output->is_stopped()&&!queue->isEmpty())
     {
       MAGE::Frame* frame=queue->get();

       // Pull the spectral and aperiodicity streams into the work buffers.
       std::copy(frame->streams[MAGE::mgcStreamIndex],
                 frame->streams[MAGE::mgcStreamIndex]+mgc.size(),
                 mgc.begin());
       std::copy(frame->streams[MAGE::bapStreamIndex],
                 frame->streams[MAGE::bapStreamIndex]+ap.size(),
                 ap.begin());

       // Positive values are clamped to zero, then 10^(x/10) is applied
       // (presumably a dB to linear conversion -- confirm against the model).
       for(std::size_t k=0;k<ap.size();++k)
         {
           const double clamped=(ap[k]>0)?0:ap[k];
           ap[k]=std::pow(10.0,clamped/10.0);
         }

       // Unvoiced frames carry LZERO; voiced frames carry the frame's lf0,
       // rescaled by the pitch multiplier with a floor of 20 on the
       // resulting linear F0.
       double lf0=LZERO;
       if(frame->voiced)
         {
           lf0=frame->streams[MAGE::lf0StreamIndex][0];
           if(pitch_factor!=1)
             {
               const double scaled_f0=std::exp(lf0)*pitch_factor;
               lf0=std::log((scaled_f0<20)?20.0:scaled_f0);
             }
         }

       // Everything needed from the frame has been read; release the slot.
       queue->pop();

       HTS_Vocoder_synthesize(vocoder.get(),mgc_order,lf0,
                              &(mgc[0]),&(ap[0]),&bpf,
                              alpha,beta,1,&(speech[0]),0);

       for(int n=0;n<frame_shift;++n)
         {
           speech[n]/=32768.0;
         }
       output->process(&(speech[0]),frame_shift);
     }
 }

示例#2

0

显示文件

文件： HTS_gstream.c 项目： rizaqpratama/yukbaca

/*
 * HTS_GStreamSet_create: fill a generated stream set from a parameter
 * stream set and synthesize the waveform into gss->gspeech.
 *
 * Stream 0 is used as the spectrum and stream 1 as lf0
 * (stream[0] == spectrum && stream[1] == lf0).
 *
 * With HTS_EMBEDDED defined no per-frame parameter copy is kept: the
 * vocoder is fed directly from pss. Otherwise the parameters are first
 * copied into gss->gstream and the vocoder reads from that copy.
 */
void HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss,
                           int stage, HTS_Boolean use_log_gain,
                           int sampling_rate, int fperiod, double alpha,
                           double beta, int audio_buff_size)
{
   int frame;
   int msd_frame;
   HTS_Vocoder v;
#ifdef HTS_EMBEDDED
   double lf0;
#else
   int stream, dim;
   HTS_GStream *gs;
   double *row;
#endif                          /* HTS_EMBEDDED */

   /* report an error if gss already owns output buffers */
#ifdef HTS_EMBEDDED
   if (gss->gspeech)
#else
   if (gss->gstream || gss->gspeech)
#endif                          /* HTS_EMBEDDED */
      HTS_error(1,
                "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");

   /* sizes are taken from the parameter stream set */
   gss->nstream = HTS_PStreamSet_get_nstream(pss);
   gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
   gss->total_nsample = fperiod * gss->total_frame;

#ifndef HTS_EMBEDDED
   /* one [total_frame][static_length] table per stream */
   gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
   for (stream = 0; stream < gss->nstream; stream++) {
      gs = &gss->gstream[stream];
      gs->static_length = HTS_PStreamSet_get_static_length(pss, stream);
      gs->par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
      for (frame = 0; frame < gss->total_frame; frame++) {
         gs->par[frame] =
             (double *) HTS_calloc(gs->static_length, sizeof(double));
      }
   }
#endif                          /* !HTS_EMBEDDED */

   /* output waveform: fperiod samples per frame */
   gss->gspeech = (short *) HTS_calloc(gss->total_nsample, sizeof(short));

#ifndef HTS_EMBEDDED
   /* copy the generated parameters frame by frame */
   for (stream = 0; stream < gss->nstream; stream++) {
      gs = &gss->gstream[stream];
      if (HTS_PStreamSet_is_msd(pss, stream)) {
         /*
          * MSD stream: pss is indexed by msd_frame, which advances only
          * on frames whose MSD flag is set; the remaining frames are
          * filled with LZERO.
          */
         msd_frame = 0;
         for (frame = 0; frame < gss->total_frame; frame++) {
            row = gs->par[frame];
            if (HTS_PStreamSet_get_msd_flag(pss, stream, frame)) {
               for (dim = 0; dim < gs->static_length; dim++) {
                  row[dim] =
                      HTS_PStreamSet_get_parameter(pss, stream, msd_frame,
                                                   dim);
               }
               msd_frame++;
            } else {
               for (dim = 0; dim < gs->static_length; dim++) {
                  row[dim] = LZERO;
               }
            }
         }
      } else {
         /* non-MSD stream: frames map one to one */
         for (frame = 0; frame < gss->total_frame; frame++) {
            row = gs->par[frame];
            for (dim = 0; dim < gs->static_length; dim++) {
               row[dim] = HTS_PStreamSet_get_parameter(pss, stream, frame, dim);
            }
         }
      }
   }
#endif                          /* !HTS_EMBEDDED */

   /* the vocoder below needs exactly a spectrum and a scalar lf0 stream */
   if (gss->nstream != 2)
      HTS_error(1,
                "HTS_GStreamSet_create: The number of streams should be 2.\n");
   if (HTS_PStreamSet_get_static_length(pss, 1) != 1)
      HTS_error(1,
                "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");

   /* synthesize speech waveform, one frame (fperiod samples) at a time */
#ifdef HTS_EMBEDDED
   HTS_Vocoder_initialize(&v, HTS_PStreamSet_get_static_length(pss, 0) - 1,
                          stage, use_log_gain, sampling_rate, fperiod,
                          audio_buff_size);
   msd_frame = 0;
   for (frame = 0; frame < gss->total_frame; frame++) {
      /* lf0 is read through the MSD index; flag-less frames get LZERO */
      if (HTS_PStreamSet_get_msd_flag(pss, 1, frame)) {
         lf0 = HTS_PStreamSet_get_parameter(pss, 1, msd_frame, 0);
         msd_frame++;
      } else {
         lf0 = LZERO;
      }
      HTS_Vocoder_synthesize(&v, HTS_PStreamSet_get_static_length(pss, 0) - 1,
                             lf0,
                             HTS_PStreamSet_get_parameter_vector(pss, 0, frame),
                             alpha, beta, gss->gspeech + frame * fperiod);
   }
#else
   HTS_Vocoder_initialize(&v, gss->gstream[0].static_length - 1, stage,
                          use_log_gain, sampling_rate, fperiod,
                          audio_buff_size);
   for (frame = 0; frame < gss->total_frame; frame++) {
      HTS_Vocoder_synthesize(&v, gss->gstream[0].static_length - 1,
                             gss->gstream[1].par[frame][0],
                             gss->gstream[0].par[frame], alpha, beta,
                             gss->gspeech + frame * fperiod);
   }
#endif                          /* HTS_EMBEDDED */
   HTS_Vocoder_clear(&v);
}

示例#3

0

显示文件

文件： HTS_gstream.c 项目： MaxMEllon/node-openjtalk

/*
 * HTS_GStreamSet_create: generate speech
 *
 * Copies the generated parameters of pss into gss->gstream and runs the
 * vocoder over them, writing fperiod samples per frame into gss->gspeech.
 *
 * Stream 0 is passed to the vocoder as the spectrum, element 0 of stream 1
 * as lf0, and an optional stream 2 as low-pass filter coefficients.
 *
 * gss           destination; must have neither gstream nor gspeech set
 * pss           source parameter stream set
 * stage, use_log_gain, sampling_rate, fperiod
 *               forwarded to HTS_Vocoder_initialize
 * alpha, beta, volume
 *               forwarded to HTS_Vocoder_synthesize
 * stop          polled before every frame; synthesis ends early once
 *               *stop is no longer FALSE (must not be NULL)
 * audio         forwarded to HTS_Vocoder_synthesize; flushed afterwards
 *               when non-NULL
 *
 * Returns FALSE when gss already holds data, when the number of streams is
 * not 2 or 3, when the lf0 vector length is not 1, or when stream 2 has an
 * even vector length. In the last three cases gss is cleared with
 * HTS_GStreamSet_clear first. Returns TRUE otherwise, including when
 * synthesis was cut short through *stop.
 */
HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio)
{
   size_t i, j, k;
   size_t msd_frame;
   HTS_Vocoder v;
   size_t nlpf = 0;             /* stays 0 / NULL when there is no third stream */
   double *lpf = NULL;

   /* check */
   if (gss->gstream || gss->gspeech) {
      HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");
      return FALSE;
   }

   /* initialize */
   gss->nstream = HTS_PStreamSet_get_nstream(pss);
   gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
   gss->total_nsample = fperiod * gss->total_frame;
   gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
   for (i = 0; i < gss->nstream; i++) {
      /* one [total_frame][vector_length] table per stream */
      gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i);
      gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
      for (j = 0; j < gss->total_frame; j++)
         gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double));
   }
   gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double));

   /* copy generated parameter */
   for (i = 0; i < gss->nstream; i++) {
      if (HTS_PStreamSet_is_msd(pss, i)) {      /* for MSD */
         /* pss is indexed by msd_frame, which advances only on flagged frames */
         for (j = 0, msd_frame = 0; j < gss->total_frame; j++)
            if (HTS_PStreamSet_get_msd_flag(pss, i, j)) {
               for (k = 0; k < gss->gstream[i].vector_length; k++)
                  gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k);
               msd_frame++;
            } else
               for (k = 0; k < gss->gstream[i].vector_length; k++)
                  gss->gstream[i].par[j][k] = HTS_NODATA;
      } else {                  /* for non MSD */
         for (j = 0; j < gss->total_frame; j++)
            for (k = 0; k < gss->gstream[i].vector_length; k++)
               gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k);
      }
   }

   /* check: on failure release what was allocated above */
   if (gss->nstream != 2 && gss->nstream != 3) {
      HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n");
      HTS_GStreamSet_clear(gss);
      return FALSE;
   }
   if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) {
      HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");
      HTS_GStreamSet_clear(gss);
      return FALSE;
   }
   if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) {
      /* message terminated with a newline like the other diagnostics here */
      HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers.\n");
      HTS_GStreamSet_clear(gss);
      return FALSE;
   }

   /* synthesize speech waveform */
   HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod);
   if (gss->nstream >= 3)
      nlpf = gss->gstream[2].vector_length;
   for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) {
      j = i * fperiod;          /* offset of this frame's samples in gspeech */
      if (gss->nstream >= 3)
         lpf = &gss->gstream[2].par[i][0];
      HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio);
   }
   HTS_Vocoder_clear(&v);
   if (audio)
      HTS_Audio_flush(audio);

   return TRUE;
}