Пример #1
0
/* HTS_Engine_initialize: reset the engine conditions to their start-up
   values and initialize the model set and the three stream sets */
void HTS_Engine_initialize(HTS_Engine * engine)
{
   /* per-stream MSD threshold table is not allocated yet */
   engine->condition.msd_threshold = NULL;

   /* integer-valued conditions start at zero */
   engine->condition.fperiod = 0;
   engine->condition.sampling_frequency = 0;

   /* real-valued conditions that start at zero: spectrum (alpha, beta)
      and log F0 (additional half tone) */
   engine->condition.additional_half_tone = 0.0;
   engine->condition.beta = 0.0;
   engine->condition.alpha = 0.0;

   /* duration: speed starts at 1.0 */
   engine->condition.speed = 1.0;

   /* nested sets, in order: model set, state sequence set,
      pstream set, gstream set */
   HTS_ModelSet_initialize(&engine->ms);
   HTS_SStreamSet_initialize(&engine->sss);
   HTS_PStreamSet_initialize(&engine->pss);
   HTS_GStreamSet_initialize(&engine->gss);
}
Пример #2
0
/* HTS_PStreamSet_clear: release everything owned by each parameter stream,
   release the stream array itself, then re-initialize the set */
void HTS_PStreamSet_clear(HTS_PStreamSet * pss)
{
   int s, w;
   HTS_PStream *cur;

   if (pss->pstream != NULL) {
      for (s = 0; s < pss->nstream; s++) {
         cur = pss->pstream + s;

         /* smoothing-matrix work buffers */
         HTS_free(cur->sm.wum);
         HTS_free(cur->sm.g);
         HTS_free_matrix(cur->sm.wuw, cur->length);
         HTS_free_matrix(cur->sm.ivar, cur->length);
         HTS_free_matrix(cur->sm.mean, cur->length);

         /* generated parameters */
         HTS_free_matrix(cur->par, cur->length);

         /* optional MSD flags */
         if (cur->msd_flag != NULL) {
            HTS_free(cur->msd_flag);
         }

         /* each coefficient pointer is stored shifted by its left width;
            shift it back before handing it to HTS_free, last window first */
         w = cur->win_size;
         while (w-- > 0) {
            cur->win_coefficient[w] += cur->win_l_width[w];
            HTS_free(cur->win_coefficient[w]);
         }

         /* optional GV mean / variance */
         if (cur->gv_mean != NULL) {
            HTS_free(cur->gv_mean);
         }
         if (cur->gv_vari != NULL) {
            HTS_free(cur->gv_vari);
         }

         /* window tables */
         HTS_free(cur->win_coefficient);
         HTS_free(cur->win_l_width);
         HTS_free(cur->win_r_width);

         /* optional GV switch */
         if (cur->gv_switch != NULL) {
            HTS_free(cur->gv_switch);
         }
      }
      HTS_free(pss->pstream);
   }

   /* leave the set in its freshly initialized state */
   HTS_PStreamSet_initialize(pss);
}
Пример #3
0
/* HTS_Engine_initialize: set the default control parameters, allocate the
   per-stream tables for nstream streams, and initialize the model set,
   label list and stream sets */
void HTS_Engine_initialize(HTS_Engine * engine, int nstream)
{
    int s;

    /* scalar defaults */
    engine->global.sampling_rate = 16000;
    engine->global.fperiod = 80;
    engine->global.audio_buff_size = 0;
    engine->global.stage = 0;
    engine->global.use_log_gain = FALSE;
    engine->global.alpha = 0.42;
    engine->global.beta = 0.0;

    /* MSD threshold: one entry per stream, each 0.5 */
    engine->global.msd_threshold =
        (double *) HTS_calloc(nstream, sizeof(double));
    for (s = 0; s < nstream; s++) {
        engine->global.msd_threshold[s] = 0.5;
    }

    /* interpolation weights: the per-stream slots exist but are unset,
       and the duration weights are not allocated at all */
    engine->global.parameter_iw =
        (double **) HTS_calloc(nstream, sizeof(double *));
    engine->global.gv_iw = (double **) HTS_calloc(nstream, sizeof(double *));
    for (s = 0; s < nstream; s++) {
        engine->global.parameter_iw[s] = NULL;
        engine->global.gv_iw[s] = NULL;
    }
    engine->global.duration_iw = NULL;

    /* GV weight: one entry per stream, each 1.0 */
    engine->global.gv_weight = (double *) HTS_calloc(nstream, sizeof(double));
    for (s = 0; s < nstream; s++) {
        engine->global.gv_weight[s] = 1.0;
    }

    /* nested objects, in order: model set, label list, state sequence set,
       pstream set, gstream set */
    HTS_ModelSet_initialize(&engine->ms, nstream);
    HTS_Label_initialize(&engine->label);
    HTS_SStreamSet_initialize(&engine->sss);
    HTS_PStreamSet_initialize(&engine->pss);
    HTS_GStreamSet_initialize(&engine->gss);
}
Пример #4
0
/* HTS_Engine_initialize: reset the engine conditions to their start-up
   values and initialize audio, model set, label list and stream sets */
void HTS_Engine_initialize(HTS_Engine * engine)
{
   /* pointer-valued conditions: nothing is allocated yet */
   engine->condition.msd_threshold = NULL;
   engine->condition.gv_weight = NULL;
   engine->condition.duration_iw = NULL;
   engine->condition.parameter_iw = NULL;
   engine->condition.gv_iw = NULL;

   /* boolean conditions all start out FALSE */
   engine->condition.stop = FALSE;
   engine->condition.phoneme_alignment_flag = FALSE;
   engine->condition.use_log_gain = FALSE;

   /* integer-valued conditions start at zero */
   engine->condition.sampling_frequency = 0;
   engine->condition.fperiod = 0;
   engine->condition.audio_buff_size = 0;
   engine->condition.stage = 0;

   /* real-valued conditions that start at zero: spectrum (alpha, beta)
      and log F0 (additional half tone) */
   engine->condition.alpha = 0.0;
   engine->condition.beta = 0.0;
   engine->condition.additional_half_tone = 0.0;

   /* real-valued conditions that start at one */
   engine->condition.volume = 1.0;
   engine->condition.speed = 1.0;

   /* nested objects, in order: audio, model set, label list,
      state sequence set, pstream set, gstream set */
   HTS_Audio_initialize(&engine->audio);
   HTS_ModelSet_initialize(&engine->ms);
   HTS_Label_initialize(&engine->label);
   HTS_SStreamSet_initialize(&engine->sss);
   HTS_PStreamSet_initialize(&engine->pss);
   HTS_GStreamSet_initialize(&engine->gss);
}
Пример #5
0
/* vc: perform conversion
 *
 * Appends dynamic features to the source sequence using `window`, then, for
 * every frame, picks the mixture component of `gmm` with the largest
 * posterior given the source vector.  That component's conditional mean and
 * the diagonal of its conditional covariance become the per-frame pdf handed
 * to HTS_PStreamSet_create(); the generated parameters are copied to
 * `target`.
 *
 *   gmm         : joint GMM; mean/cov are indexed with the source part at
 *                 offset 0 and the target part at offset
 *                 source_vlen * window->win_size
 *   window      : delta windows (win_size, win_l_width, win_r_width,
 *                 win_coefficient and win_max_width are read)
 *   total_frame : number of frames in `source` and `target`
 *   source_vlen : length of one static source vector
 *   target_vlen : length of one static target vector
 *   gv_mean,
 *   gv_vari     : GV parameters, target_vlen values each are read; they are
 *                 only used when both pointers are non-NULL
 *   source      : input, total_frame * source_vlen values
 *   target      : output, total_frame * target_vlen values
 *
 * Always returns 0.
 *
 * NOTE(review): every accumulation below uses `+=` on freshly allocated
 * buffers, so this assumes dgetmem()/ddgetmem()/getmem() return
 * zero-initialized memory -- confirm against their definitions.
 */
int vc(const GMM * gmm, const DELTAWINDOW * window, const size_t total_frame,
       const size_t source_vlen, const size_t target_vlen,
       const double *gv_mean, const double *gv_vari,
       const double *source, double *target)
{
   size_t t, i, j, k, max_num_mix = 0,
       src_vlen_dyn = source_vlen * window->win_size,
       tgt_vlen_dyn = target_vlen * window->win_size;
   int m, l, shift;
   double max_post_mix = 0.0, logoutp = LZERO, *input = NULL,
       *src_with_dyn = NULL, *logwgd = NULL,
       **cov_xx_inv = NULL, ***cov_yx_xx = NULL, *gv_weight = NULL,
       ***cond_mean = NULL, ***cond_vari = NULL, **cond_post_mix = NULL;
   HTS_SStreamSet sss;
   HTS_PStreamSet pss;

   /* append dynamic feature: frame t holds win_size consecutive sub-vectors
      of source_vlen values, one per window */
   src_with_dyn = dgetmem(total_frame * src_vlen_dyn);
   for (t = 0; t < total_frame; t++) {
      for (i = 0; i < window->win_size; i++) {
         j = window->win_size * source_vlen * t + source_vlen * i;
         for (shift = window->win_l_width[i];
              shift <= window->win_r_width[i]; shift++) {
            /* do the arithmetic in int: shift may be negative and must not
               be promoted to the unsigned type of t */
            l = (int) t + shift;
            /* clamp to the first / last frame at the sequence edges */
            if (l < 0) {
               l = 0;
            }
            if (!(l < (int) total_frame)) {
               l = (int) total_frame - 1;
            }
            for (k = 0; k < source_vlen; k++) {
               src_with_dyn[j + k] += window->win_coefficient[i][shift]
                   * source[source_vlen * l + k];
            }
         }
      }
   }

   /* calculate mean and covariance of conditional distribution
      given source feature and mixture component */
   cond_post_mix = ddgetmem(total_frame, gmm->nmix);
   cond_mean = (double ***) getmem(gmm->nmix, sizeof(*(cond_mean)));
   for (m = 0; m < gmm->nmix; m++) {
      cond_mean[m] = ddgetmem(total_frame, tgt_vlen_dyn);
   }
   cond_vari = (double ***) getmem(gmm->nmix, sizeof(*(cond_vari)));
   for (m = 0; m < gmm->nmix; m++) {
      cond_vari[m] = ddgetmem(tgt_vlen_dyn, tgt_vlen_dyn);
   }
   cov_xx_inv = ddgetmem(src_vlen_dyn, src_vlen_dyn);
   cov_yx_xx = (double ***) getmem(gmm->nmix, sizeof(*(cov_yx_xx)));
   for (m = 0; m < gmm->nmix; m++) {
      cov_yx_xx[m] = ddgetmem(tgt_vlen_dyn, src_vlen_dyn);
   }
   /* cov_yx_xx[m] = (target-source block of cov) * cov_xx_inv, where
      cov_xx_inv is filled by invert() from the leading
      src_vlen_dyn x src_vlen_dyn part of cov */
   for (m = 0; m < gmm->nmix; m++) {
      invert(gmm->gauss[m].cov, cov_xx_inv, src_vlen_dyn);
      for (i = 0; i < tgt_vlen_dyn; i++) {
         for (j = 0; j < src_vlen_dyn; j++) {
            for (k = 0; k < src_vlen_dyn; k++) {
               cov_yx_xx[m][i][j] += gmm->gauss[m].cov[src_vlen_dyn + i][k]
                   * cov_xx_inv[k][j];
            }
         }
      }
   }
   logwgd = dgetmem(gmm->nmix);
   input = dgetmem(src_vlen_dyn);
   for (t = 0; t < total_frame; t++) {
      for (i = 0; i < src_vlen_dyn; i++) {
         input[i] = src_with_dyn[t * src_vlen_dyn + i];
      }
      /* per-mixture log_wgd() values and their log-sum over all mixtures */
      for (m = 0, logoutp = LZERO; m < gmm->nmix; m++) {
         logwgd[m] = log_wgd(gmm, m, src_vlen_dyn, input);
         logoutp = log_add(logoutp, logwgd[m]);
      }
      for (m = 0; m < gmm->nmix; m++) {
         /* posterior probability of mixture component given source feature */
         cond_post_mix[t][m] = exp(logwgd[m] - logoutp);
         /* conditional mean:
            target mean + cov_yx_xx * (input - source mean) */
         for (i = 0; i < tgt_vlen_dyn; i++) {
            for (j = 0; j < src_vlen_dyn; j++) {
               cond_mean[m][t][i] += cov_yx_xx[m][i][j]
                   * (input[j] - gmm->gauss[m].mean[j]);
            }
            cond_mean[m][t][i] += gmm->gauss[m].mean[src_vlen_dyn + i];
         }
      }
   }
   /* conditional covariance:
      target-target block - cov_yx_xx * source-target block */
   for (m = 0; m < gmm->nmix; m++) {
      for (i = 0; i < tgt_vlen_dyn; i++) {
         for (j = 0; j < tgt_vlen_dyn; j++) {
            for (k = 0; k < src_vlen_dyn; k++) {
               cond_vari[m][i][j] += cov_yx_xx[m][i][k]
                   * gmm->gauss[m].cov[k][src_vlen_dyn + j];
            }
            cond_vari[m][i][j] =
                gmm->gauss[m].cov[src_vlen_dyn + i][src_vlen_dyn + j]
                - cond_vari[m][i][j];
         }
      }
   }

   /* initialize parameter set of hts_engine: a single stream in which every
      frame is its own state with duration 1 */
   HTS_PStreamSet_initialize(&pss);
   sss.nstream = 1;
   sss.total_state = total_frame;
   sss.total_frame = total_frame;
   sss.duration = (size_t *) getmem(total_frame, sizeof(size_t));
   for (i = 0; i < total_frame; i++) {
      sss.duration[i] = 1;
   }
   sss.sstream = (HTS_SStream *) getmem(1, sizeof(HTS_SStream));
   sss.sstream->vector_length = target_vlen;
   sss.sstream->mean =
       (double **) getmem(sss.total_state, sizeof(*(sss.sstream->mean)));
   sss.sstream->vari =
       (double **) getmem(sss.total_state, sizeof(*(sss.sstream->vari)));
   for (i = 0; i < sss.total_state; i++) {
      sss.sstream->mean[i] = dgetmem(tgt_vlen_dyn);
      sss.sstream->vari[i] = dgetmem(tgt_vlen_dyn);
   }
   sss.sstream->msd = NULL;     /* no MSD */
   sss.sstream->win_size = window->win_size;
   sss.sstream->win_l_width =
       (int *) getmem(window->win_size, sizeof(*(sss.sstream->win_l_width)));
   sss.sstream->win_r_width =
       (int *) getmem(window->win_size, sizeof(*(sss.sstream->win_r_width)));
   sss.sstream->win_coefficient =
       (double **) getmem(window->win_size,
                          sizeof(*(sss.sstream->win_coefficient)));
   for (i = 0; i < window->win_size; i++) {
      sss.sstream->win_l_width[i] = window->win_l_width[i];
      sss.sstream->win_r_width[i] = window->win_r_width[i];
      /* one slot per index in [win_l_width, win_r_width]; sizing from both
         widths keeps the copy loop below in bounds even when the window is
         not symmetric around zero */
      sss.sstream->win_coefficient[i] =
          dgetmem(sss.sstream->win_r_width[i]
                  - sss.sstream->win_l_width[i] + 1);
      /* shift the pointer so that it can be indexed directly by `shift`,
         starting at win_l_width */
      sss.sstream->win_coefficient[i] -= sss.sstream->win_l_width[i];
      for (shift = sss.sstream->win_l_width[i];
           shift <= sss.sstream->win_r_width[i]; shift++) {
         sss.sstream->win_coefficient[i][shift] =
             window->win_coefficient[i][shift];
      }
   }
   sss.sstream->win_max_width = window->win_max_width;
   if ((gv_mean != NULL) && (gv_vari != NULL)) {        /* set GV parameters */
      sss.sstream->gv_mean = dgetmem(sss.sstream->vector_length);
      sss.sstream->gv_vari = dgetmem(sss.sstream->vector_length);
      for (i = 0; i < sss.sstream->vector_length; i++) {
         sss.sstream->gv_mean[i] = gv_mean[i];
         sss.sstream->gv_vari[i] = gv_vari[i];
      }
   } else {
      sss.sstream->gv_mean = NULL;
      sss.sstream->gv_vari = NULL;
   }
   /* GV switch is TRUE for every frame, GV weight is 1.0 throughout */
   sss.sstream->gv_switch =
       (HTS_Boolean *) getmem(total_frame, sizeof(HTS_Boolean));
   for (i = 0; i < total_frame; i++) {
      sss.sstream->gv_switch[i] = TRUE;
   }
   gv_weight = dgetmem(tgt_vlen_dyn);
   for (i = 0; i < tgt_vlen_dyn; i++) {
      gv_weight[i] = 1.0;
   }

   /* initialize pdf sequence: for each frame take the mixture with the
      largest posterior, its conditional mean, and the diagonal of its
      conditional covariance */
   for (t = 0; t < total_frame; t++) {
      max_post_mix = cond_post_mix[t][0];
      max_num_mix = 0;
      for (m = 1; m < gmm->nmix; m++) {
         if (max_post_mix < cond_post_mix[t][m]) {
            max_post_mix = cond_post_mix[t][m];
            max_num_mix = m;
         }
      }
      for (i = 0; i < tgt_vlen_dyn; i++) {
         sss.sstream->mean[t][i] = cond_mean[max_num_mix][t][i];
         sss.sstream->vari[t][i] = cond_vari[max_num_mix][i][i];
      }
   }

   /* parameter generation by hts_engine API; only the first target_vlen
      values of each generated frame are copied out */
   HTS_PStreamSet_create(&pss, &sss, NULL, gv_weight);
   for (t = 0; t < total_frame; t++) {
      k = t * target_vlen;
      for (i = 0; i < target_vlen; i++) {
         target[k + i] = pss.pstream->par[t][i];
      }
   }

   /* release memory; matrices from ddgetmem() are released as row 0
      followed by the row-pointer array */
   free(src_with_dyn);
   free(input);
   free(logwgd);
   free(cov_xx_inv[0]);
   free(cov_xx_inv);
   for (m = 0; m < gmm->nmix; m++) {
      free(cov_yx_xx[m][0]);
      free(cov_yx_xx[m]);
      free(cond_mean[m][0]);
      free(cond_mean[m]);
      free(cond_vari[m][0]);
      free(cond_vari[m]);
   }
   free(cov_yx_xx);
   free(cond_mean);
   free(cond_vari);
   free(cond_post_mix[0]);
   free(cond_post_mix);
   free(gv_weight);
   HTS_PStreamSet_clear(&pss);
   HTS_SStreamSet_clear(&sss);

   return (0);
}