Ejemplo n.º 1
0
/* HTS_Engine_refresh: clear the stream sets kept by the engine for one synthesis run */
void HTS_Engine_refresh(HTS_Engine * engine)
{
   /* generated parameter stream set (engine->gss) */
   HTS_GStreamSet_clear(&engine->gss);

   /* parameter stream set (engine->pss) */
   HTS_PStreamSet_clear(&engine->pss);

   /* state stream set (engine->sss) */
   HTS_SStreamSet_clear(&engine->sss);
}
Ejemplo n.º 2
0
/* HTS_Engine_refresh: clear everything the engine keeps for one synthesis run */
void HTS_Engine_refresh(HTS_Engine * engine)
{
   /* generated parameter stream set (engine->gss) */
   HTS_GStreamSet_clear(&engine->gss);

   /* parameter stream set (engine->pss) */
   HTS_PStreamSet_clear(&engine->pss);

   /* state stream set (engine->sss) */
   HTS_SStreamSet_clear(&engine->sss);

   /* label list (engine->label) */
   HTS_Label_clear(&engine->label);

   /* reset the stop flag so that it reads FALSE again */
   engine->condition.stop = FALSE;
}
Ejemplo n.º 3
0
/*
 * HTS_Engine_synthesize_from_strings: synthesize speech from label strings
 *
 * Refreshes the engine, then builds in turn the state stream set (from
 * `lines` / `num_lines`), the parameter stream set and the generated stream
 * set.  Returns TRUE when all three stages succeed.  If any stage does not
 * return TRUE the engine is refreshed again and FALSE is returned.
 */
HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
{
   size_t state;
   double base;
   double shifted;

   /* drop whatever a previous run left behind */
   HTS_Engine_refresh(engine);

   /* stage 1: state sequence */
   if (HTS_SStreamSet_create(&engine->sss, &engine->ms, lines, num_lines, engine->condition.speed) != TRUE)
      goto failed;

   /* optional shift of the state means of stream 1, dimension 0; the result
      is clamped to [MIN_LF0, MAX_LF0] (presumably the log-F0 stream) */
   if (engine->condition.additional_half_tone != 0.0) {
      for (state = 0; state < HTS_SStreamSet_get_total_state(&engine->sss); state++) {
         base = HTS_SStreamSet_get_mean(&engine->sss, 1, state, 0);
         shifted = base + engine->condition.additional_half_tone * HALF_TONE;
         if (shifted < MIN_LF0) {
            shifted = MIN_LF0;
         } else if (shifted > MAX_LF0) {
            shifted = MAX_LF0;
         }
         HTS_SStreamSet_set_mean(&engine->sss, 1, state, 0, shifted);
      }
   }

   /* stage 2: parameter sequence */
   if (HTS_PStreamSet_create(&engine->pss, &engine->sss, engine->condition.msd_threshold) != TRUE)
      goto failed;

   /* the state sequence is not used by the remaining stage; release it now */
   HTS_SStreamSet_clear(&engine->sss);

   /* stage 3: sound sample sequence */
   if (HTS_GStreamSet_create(&engine->gss, &engine->pss, engine->condition.sampling_frequency,
                             engine->condition.fperiod, engine->condition.alpha,
                             engine->condition.beta) != TRUE)
      goto failed;

   return TRUE;

 failed:
   /* common failure path: discard partial results */
   HTS_Engine_refresh(engine);
   return FALSE;
}
Ejemplo n.º 4
0
/*
 * vc: perform conversion
 *
 * Converts a sequence of source feature vectors into target feature vectors
 * with a joint source/target GMM, then generates the final trajectory with
 * the hts_engine parameter generation routine.
 *
 * Steps:
 *   1. append dynamic features to the source vectors using `window`;
 *   2. for every mixture, compute the conditional mean (per frame) and the
 *      conditional covariance of the target part given the source part;
 *   3. for every frame, pick the mixture with the largest posterior (the
 *      lowest index wins ties) and use its conditional mean and the diagonal
 *      of its conditional covariance as the pdf of that frame;
 *   4. run HTS_PStreamSet_create() on that pdf sequence and copy the result.
 *
 * Parameters:
 *   gmm          joint GMM.  Reads nmix, gauss[m].mean and gauss[m].cov.
 *                Indices [0, source_vlen * win_size) address the source
 *                part, indices from source_vlen * win_size onward address
 *                the target part.
 *   window       delta windows (win_size, win_l_width, win_r_width,
 *                win_coefficient, win_max_width).
 *   total_frame  number of frames in `source` and `target`.
 *   source_vlen  length of one static source vector.
 *   target_vlen  length of one static target vector.
 *   gv_mean,
 *   gv_vari      GV parameters (target_vlen entries are read from each).
 *                They are used only when both are non-NULL.
 *   source       input, total_frame * source_vlen values, frame-major.
 *   target       output, total_frame * target_vlen values, frame-major.
 *                Left untouched when parameter generation fails.
 *
 * Returns 0 on success and 1 when HTS_PStreamSet_create() does not report
 * TRUE.  All memory allocated here is released in both cases.
 *
 * NOTE(review): every "+=" accumulation below relies on dgetmem()/ddgetmem()
 * returning zero-filled memory -- presumably they are calloc-backed; confirm.
 */
int vc(const GMM * gmm, const DELTAWINDOW * window, const size_t total_frame,
       const size_t source_vlen, const size_t target_vlen,
       const double *gv_mean, const double *gv_vari,
       const double *source, double *target)
{
   size_t t, i, j, k, max_num_mix = 0,
       src_vlen_dyn = source_vlen * window->win_size,
       tgt_vlen_dyn = target_vlen * window->win_size;
   int m, l, shift, status = 0;
   double max_post_mix = 0.0, logoutp = LZERO, *input = NULL,
       *src_with_dyn = NULL, *logwgd = NULL,
       **cov_xx_inv = NULL, ***cov_yx_xx = NULL, *gv_weight = NULL,
       ***cond_mean = NULL, ***cond_vari = NULL, **cond_post_mix = NULL;
   HTS_SStreamSet sss;
   HTS_PStreamSet pss;

   /* append dynamic feature */
   src_with_dyn = dgetmem(total_frame * src_vlen_dyn);
   for (t = 0; t < total_frame; t++) {
      for (i = 0; i < window->win_size; i++) {
         j = window->win_size * source_vlen * t + source_vlen * i;
         for (shift = window->win_l_width[i];
              shift <= window->win_r_width[i]; shift++) {
            /* do the arithmetic in int: adding a negative shift to the
               unsigned t would wrap around before the range check */
            l = (int) t + shift;
            /* frames outside the utterance reuse the first / last frame */
            if (l < 0) {
               l = 0;
            }
            if (!(l < (int) total_frame)) {
               l = total_frame - 1;
            }
            for (k = 0; k < source_vlen; k++) {
               src_with_dyn[j + k] += window->win_coefficient[i][shift]
                   * source[source_vlen * l + k];
            }
         }
      }
   }

   /* calculate mean and covariance of conditional distribution
      given source feature and mixture component */
   cond_post_mix = ddgetmem(total_frame, gmm->nmix);
   cond_mean = (double ***) getmem(gmm->nmix, sizeof(*(cond_mean)));
   for (m = 0; m < gmm->nmix; m++) {
      cond_mean[m] = ddgetmem(total_frame, tgt_vlen_dyn);
   }
   cond_vari = (double ***) getmem(gmm->nmix, sizeof(*(cond_vari)));
   for (m = 0; m < gmm->nmix; m++) {
      cond_vari[m] = ddgetmem(tgt_vlen_dyn, tgt_vlen_dyn);
   }
   cov_xx_inv = ddgetmem(src_vlen_dyn, src_vlen_dyn);
   cov_yx_xx = (double ***) getmem(gmm->nmix, sizeof(*(cov_yx_xx)));
   for (m = 0; m < gmm->nmix; m++) {
      cov_yx_xx[m] = ddgetmem(tgt_vlen_dyn, src_vlen_dyn);
   }
   /* cov_yx_xx[m] = (target/source block of cov) * (inverse held in cov_xx_inv);
      cov_xx_inv is a scratch buffer that invert() refills for every mixture */
   for (m = 0; m < gmm->nmix; m++) {
      invert(gmm->gauss[m].cov, cov_xx_inv, src_vlen_dyn);
      for (i = 0; i < tgt_vlen_dyn; i++) {
         for (j = 0; j < src_vlen_dyn; j++) {
            for (k = 0; k < src_vlen_dyn; k++) {
               cov_yx_xx[m][i][j] += gmm->gauss[m].cov[src_vlen_dyn + i][k]
                   * cov_xx_inv[k][j];
            }
         }
      }
   }
   logwgd = dgetmem(gmm->nmix);
   input = dgetmem(src_vlen_dyn);
   for (t = 0; t < total_frame; t++) {
      /* source vector (with dynamic features) of frame t */
      for (i = 0; i < src_vlen_dyn; i++) {
         input[i] = src_with_dyn[t * src_vlen_dyn + i];
      }
      /* per-mixture log_wgd() values and their log-domain sum for this frame */
      for (m = 0, logoutp = LZERO; m < gmm->nmix; m++) {
         logwgd[m] = log_wgd(gmm, m, src_vlen_dyn, input);
         logoutp = log_add(logoutp, logwgd[m]);
      }
      for (m = 0; m < gmm->nmix; m++) {
         /* posterior probability of mixture component given source feature */
         cond_post_mix[t][m] = exp(logwgd[m] - logoutp);
         /* conditional mean: target mean + cov_yx_xx * (input - source mean) */
         for (i = 0; i < tgt_vlen_dyn; i++) {
            for (j = 0; j < src_vlen_dyn; j++) {
               cond_mean[m][t][i] += cov_yx_xx[m][i][j]
                   * (input[j] - gmm->gauss[m].mean[j]);
            }
            cond_mean[m][t][i] += gmm->gauss[m].mean[src_vlen_dyn + i];
         }
      }
   }
   /* conditional covariance: target block of cov minus
      cov_yx_xx * (source/target block of cov); it does not depend on t */
   for (m = 0; m < gmm->nmix; m++) {
      for (i = 0; i < tgt_vlen_dyn; i++) {
         for (j = 0; j < tgt_vlen_dyn; j++) {
            for (k = 0; k < src_vlen_dyn; k++) {
               cond_vari[m][i][j] += cov_yx_xx[m][i][k]
                   * gmm->gauss[m].cov[k][src_vlen_dyn + j];
            }
            cond_vari[m][i][j] =
                gmm->gauss[m].cov[src_vlen_dyn + i][src_vlen_dyn + j]
                - cond_vari[m][i][j];
         }
      }
   }

   /* initialize parameter set of hts_engine */
   HTS_PStreamSet_initialize(&pss);
   /* build by hand a state stream set with a single stream in which every
      frame is one state of duration 1 */
   sss.nstream = 1;
   sss.total_state = total_frame;
   sss.total_frame = total_frame;
   sss.duration = (size_t *) getmem(total_frame, sizeof(size_t));
   for (i = 0; i < total_frame; i++) {
      sss.duration[i] = 1;
   }
   sss.sstream = (HTS_SStream *) getmem(1, sizeof(HTS_SStream));
   sss.sstream->vector_length = target_vlen;
   sss.sstream->mean =
       (double **) getmem(sss.total_state, sizeof(*(sss.sstream->mean)));
   sss.sstream->vari =
       (double **) getmem(sss.total_state, sizeof(*(sss.sstream->vari)));
   for (i = 0; i < sss.total_state; i++) {
      sss.sstream->mean[i] = dgetmem(tgt_vlen_dyn);
      sss.sstream->vari[i] = dgetmem(tgt_vlen_dyn);
   }
   sss.sstream->msd = NULL;     /* no MSD */
   /* private copy of the delta windows for the stream */
   sss.sstream->win_size = window->win_size;
   sss.sstream->win_l_width =
       (int *) getmem(window->win_size, sizeof(*(sss.sstream->win_l_width)));
   sss.sstream->win_r_width =
       (int *) getmem(window->win_size, sizeof(*(sss.sstream->win_r_width)));
   sss.sstream->win_coefficient =
       (double **) getmem(window->win_size,
                          sizeof(*(sss.sstream->win_coefficient)));
   for (i = 0; i < window->win_size; i++) {
      sss.sstream->win_l_width[i] = window->win_l_width[i];
      sss.sstream->win_r_width[i] = window->win_r_width[i];
      /* buffer length: -2 * l_width + 1 when r_width == -l_width, otherwise
         -2 * l_width.
         NOTE(review): this covers r_width == -l_width and
         r_width == -l_width - 1 only; other window shapes would not fit */
      if (sss.sstream->win_l_width[i] + sss.sstream->win_r_width[i] == 0) {
         sss.sstream->win_coefficient[i] =
             dgetmem(-2 * sss.sstream->win_l_width[i] + 1);
      } else {
         sss.sstream->win_coefficient[i] =
             dgetmem(-2 * sss.sstream->win_l_width[i]);
      }
      /* move the pointer so that it can be indexed from l_width to r_width */
      sss.sstream->win_coefficient[i] -= sss.sstream->win_l_width[i];
      for (shift = sss.sstream->win_l_width[i];
           shift <= sss.sstream->win_r_width[i]; shift++) {
         sss.sstream->win_coefficient[i][shift] =
             window->win_coefficient[i][shift];
      }
   }
   sss.sstream->win_max_width = window->win_max_width;
   if ((gv_mean != NULL) && (gv_vari != NULL)) {        /* set GV parameters */
      sss.sstream->gv_mean = dgetmem(sss.sstream->vector_length);
      sss.sstream->gv_vari = dgetmem(sss.sstream->vector_length);
      for (i = 0; i < sss.sstream->vector_length; i++) {
         sss.sstream->gv_mean[i] = gv_mean[i];
         sss.sstream->gv_vari[i] = gv_vari[i];
      }
   } else {
      sss.sstream->gv_mean = NULL;
      sss.sstream->gv_vari = NULL;
   }
   /* GV switch is TRUE for every frame, GV weight is 1.0 for every dimension */
   sss.sstream->gv_switch =
       (HTS_Boolean *) getmem(total_frame, sizeof(HTS_Boolean));
   for (i = 0; i < total_frame; i++) {
      sss.sstream->gv_switch[i] = TRUE;
   }
   gv_weight = dgetmem(tgt_vlen_dyn);
   for (i = 0; i < tgt_vlen_dyn; i++) {
      gv_weight[i] = 1.0;
   }

   /* initialize pdf sequence */
   for (t = 0; t < total_frame; t++) {
      /* mixture with the largest posterior for this frame */
      max_post_mix = cond_post_mix[t][0];
      max_num_mix = 0;
      for (m = 1; m < gmm->nmix; m++) {
         if (max_post_mix < cond_post_mix[t][m]) {
            max_post_mix = cond_post_mix[t][m];
            max_num_mix = m;
         }
      }
      /* only the diagonal of the conditional covariance is passed on */
      for (i = 0; i < tgt_vlen_dyn; i++) {
         sss.sstream->mean[t][i] = cond_mean[max_num_mix][t][i];
         sss.sstream->vari[t][i] = cond_vari[max_num_mix][i][i];
      }
   }

   /* parameter generation by hts_engine API; the generated parameters are
      read only when generation succeeded, so that a failure cannot lead to
      a dereference of an unset pss.pstream */
   if (HTS_PStreamSet_create(&pss, &sss, NULL, gv_weight) == TRUE) {
      for (t = 0; t < total_frame; t++) {
         k = t * target_vlen;
         for (i = 0; i < target_vlen; i++) {
            target[k + i] = pss.pstream->par[t][i];
         }
      }
   } else {
      status = 1;
   }

   /* release memory (2-D buffers from ddgetmem() are released as data block
      x[0] followed by the row table x) */
   free(src_with_dyn);
   free(input);
   free(logwgd);
   free(cov_xx_inv[0]);
   free(cov_xx_inv);
   for (m = 0; m < gmm->nmix; m++) {
      free(cov_yx_xx[m][0]);
      free(cov_yx_xx[m]);
      free(cond_mean[m][0]);
      free(cond_mean[m]);
      free(cond_vari[m][0]);
      free(cond_vari[m]);
   }
   free(cov_yx_xx);
   free(cond_mean);
   free(cond_vari);
   free(cond_post_mix[0]);
   free(cond_post_mix);
   free(gv_weight);
   HTS_PStreamSet_clear(&pss);
   HTS_SStreamSet_clear(&sss);

   return (status);
}