コード例 #1
0
ファイル: HTS_engine.c プロジェクト: hecomi/node-openjtalk
/* HTS_Engine_save_label: write one "<start> <end> <label>" line per label to fp.
 * Start/end are frame counts multiplied by rate, where
 * rate = fperiod * 1.0e+07 / sampling_frequency. */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   size_t nstate = HTS_ModelSet_get_nstate(&engine->ms);
   double rate = engine->condition.fperiod * 1.0e+07 / engine->condition.sampling_frequency;
   size_t label_index;
   size_t state_index = 0;      /* runs over all states of all labels */
   size_t begin_frame = 0;      /* first frame of the current label */

   for (label_index = 0; label_index < HTS_Label_get_size(label); label_index++) {
      size_t end_frame = begin_frame;
      size_t remaining;

      /* a label spans nstate consecutive states; add up their durations */
      for (remaining = nstate; remaining > 0; remaining--) {
         end_frame += HTS_SStreamSet_get_duration(sss, state_index);
         state_index++;
      }

      fprintf(fp, "%lu %lu %s\n", (unsigned long) (begin_frame * rate), (unsigned long) (end_frame * rate), HTS_Label_get_string(label, label_index));

      /* the next label starts where this one ended */
      begin_frame = end_frame;
   }
}
コード例 #2
0
/* HTS_Engine_save_label: output label with time
 *
 * Writes one "<start> <end> <label>" line per label to fp. A label covers
 * nstate consecutive states of the state stream set; start and end are the
 * accumulated frame counts multiplied by
 * rate = fperiod * 1e+7 / sampling_rate.
 *
 * The scaled times are printed as unsigned long rather than int: with a
 * typical rate of 50000 units per frame an int goes out of range after
 * roughly 43k frames, and converting an out-of-range double to int is
 * undefined behaviour. */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
    int i, j;
    int frame, state, duration;

    HTS_Label *label = &engine->label;
    HTS_SStreamSet *sss = &engine->sss;
    const int nstate = HTS_ModelSet_get_nstate(&engine->ms);
    const double rate =
        engine->global.fperiod * 1e+7 / engine->global.sampling_rate;

    for (i = 0, state = 0, frame = 0; i < HTS_Label_get_size(label); i++) {
        /* sum the durations of the nstate states that belong to label i */
        for (j = 0, duration = 0; j < nstate; j++)
            duration += HTS_SStreamSet_get_duration(sss, state++);
        /* in HTK & HTS format */
        fprintf(fp, "%lu %lu %s\n", (unsigned long) (frame * rate),
                (unsigned long) ((frame + duration) * rate),
                HTS_Label_get_string(label, i));
        frame += duration;
    }
}
コード例 #3
0
ファイル: HTS_engine.c プロジェクト: hecomi/node-openjtalk
/* HTS_Engine_save_information: save trace information
 *
 * Writes a human-readable report to fp in four sections: global parameters,
 * duration parameters, per-stream parameters, and the generated sequence
 * (per HMM: name, duration tree/PDF indices, and per state the length, MSD
 * decision and parameter tree/PDF indices of every stream).
 *
 * Side effect: the duration, parameter and GV interpolation weights held in
 * engine->condition are normalised in place (every non-zero weight is divided
 * by the sum of its group) before being printed.
 *
 * The spelling of the emitted captions ("Sampring", "stats") is kept as is so
 * that the output stays byte-compatible for existing consumers. */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
   size_t i, j, k, l, m, n;
   double temp;
   HTS_Condition *condition = &engine->condition;
   HTS_ModelSet *ms = &engine->ms;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   HTS_PStreamSet *pss = &engine->pss;

   /* global parameter */
   fprintf(fp, "[Global parameter]\n");
   fprintf(fp, "Sampring frequency                     -> %8lu(Hz)\n", (unsigned long) condition->sampling_frequency);
   fprintf(fp, "Frame period                           -> %8lu(point)\n", (unsigned long) condition->fperiod);
   fprintf(fp, "                                          %8.5f(msec)\n", 1e+3 * condition->fperiod / condition->sampling_frequency);
   fprintf(fp, "All-pass constant                      -> %8.5f\n", (float) condition->alpha);
   /* gamma is reported as 0 for stage 0, otherwise as -1/stage */
   fprintf(fp, "Gamma                                  -> %8.5f\n", (float) (condition->stage == 0 ? 0.0 : -1.0 / condition->stage));
   if (condition->stage != 0) {
      if (condition->use_log_gain == TRUE)
         fprintf(fp, "Log gain flag                          ->     TRUE\n");
      else
         fprintf(fp, "Log gain flag                          ->    FALSE\n");
   }
   fprintf(fp, "Postfiltering coefficient              -> %8.5f\n", (float) condition->beta);
   fprintf(fp, "Audio buffer size                      -> %8lu(sample)\n", (unsigned long) condition->audio_buff_size);
   fprintf(fp, "\n");

   /* duration parameter */
   fprintf(fp, "[Duration parameter]\n");
   fprintf(fp, "Number of states                       -> %8lu\n", (unsigned long) HTS_ModelSet_get_nstate(ms));
   fprintf(fp, "         Interpolation size            -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
   /* check interpolation: normalise the weights in place so they sum to 1 */
   for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
      temp += condition->duration_iw[i];
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      if (condition->duration_iw[i] != 0.0)
         condition->duration_iw[i] /= temp;
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      fprintf(fp, "         Interpolation weight[%2lu]      -> %8.0f(%%)\n", (unsigned long) i, (float) (100 * condition->duration_iw[i]));
   fprintf(fp, "\n");

   fprintf(fp, "[Stream parameter]\n");
   for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
      /* stream parameter */
      fprintf(fp, "Stream[%2lu] vector length               -> %8lu\n", (unsigned long) i, (unsigned long) HTS_ModelSet_get_vector_length(ms, i));
      fprintf(fp, "           Dynamic window size         -> %8lu\n", (unsigned long) HTS_ModelSet_get_window_size(ms, i));
      /* interpolation: weights are indexed [voice][stream] and normalised per stream */
      fprintf(fp, "           Interpolation size          -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
      for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
         temp += condition->parameter_iw[j][i];
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         if (condition->parameter_iw[j][i] != 0.0)
            condition->parameter_iw[j][i] /= temp;
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         fprintf(fp, "           Interpolation weight[%2lu]    -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->parameter_iw[j][i]));
      /* MSD */
      if (HTS_ModelSet_is_msd(ms, i)) { /* for MSD */
         fprintf(fp, "           MSD flag                    ->     TRUE\n");
         fprintf(fp, "           MSD threshold               -> %8.5f\n", condition->msd_threshold[i]);
      } else {                  /* for non MSD */
         fprintf(fp, "           MSD flag                    ->    FALSE\n");
      }
      /* GV */
      if (HTS_ModelSet_use_gv(ms, i)) {
         fprintf(fp, "           GV flag                     ->     TRUE\n");
         fprintf(fp, "           GV weight                   -> %8.0f(%%)\n", (float) (100 * condition->gv_weight[i]));
         fprintf(fp, "           GV interpolation size       -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
         /* interpolation: same in-place normalisation as above, for the GV weights */
         for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
            temp += condition->gv_iw[j][i];
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            if (condition->gv_iw[j][i] != 0.0)
               condition->gv_iw[j][i] /= temp;
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            fprintf(fp, "           GV interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->gv_iw[j][i]));
      } else {
         fprintf(fp, "           GV flag                     ->    FALSE\n");
      }
   }
   fprintf(fp, "\n");

   /* generated sequence */
   fprintf(fp, "[Generated sequence]\n");
   fprintf(fp, "Number of HMMs                         -> %8lu\n", (unsigned long) HTS_Label_get_size(label));
   /* the cast must cover the whole product: casting only the first operand
    * leaves the result as size_t, which does not match %lu on platforms
    * where size_t is wider than unsigned long */
   fprintf(fp, "Number of stats                        -> %8lu\n", (unsigned long) (HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms)));
   fprintf(fp, "Length of this speech                  -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod / condition->sampling_frequency));
   fprintf(fp, "                                       -> %8lu(frames)\n", (unsigned long) (HTS_PStreamSet_get_total_frame(pss) * condition->fperiod));

   for (i = 0; i < HTS_Label_get_size(label); i++) {
      fprintf(fp, "HMM[%2lu]\n", (unsigned long) i);
      fprintf(fp, "  Name                                 -> %s\n", HTS_Label_get_string(label, i));
      fprintf(fp, "  Duration\n");
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) {
         fprintf(fp, "    Interpolation[%2lu]\n", (unsigned long) j);
         /* k and l receive the tree index and the PDF index */
         HTS_ModelSet_get_duration_index(ms, j, HTS_Label_get_string(label, i), &k, &l);
         fprintf(fp, "      Tree index                       -> %8lu\n", (unsigned long) k);
         fprintf(fp, "      PDF index                        -> %8lu\n", (unsigned long) l);
      }
      for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
         /* states are reported (and looked up below) with an offset of 2 */
         fprintf(fp, "  State[%2lu]\n", (unsigned long) j + 2);
         /* flat state index = label index * nstate + state within the label */
         fprintf(fp, "    Length                             -> %8lu(frames)\n", (unsigned long) HTS_SStreamSet_get_duration(sss, i * HTS_ModelSet_get_nstate(ms) + j));
         for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
            fprintf(fp, "    Stream[%2lu]\n", (unsigned long) k);
            if (HTS_ModelSet_is_msd(ms, k)) {
               if (HTS_SStreamSet_get_msd(sss, k, i * HTS_ModelSet_get_nstate(ms) + j) > condition->msd_threshold[k])
                  fprintf(fp, "      MSD flag                         ->     TRUE\n");
               else
                  fprintf(fp, "      MSD flag                         ->    FALSE\n");
            }
            for (l = 0; l < HTS_ModelSet_get_nvoices(ms); l++) {
               fprintf(fp, "      Interpolation[%2lu]\n", (unsigned long) l);
               /* m and n receive the tree index and the PDF index */
               HTS_ModelSet_get_parameter_index(ms, l, k, j + 2, HTS_Label_get_string(label, i), &m, &n);
               fprintf(fp, "        Tree index                     -> %8lu\n", (unsigned long) m);
               fprintf(fp, "        PDF index                      -> %8lu\n", (unsigned long) n);
            }
         }
      }
   }
}
コード例 #4
0
ファイル: HTS_engine.c プロジェクト: hecomi/node-openjtalk
/* HTS_Engine_get_state_duration: look up the duration stored for
 * state_index in the engine's state stream set */
size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index)
{
   HTS_SStreamSet *state_streams = &engine->sss;

   return HTS_SStreamSet_get_duration(state_streams, state_index);
}
コード例 #5
0
ファイル: HTS_pstream.c プロジェクト: Datikos/RHVoice
/* HTS_PStreamSet_create: parameter generation using GV weight
 *
 * Builds one HTS_PStream per stream of sss and runs HTS_PStream_mlpg() on it.
 *
 *   pss            must be clear (nstream == 0), otherwise HTS_error(1, ...)
 *                  is raised
 *   sss            state stream set supplying durations, MSD values, windows,
 *                  means/variances and GV statistics
 *   msd_threshold  per-stream threshold; a state counts as an MSD frame source
 *                  only when its MSD value is strictly greater
 *   gv_weight      per-stream factor applied to the GV mean
 *
 * For an MSD stream only the frames whose state passes the threshold are
 * stored (pst->length of them); msd_flag has one entry per frame of the whole
 * utterance and maps between the two numberings. */
void HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss,
                           double *msd_threshold, double *gv_weight)
{
   int i, j, k, l, m;
   int frame, msd_frame, state;

   HTS_PStream *pst;
   HTS_Boolean not_bound;

   if (pss->nstream)
      HTS_error(1, "HTS_PstreamSet_create: HTS_PStreamSet should be clear.\n");

   /* initialize */
   pss->nstream = HTS_SStreamSet_get_nstream(sss);
   pss->pstream = (HTS_PStream *) HTS_calloc(pss->nstream, sizeof(HTS_PStream));
   pss->total_frame = HTS_SStreamSet_get_total_frame(sss);

   /* create */
   for (i = 0; i < pss->nstream; i++) {
      pst = &pss->pstream[i];
      if (HTS_SStreamSet_is_msd(sss, i)) {      /* for MSD */
         /* length = number of frames whose state exceeds the threshold */
         pst->length = 0;
         for (state = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
            if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i])
               pst->length += HTS_SStreamSet_get_duration(sss, state);
         /* one flag per frame of the whole utterance */
         pst->msd_flag =
             (HTS_Boolean *) HTS_calloc(pss->total_frame, sizeof(HTS_Boolean));
         for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss);
              state++) {
            if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i]) {
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
                  pst->msd_flag[frame] = TRUE;
                  frame++;
               }
            } else {
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
                  pst->msd_flag[frame] = FALSE;
                  frame++;
               }
            }
         }
      } else {                  /* for non MSD */
         pst->length = pss->total_frame;
         pst->msd_flag = NULL;
      }
      pst->vector_length = HTS_SStreamSet_get_vector_length(sss, i);
      pst->width = HTS_SStreamSet_get_window_max_width(sss, i) * 2 + 1; /* band width of R */
      pst->win_size = HTS_SStreamSet_get_window_size(sss, i);
      pst->static_length = pst->vector_length / pst->win_size;
      pst->sm.mean = HTS_alloc_matrix(pst->length, pst->vector_length);
      pst->sm.ivar = HTS_alloc_matrix(pst->length, pst->vector_length);
      pst->sm.wum = (double *) HTS_calloc(pst->length, sizeof(double));
      pst->sm.wuw = HTS_alloc_matrix(pst->length, pst->width);
      pst->sm.g = (double *) HTS_calloc(pst->length, sizeof(double));
      pst->par = HTS_alloc_matrix(pst->length, pst->static_length);
      /* copy dynamic window */
      pst->win_l_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
      pst->win_r_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
      /* array of row pointers: the element type is double *, not double */
      pst->win_coefficient =
          (double **) HTS_calloc(pst->win_size, sizeof(double *));
      for (j = 0; j < pst->win_size; j++) {
         pst->win_l_width[j] = HTS_SStreamSet_get_window_left_width(sss, i, j);
         pst->win_r_width[j] = HTS_SStreamSet_get_window_right_width(sss, i, j);
         /* the copy loop below writes offsets l_width..r_width inclusive,
          * i.e. exactly r_width - l_width + 1 coefficients */
         pst->win_coefficient[j] = (double *)
             HTS_calloc(pst->win_r_width[j] - pst->win_l_width[j] + 1,
                        sizeof(double));
         /* shift the base so the row is indexed by signed offset:
          * [l_width] is the first allocated element */
         pst->win_coefficient[j] -= pst->win_l_width[j];
         for (k = pst->win_l_width[j]; k <= pst->win_r_width[j]; k++)
            pst->win_coefficient[j][k] =
                HTS_SStreamSet_get_window_coefficient(sss, i, j, k);
      }
      /* copy GV */
      if (HTS_SStreamSet_use_gv(sss, i)) {
         pst->gv_mean =
             (double *) HTS_calloc(pst->static_length, sizeof(double));
         pst->gv_vari =
             (double *) HTS_calloc(pst->static_length, sizeof(double));
         for (j = 0; j < pst->static_length; j++) {
            /* only the mean is scaled by the caller's GV weight */
            pst->gv_mean[j] =
                HTS_SStreamSet_get_gv_mean(sss, i, j) * gv_weight[i];
            pst->gv_vari[j] = HTS_SStreamSet_get_gv_vari(sss, i, j);
         }
         pst->gv_switch =
             (HTS_Boolean *) HTS_calloc(pst->length, sizeof(HTS_Boolean));
         if (HTS_SStreamSet_is_msd(sss, i)) {   /* for MSD */
            /* gv_switch is indexed by stored (MSD) frame, so only flagged
             * frames advance msd_frame */
            for (state = 0, frame = 0, msd_frame = 0;
                 state < HTS_SStreamSet_get_total_state(sss); state++)
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state);
                    j++, frame++)
                  if (pst->msd_flag[frame])
                     pst->gv_switch[msd_frame++] =
                         HTS_SStreamSet_get_gv_switch(sss, i, state);
         } else {               /* for non MSD */
            for (state = 0, frame = 0;
                 state < HTS_SStreamSet_get_total_state(sss); state++)
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++)
                  pst->gv_switch[frame++] =
                      HTS_SStreamSet_get_gv_switch(sss, i, state);
         }
         /* gv_length = number of stored frames with the switch on */
         for (j = 0, pst->gv_length = 0; j < pst->length; j++)
            if (pst->gv_switch[j])
               pst->gv_length++;
      } else {
         pst->gv_switch = NULL;
         pst->gv_length = 0;
         pst->gv_mean = NULL;
         pst->gv_vari = NULL;
      }
      /* copy pdfs */
      if (HTS_SStreamSet_is_msd(sss, i)) {      /* for MSD */
         for (state = 0, frame = 0, msd_frame = 0;
              state < HTS_SStreamSet_get_total_state(sss); state++)
            for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
               if (pst->msd_flag[frame]) {
                  /* check current frame is MSD boundary or not */
                  for (k = 0; k < pst->win_size; k++) {
                     /* window k is usable only if every frame it touches
                      * exists and is itself flagged */
                     not_bound = TRUE;
                     for (l = pst->win_l_width[k]; l <= pst->win_r_width[k];
                          l++)
                        if (frame + l < 0 || pss->total_frame <= frame + l
                            || !pst->msd_flag[frame + l]) {
                           not_bound = FALSE;
                           break;
                        }
                     for (l = 0; l < pst->static_length; l++) {
                        m = pst->static_length * k + l;
                        pst->sm.mean[msd_frame][m] =
                            HTS_SStreamSet_get_mean(sss, i, state, m);
                        /* window 0 is always kept; other windows get a zero
                         * inverse variance at a boundary */
                        if (not_bound || k == 0)
                           pst->sm.ivar[msd_frame][m] =
                               HTS_finv(HTS_SStreamSet_get_vari
                                        (sss, i, state, m));
                        else
                           pst->sm.ivar[msd_frame][m] = 0.0;
                     }
                  }
                  msd_frame++;
               }
               frame++;
            }
      } else {                  /* for non MSD */
         for (state = 0, frame = 0;
              state < HTS_SStreamSet_get_total_state(sss); state++) {
            for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
               for (k = 0; k < pst->win_size; k++) {
                  /* here a boundary is only the start/end of the utterance */
                  not_bound = TRUE;
                  for (l = pst->win_l_width[k]; l <= pst->win_r_width[k]; l++)
                     if (frame + l < 0 || pss->total_frame <= frame + l) {
                        not_bound = FALSE;
                        break;
                     }
                  for (l = 0; l < pst->static_length; l++) {
                     m = pst->static_length * k + l;
                     pst->sm.mean[frame][m] =
                         HTS_SStreamSet_get_mean(sss, i, state, m);
                     if (not_bound || k == 0)
                        pst->sm.ivar[frame][m] =
                            HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
                     else
                        pst->sm.ivar[frame][m] = 0.0;
                  }
               }
               frame++;
            }
         }
      }
      /* parameter generation */
      HTS_PStream_mlpg(pst);
   }
}
コード例 #6
0
/* HTS_Engine_save_information: output trace information
 *
 * Writes a human-readable report to fp in four sections: global parameters,
 * duration parameters, per-stream parameters (interpolation, MSD, GV type)
 * and the generated sequence (per HMM: name, duration tree/PDF indices, and
 * per state the length, MSD decision and parameter tree/PDF indices of every
 * stream).
 *
 * Side effect: the duration, parameter and GV interpolation weights held in
 * engine->global are normalised in place (every non-zero weight is divided
 * by the sum of its group) before being printed.
 *
 * The spelling of the emitted captions ("Sampring", "stats") is kept as is so
 * that the output stays byte-compatible for existing consumers. */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
    int i, j, k, l, m, n;
    double temp;
    HTS_Global *global = &engine->global;
    HTS_ModelSet *ms = &engine->ms;
    HTS_Label *label = &engine->label;
    HTS_SStreamSet *sss = &engine->sss;
    HTS_PStreamSet *pss = &engine->pss;

    /* global parameter */
    fprintf(fp, "[Global parameter]\n");
    fprintf(fp, "Sampring frequency                     -> %8d(Hz)\n",
            global->sampling_rate);
    fprintf(fp, "Frame period                           -> %8d(point)\n",
            global->fperiod);
    fprintf(fp, "                                          %8.5f(msec)\n",
            1e+3 * global->fperiod / global->sampling_rate);
    fprintf(fp, "All-pass constant                      -> %8.5f\n",
            (float) global->alpha);
    /* gamma is reported as 0 for stage 0, otherwise as -1/stage */
    fprintf(fp, "Gamma                                  -> %8.5f\n",
            (float) (global->stage == 0 ? 0.0 : -1.0 / global->stage));
    if (global->stage != 0)
        fprintf(fp, "Log gain flag                          -> %s\n",
                global->use_log_gain ? "TRUE" : "FALSE");
    fprintf(fp, "Postfiltering coefficient              -> %8.5f\n",
            (float) global->beta);
    fprintf(fp, "Audio buffer size                      -> %8d(sample)\n",
            global->audio_buff_size);
    fprintf(fp, "\n");

    /* duration parameter */
    fprintf(fp, "[Duration parameter]\n");
    fprintf(fp, "Number of states                       -> %8d\n",
            HTS_ModelSet_get_nstate(ms));
    fprintf(fp, "         Interpolation                 -> %8d\n",
            HTS_ModelSet_get_duration_interpolation_size(ms));
    /* check interpolation: normalise the weights in place so they sum to 1 */
    for (i = 0, temp = 0.0;
            i < HTS_ModelSet_get_duration_interpolation_size(ms); i++)
        temp += global->duration_iw[i];
    for (i = 0; i < HTS_ModelSet_get_duration_interpolation_size(ms); i++)
        if (global->duration_iw[i] != 0.0)
            global->duration_iw[i] /= temp;
    for (i = 0; i < HTS_ModelSet_get_duration_interpolation_size(ms); i++)
        fprintf(fp,
                "         Interpolation weight[%2d]      -> %8.0f(%%)\n", i,
                (float) (100 * global->duration_iw[i]));
    fprintf(fp, "\n");

    fprintf(fp, "[Stream parameter]\n");
    for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
        /* stream parameter */
        fprintf(fp, "Stream[%2d] vector length               -> %8d\n", i,
                HTS_ModelSet_get_vector_length(ms, i));
        fprintf(fp, "           Dynamic window size         -> %8d\n",
                HTS_ModelSet_get_window_size(ms, i));
        /* interpolation: weights are indexed [stream][model], normalised per stream */
        fprintf(fp, "           Interpolation               -> %8d\n",
                HTS_ModelSet_get_parameter_interpolation_size(ms, i));
        for (j = 0, temp = 0.0;
                j < HTS_ModelSet_get_parameter_interpolation_size(ms, i); j++)
            temp += global->parameter_iw[i][j];
        for (j = 0; j < HTS_ModelSet_get_parameter_interpolation_size(ms, i); j++)
            if (global->parameter_iw[i][j] != 0.0)
                global->parameter_iw[i][j] /= temp;
        for (j = 0; j < HTS_ModelSet_get_parameter_interpolation_size(ms, i); j++)
            fprintf(fp,
                    "           Interpolation weight[%2d]    -> %8.0f(%%)\n", j,
                    (float) (100 * global->parameter_iw[i][j]));
        /* MSD */
        if (HTS_ModelSet_is_msd(ms, i)) { /* for MSD */
            fprintf(fp, "           MSD flag                    ->     TRUE\n");
            fprintf(fp, "           MSD threshold               -> %8.5f\n",
                    global->msd_threshold[i]);
        } else {                  /* for non MSD */
            fprintf(fp, "           MSD flag                    ->    FALSE\n");
        }
        /* GV */
        if (HTS_ModelSet_use_gv(ms, i)) {
            fprintf(fp, "           GV flag                     ->     TRUE\n");
            /* GV type is one of CDGV+SWITCH, SWITCH, CDGV or NORMAL,
             * depending on whether a GV switch and/or a GV tree is present */
            if (HTS_ModelSet_have_gv_switch(ms)) {
                if (HTS_ModelSet_have_gv_tree(ms, i)) {
                    fprintf(fp,
                            "           GV type                     ->     CDGV\n");
                    fprintf(fp,
                            "                                       ->  +SWITCH\n");
                } else
                    fprintf(fp,
                            "           GV type                     ->   SWITCH\n");
            } else {
                if (HTS_ModelSet_have_gv_tree(ms, i))
                    fprintf(fp,
                            "           GV type                     ->     CDGV\n");
                else
                    fprintf(fp,
                            "           GV type                     ->   NORMAL\n");
            }
            fprintf(fp, "           GV weight                   -> %8.0f(%%)\n",
                    (float) (100 * global->gv_weight[i]));
            fprintf(fp, "           GV interpolation size       -> %8d\n",
                    HTS_ModelSet_get_gv_interpolation_size(ms, i));
            /* interpolation: same in-place normalisation, for the GV weights */
            for (j = 0, temp = 0.0;
                    j < HTS_ModelSet_get_gv_interpolation_size(ms, i); j++)
                temp += global->gv_iw[i][j];
            for (j = 0; j < HTS_ModelSet_get_gv_interpolation_size(ms, i); j++)
                if (global->gv_iw[i][j] != 0.0)
                    global->gv_iw[i][j] /= temp;
            for (j = 0; j < HTS_ModelSet_get_gv_interpolation_size(ms, i); j++)
                fprintf(fp,
                        "           GV interpolation weight[%2d] -> %8.0f(%%)\n", j,
                        (float) (100 * global->gv_iw[i][j]));
        } else {
            fprintf(fp, "           GV flag                     ->    FALSE\n");
        }
    }
    fprintf(fp, "\n");

    /* generated sequence */
    fprintf(fp, "[Generated sequence]\n");
    fprintf(fp, "Number of HMMs                         -> %8d\n",
            HTS_Label_get_size(label));
    fprintf(fp, "Number of stats                        -> %8d\n",
            HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms));
    fprintf(fp, "Length of this speech                  -> %8.3f(sec)\n",
            (float) ((double) HTS_PStreamSet_get_total_frame(pss) *
                     global->fperiod / global->sampling_rate));
    /* plain %8d: a ".3" precision on an integer conversion would zero-pad
     * the value to three digits, unlike every other count in this report */
    fprintf(fp, "                                       -> %8d(frames)\n",
            HTS_PStreamSet_get_total_frame(pss) * global->fperiod);

    for (i = 0; i < HTS_Label_get_size(label); i++) {
        fprintf(fp, "HMM[%2d]\n", i);
        fprintf(fp, "  Name                                 -> %s\n",
                HTS_Label_get_string(label, i));
        fprintf(fp, "  Duration\n");
        for (j = 0; j < HTS_ModelSet_get_duration_interpolation_size(ms); j++) {
            fprintf(fp, "    Interpolation[%2d]\n", j);
            /* k and l receive the tree index and the PDF index */
            HTS_ModelSet_get_duration_index(ms, HTS_Label_get_string(label, i), &k,
                                            &l, j);
            fprintf(fp, "      Tree index                       -> %8d\n", k);
            fprintf(fp, "      PDF index                        -> %8d\n", l);
        }
        for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
            /* states are reported (and looked up below) with an offset of 2 */
            fprintf(fp, "  State[%2d]\n", j + 2);
            /* flat state index = label index * nstate + state within the label */
            fprintf(fp, "    Length                             -> %8d(frames)\n",
                    HTS_SStreamSet_get_duration(sss,
                                                i * HTS_ModelSet_get_nstate(ms) +
                                                j));
            for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
                fprintf(fp, "    Stream[%2d]\n", k);
                if (HTS_ModelSet_is_msd(ms, k)) {
                    if (HTS_SStreamSet_get_msd
                            (sss, k,
                             i * HTS_ModelSet_get_nstate(ms) + j) >
                            global->msd_threshold[k])
                        fprintf(fp,
                                "      MSD flag                         ->     TRUE\n");
                    else
                        fprintf(fp,
                                "      MSD flag                         ->    FALSE\n");
                }
                for (l = 0;
                        l < HTS_ModelSet_get_parameter_interpolation_size(ms, k);
                        l++) {
                    fprintf(fp, "      Interpolation[%2d]\n", l);
                    /* m and n receive the tree index and the PDF index */
                    HTS_ModelSet_get_parameter_index(ms,
                                                     HTS_Label_get_string(label, i),
                                                     &m, &n, k, j + 2, l);
                    fprintf(fp, "        Tree index                     -> %8d\n",
                            m);
                    fprintf(fp, "        PDF index                      -> %8d\n",
                            n);
                }
            }
        }
    }
}
コード例 #7
0
/*
 * Run: synthesise the utterance with the HTS engine held by this processor.
 *
 * Steps:
 *  1. require the "Segment" relation and count its items;
 *  2. for every segment item, evaluate the "hts_labels" feature processor,
 *     keep a private copy of the label string and store the result on the
 *     item as feature "hts_label";
 *  3. feed the labels to the HTS engine and build its state, parameter and
 *     generated-speech streams;
 *  4. write "start" and "end" on every segment item (frame position times
 *     rate, divided by 1e+7);
 *  5. create an SAudio object, attach it to the utterance as "audio" together
 *     with the "audio_plugin" feature, and copy the generated samples in.
 *
 * Errors are reported through `error`. On every exit path the temporary
 * label strings are released, and once the engine streams have been built
 * the engine is refreshed so that it is empty again for the next call.
 */
static void Run(const SUttProcessor *self, SUtterance *utt,
				s_erc *error)
{
	SHTSEngineSynthUttProc104 *HTSsynth = (SHTSEngineSynthUttProc104*)self;
	SPlugin *audioPlugin;
	const SRelation *segmentRel;
	SAudio *audio = NULL;
	s_bool is_present;
	char **label_data = NULL;
	int label_size;
	const SItem *item;
	const SItem *itemItr;
	int counter;
	uint i;
	int frame;
	int state;
	int engine_dirty = 0;	/* non-zero once the engine holds label/stream data */


	S_CLR_ERR(error);

	/* we require the segment relation */
	is_present = SUtteranceRelationIsPresent(utt, "Segment", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceRelationIsPresent\" failed"))
		goto quit_error;

	if (!is_present)
	{
		S_CTX_ERR(error, S_FAILURE,
				  "Run",
				  "Failed to find 'Segment' relation in utterance");
		goto quit_error;
	}

	segmentRel = SUtteranceGetRelation(utt, "Segment", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceGetRelation\" failed"))
		goto quit_error;

	item = SRelationHead(segmentRel, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SRelationHead\" failed"))
		goto quit_error;

	/* first pass: count the segment items */
	itemItr = item;
	label_size = 0;
	while (itemItr != NULL)
	{
		label_size++;
		itemItr = SItemNext(itemItr, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
			goto quit_error;
	}

	/* zero-initialised, so the clean-up code can tell which slots were filled */
	label_data = S_CALLOC(char*, label_size);
	if ((label_data == NULL) && (label_size > 0))
	{
		S_FTL_ERR(error, S_MEMERROR,
				  "Run",
				  "Failed to allocate memory for 'char*' object");
		goto quit_error;
	}

	/* second pass: generate one label string per segment item */
	itemItr = item;
	counter = 0;
	while (itemItr != NULL)
	{
		SObject *dFeat;
		const char *tmp;


		/* NOTE(review): dFeat is not released on the error exits below that
		 * happen before it is stored on the item - confirm who owns the
		 * object returned by SItemPathToFeatProc */
		dFeat = SItemPathToFeatProc(itemItr, "hts_labels", error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemPathToFeatProc\" failed"))
			goto quit_error;

		if (dFeat == NULL)
		{
			S_CTX_ERR(error, S_FAILURE,
					  "Run",
					  "Failed to generate hts labels for segment item");
			goto quit_error;
		}

		tmp = SObjectGetString(dFeat, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SObjectGetString\" failed"))
			goto quit_error;

		label_data[counter++] = s_strdup(tmp, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"s_strdup\" failed"))
			goto quit_error;

		SItemSetObject((SItem*)itemItr, "hts_label", dFeat, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemSetObject\" failed"))
			goto quit_error;

		itemItr = SItemNext(itemItr, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
			goto quit_error;
	}

	/* speech synthesis part */
	HTS_Engine_load_label_from_string_list(&(HTSsynth->engine), label_data, label_size);
	HTS_Engine_create_sstream(&(HTSsynth->engine));
	HTS_Engine_create_pstream(&(HTSsynth->engine));
	HTS_Engine_create_gstream(&(HTSsynth->engine));
	/* from here on every exit must refresh the engine */
	engine_dirty = 1;

	/* third pass: derive start/end times from the state durations */
	itemItr = item;
	frame = 0;
	state = 0;
	while (itemItr != NULL)
	{
		int j;
		int duration;
		HTS_SStreamSet *sss = &(HTSsynth->engine.sss);
		const int nstate = HTS_ModelSet_get_nstate(&(HTSsynth->engine.ms));
		const double rate = HTSsynth->engine.global.fperiod * 1e+7 / HTSsynth->engine.global.sampling_rate;
		float tmp;

		/* each segment item covers nstate consecutive states */
		for (j = 0, duration = 0; j < nstate; j++)
			duration += HTS_SStreamSet_get_duration(sss, state++);

		tmp = frame * rate;
		SItemSetFloat((SItem*)itemItr, "start", tmp/1e+7, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemSetFloat\" failed"))
			goto quit_error;

		tmp = (frame + duration) * rate;
		SItemSetFloat((SItem*)itemItr, "end", tmp/1e+7, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemSetFloat\" failed"))
			goto quit_error;

		frame += duration;
		itemItr = SItemNext(itemItr, error);
		if (S_CHK_ERR(error, S_CONTERR,
					  "Run",
					  "Call to \"SItemNext\" failed"))
			goto quit_error;
	}

	/* create an audio object */
	audio = S_NEW(SAudio, error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Failed to create new 'SAudio' object"))
		goto quit_error;

	/* set audio feature in utterance */
	SUtteranceSetFeature(utt, "audio", S_OBJECT(audio), error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceSetFeature\" failed"))
	{
		/* the utterance did not accept the object, so release it here
		 * (same handling as for the audio plug-in below) */
		S_DELETE(audio, "Run", error);
		goto quit_error;
	}

	/* We need to give the utterance the audio plug-in. If we don't do
	 * this and the voice is deleted before the utterance, then the
	 * utterance can't do *anything* with the audio. Not even delete
	 * it (segfault). This should be fast because it is already
	 * loaded.
	 * Note that this happens after the audio is set. This is because
	 * utt features are a list implementation.
	 */
	audioPlugin = s_pm_load_plugin("audio.spi", error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"s_pm_load_plugin\" failed"))
		goto quit_error;

	SUtteranceSetFeature(utt, "audio_plugin", S_OBJECT(audioPlugin), error);
	if (S_CHK_ERR(error, S_CONTERR,
				  "Run",
				  "Call to \"SUtteranceSetFeature\" failed"))
	{
		S_DELETE(audioPlugin, "Run", error);
		goto quit_error;
	}

	audio->sample_rate = HTSsynth->engine.global.sampling_rate;
	audio->num_samples = (uint32)HTS_GStreamSet_get_total_nsample(&(HTSsynth->engine).gss);
	audio->samples = S_MALLOC(float, audio->num_samples);
	if (audio->samples == NULL)
	{
		/* do not leave a sample count that points at no buffer */
		audio->num_samples = 0;
		S_FTL_ERR(error, S_MEMERROR,
				  "Run",
				  "Failed to allocate memory for 'float' object");
		goto quit_error;
	}

	/* write data */
	for (i = 0; i < audio->num_samples; i++)
		audio->samples[i] = (float)HTS_GStreamSet_get_speech(&(HTSsynth->engine).gss, i);

	for (counter = 0; counter < label_size; counter++)
		S_FREE(label_data[counter]);
	S_FREE(label_data);

	HTS_Engine_refresh(&(HTSsynth->engine));

	/* all OK here */
	return;

	/* error clean-up code */
quit_error:
	if (label_data != NULL)
	{
		for (counter = 0; counter < label_size; counter++)
		{
			if (label_data[counter] != NULL)
				S_FREE(label_data[counter]);
		}

		S_FREE(label_data);
	}

	/* Release the label and stream data exactly as the success path does;
	 * otherwise the engine would still be populated on the next call. */
	if (engine_dirty)
		HTS_Engine_refresh(&(HTSsynth->engine));

	return;
}