/*
 * HTS_Engine_save_label: save label with time
 *
 * For every label held by the engine, writes one line to fp of the form
 * "<begin> <end> <label string>". The begin/end values are frame counts
 * multiplied by fperiod * 1e7 / sampling_frequency and truncated to
 * unsigned long (presumably 100 ns time units -- confirm against the
 * consumer of this file).
 */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   size_t nstate = HTS_ModelSet_get_nstate(&engine->ms);
   double rate = engine->condition.fperiod * 1.0e+07 / engine->condition.sampling_frequency;
   size_t label_index;
   size_t state_index = 0;      /* running index over all states of all labels */
   size_t begin_frame = 0;      /* first frame of the current label */

   for (label_index = 0; label_index < HTS_Label_get_size(label); label_index++) {
      size_t end_frame = begin_frame;
      size_t remaining = nstate;

      /* a label ends after the durations of its nstate consecutive states */
      while (remaining > 0) {
         end_frame += HTS_SStreamSet_get_duration(sss, state_index++);
         remaining--;
      }

      fprintf(fp, "%lu %lu %s\n", (unsigned long) (begin_frame * rate), (unsigned long) (end_frame * rate), HTS_Label_get_string(label, label_index));

      begin_frame = end_frame;
   }
}
/*
 * HTS_Engine_save_label: output label with time
 *
 * Writes one line per label to fp: "<begin> <end> <label string>".
 * begin/end are frame counts multiplied by fperiod * 1e7 / sampling_rate.
 *
 * The scaled times are converted to unsigned long rather than int: the
 * 1e7 scale factor makes the values exceed INT_MAX after a few tens of
 * thousands of frames, and converting an out-of-range double to int is
 * undefined behaviour.
 */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
   int i, j;
   int frame, state, duration;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   const int nstate = HTS_ModelSet_get_nstate(&engine->ms);
   const double rate = engine->global.fperiod * 1e+7 / engine->global.sampling_rate;

   for (i = 0, state = 0, frame = 0; i < HTS_Label_get_size(label); i++) {
      /* duration of a label = sum of the durations of its nstate states */
      for (j = 0, duration = 0; j < nstate; j++)
         duration += HTS_SStreamSet_get_duration(sss, state++);
      /* in HTK & HTS format */
      fprintf(fp, "%lu %lu %s\n", (unsigned long) (frame * rate), (unsigned long) ((frame + duration) * rate), HTS_Label_get_string(label, i));
      frame += duration;
   }
}
/* HTS_SStreamSet_create: parse label and determine state duration */ HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, char** label, size_t num_labels, double speed) { size_t i, j, k; double temp; int shift; size_t state; HTS_SStream *sst; double *duration_mean, *duration_vari; double frame_length; /* initialize state sequence */ sss->nstate = HTS_ModelSet_get_nstate(ms); sss->nstream = HTS_ModelSet_get_nstream(ms); sss->total_frame = 0; sss->total_state = num_labels * sss->nstate; sss->duration = cst_alloc(size_t,sss->total_state); sss->sstream = cst_alloc(HTS_SStream,sss->nstream); for (i = 0; i < sss->nstream; i++) { sst = &sss->sstream[i]; sst->vector_length = HTS_ModelSet_get_vector_length(ms, i); sst->mean = cst_alloc(double *,sss->total_state); sst->vari = cst_alloc(double *,sss->total_state); if (HTS_ModelSet_is_msd(ms, i)) sst->msd = cst_alloc(double,sss->total_state); else sst->msd = NULL; for (j = 0; j < sss->total_state; j++) { sst->mean[j] = cst_alloc(double,(sst->vector_length * HTS_ModelSet_get_window_size(ms, i))); sst->vari[j] = cst_alloc(double,(sst->vector_length * HTS_ModelSet_get_window_size(ms, i))); } if (HTS_ModelSet_use_gv(ms, i)) { sst->gv_switch = cst_alloc(HTS_Boolean,sss->total_state); for (j = 0; j < sss->total_state; j++) sst->gv_switch[j] = TRUE; } else { sst->gv_switch = NULL; } }
/*
 * HTS_Engine_save_information: save trace information
 *
 * Writes a human-readable report to fp in three sections: global
 * parameters, per-stream parameters, and the generated sequence (per-HMM
 * durations and tree/PDF indices).
 *
 * Side effect: the interpolation weights in engine->condition
 * (duration_iw, parameter_iw, gv_iw) are normalised in place by their
 * sum before being printed.
 *
 * Every value printed with %lu is converted to unsigned long as a whole
 * expression; casting only the first operand of a product leaves the
 * argument with type size_t, which does not match %lu on platforms where
 * unsigned long is narrower than size_t.
 */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
   size_t i, j, k, l, m, n;
   double temp;
   HTS_Condition *condition = &engine->condition;
   HTS_ModelSet *ms = &engine->ms;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   HTS_PStreamSet *pss = &engine->pss;

   /* global parameter */
   fprintf(fp, "[Global parameter]\n");
   fprintf(fp, "Sampring frequency -> %8lu(Hz)\n", (unsigned long) condition->sampling_frequency);
   fprintf(fp, "Frame period -> %8lu(point)\n", (unsigned long) condition->fperiod);
   fprintf(fp, " %8.5f(msec)\n", 1e+3 * condition->fperiod / condition->sampling_frequency);
   fprintf(fp, "All-pass constant -> %8.5f\n", (float) condition->alpha);
   fprintf(fp, "Gamma -> %8.5f\n", (float) (condition->stage == 0 ? 0.0 : -1.0 / condition->stage));
   if (condition->stage != 0) {
      if (condition->use_log_gain == TRUE)
         fprintf(fp, "Log gain flag -> TRUE\n");
      else
         fprintf(fp, "Log gain flag -> FALSE\n");
   }
   fprintf(fp, "Postfiltering coefficient -> %8.5f\n", (float) condition->beta);
   fprintf(fp, "Audio buffer size -> %8lu(sample)\n", (unsigned long) condition->audio_buff_size);
   fprintf(fp, "\n");

   /* duration parameter */
   fprintf(fp, "[Duration parameter]\n");
   fprintf(fp, "Number of states -> %8lu\n", (unsigned long) HTS_ModelSet_get_nstate(ms));
   fprintf(fp, " Interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
   /* check interpolation: normalise the non-zero weights by their sum */
   for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
      temp += condition->duration_iw[i];
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      if (condition->duration_iw[i] != 0.0)
         condition->duration_iw[i] /= temp;
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) i, (float) (100 * condition->duration_iw[i]));
   fprintf(fp, "\n");

   fprintf(fp, "[Stream parameter]\n");
   for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
      /* stream parameter */
      fprintf(fp, "Stream[%2lu] vector length -> %8lu\n", (unsigned long) i, (unsigned long) HTS_ModelSet_get_vector_length(ms, i));
      fprintf(fp, " Dynamic window size -> %8lu\n", (unsigned long) HTS_ModelSet_get_window_size(ms, i));
      /* interpolation: weights are indexed [voice][stream] */
      fprintf(fp, " Interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
      for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
         temp += condition->parameter_iw[j][i];
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         if (condition->parameter_iw[j][i] != 0.0)
            condition->parameter_iw[j][i] /= temp;
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->parameter_iw[j][i]));
      /* MSD */
      if (HTS_ModelSet_is_msd(ms, i)) {      /* for MSD */
         fprintf(fp, " MSD flag -> TRUE\n");
         fprintf(fp, " MSD threshold -> %8.5f\n", condition->msd_threshold[i]);
      } else {                  /* for non MSD */
         fprintf(fp, " MSD flag -> FALSE\n");
      }
      /* GV */
      if (HTS_ModelSet_use_gv(ms, i)) {
         fprintf(fp, " GV flag -> TRUE\n");
         fprintf(fp, " GV weight -> %8.0f(%%)\n", (float) (100 * condition->gv_weight[i]));
         fprintf(fp, " GV interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
         /* interpolation */
         for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
            temp += condition->gv_iw[j][i];
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            if (condition->gv_iw[j][i] != 0.0)
               condition->gv_iw[j][i] /= temp;
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            fprintf(fp, " GV interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->gv_iw[j][i]));
      } else {
         fprintf(fp, " GV flag -> FALSE\n");
      }
   }
   fprintf(fp, "\n");

   /* generated sequence */
   fprintf(fp, "[Generated sequence]\n");
   fprintf(fp, "Number of HMMs -> %8lu\n", (unsigned long) HTS_Label_get_size(label));
   fprintf(fp, "Number of stats -> %8lu\n", (unsigned long) (HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms)));
   fprintf(fp, "Length of this speech -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod / condition->sampling_frequency));
   fprintf(fp, " -> %8lu(frames)\n", (unsigned long) (HTS_PStreamSet_get_total_frame(pss) * condition->fperiod));

   for (i = 0; i < HTS_Label_get_size(label); i++) {
      fprintf(fp, "HMM[%2lu]\n", (unsigned long) i);
      fprintf(fp, " Name -> %s\n", HTS_Label_get_string(label, i));
      fprintf(fp, " Duration\n");
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) {
         fprintf(fp, " Interpolation[%2lu]\n", (unsigned long) j);
         /* k and l receive the tree and PDF index for voice j */
         HTS_ModelSet_get_duration_index(ms, j, HTS_Label_get_string(label, i), &k, &l);
         fprintf(fp, " Tree index -> %8lu\n", (unsigned long) k);
         fprintf(fp, " PDF index -> %8lu\n", (unsigned long) l);
      }
      for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
         /* states are reported (and looked up) with an offset of 2 */
         fprintf(fp, " State[%2lu]\n", (unsigned long) (j + 2));
         fprintf(fp, " Length -> %8lu(frames)\n", (unsigned long) HTS_SStreamSet_get_duration(sss, i * HTS_ModelSet_get_nstate(ms) + j));
         for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
            fprintf(fp, " Stream[%2lu]\n", (unsigned long) k);
            if (HTS_ModelSet_is_msd(ms, k)) {
               if (HTS_SStreamSet_get_msd(sss, k, i * HTS_ModelSet_get_nstate(ms) + j) > condition->msd_threshold[k])
                  fprintf(fp, " MSD flag -> TRUE\n");
               else
                  fprintf(fp, " MSD flag -> FALSE\n");
            }
            for (l = 0; l < HTS_ModelSet_get_nvoices(ms); l++) {
               fprintf(fp, " Interpolation[%2lu]\n", (unsigned long) l);
               /* m and n receive the tree and PDF index for voice l */
               HTS_ModelSet_get_parameter_index(ms, l, k, j + 2, HTS_Label_get_string(label, i), &m, &n);
               fprintf(fp, " Tree index -> %8lu\n", (unsigned long) m);
               fprintf(fp, " PDF index -> %8lu\n", (unsigned long) n);
            }
         }
      }
   }
}
/*
 * HTS_Engine_get_nstate: get number of state
 *
 * Forwards to HTS_ModelSet_get_nstate() on the model set embedded in
 * the engine and returns its result unchanged.
 */
size_t HTS_Engine_get_nstate(HTS_Engine * engine)
{
   HTS_ModelSet *model_set = &engine->ms;

   return HTS_ModelSet_get_nstate(model_set);
}
/*
 * HTS_SStreamSet_create: parse label and determine state duration
 *
 * sss          : state stream set to fill (fields are overwritten)
 * ms           : model set queried for sizes, durations, parameters,
 *                windows and GV
 * label        : label sequence
 * duration_iw  : duration interpolation weights, normalised in place
 * parameter_iw : per-stream parameter interpolation weights
 *                ([stream][index]), normalised in place
 * gv_iw        : per-stream GV interpolation weights ([stream][index]),
 *                normalised in place for streams that use GV
 *
 * Returns FALSE, before anything is allocated, when any of the weight
 * sets sums to zero; returns TRUE otherwise.
 */
HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, double *duration_iw, double **parameter_iw, double **gv_iw)
{
   int i, j, k;
   double temp;
   int state;
   HTS_SStream *sst;
   double *duration_mean, *duration_vari;
   double frame_length;
   int next_time;
   int next_state;
   /* sss->nstream is only assigned further down, so the validation pass
      must take the stream count from the model set directly */
   const int nstream = HTS_ModelSet_get_nstream(ms);

   /* check interpolation weights */
   for (i = 0, temp = 0.0; i < HTS_ModelSet_get_duration_interpolation_size(ms); i++)
      temp += duration_iw[i];
   if (temp == 0.0)
      return FALSE;
   for (i = 0; i < nstream; i++) {
      for (j = 0, temp = 0.0; j < HTS_ModelSet_get_parameter_interpolation_size(ms, i); j++)
         temp += parameter_iw[i][j];
      if (temp == 0.0)
         return FALSE;
      if (HTS_ModelSet_use_gv(ms, i)) {
         for (j = 0, temp = 0.0; j < HTS_ModelSet_get_gv_interpolation_size(ms, i); j++)
            temp += gv_iw[i][j];
         /* reject only a zero sum; an unconditional return here would
            make every model set that uses GV fail */
         if (temp == 0.0)
            return FALSE;
      }
   }

   /* initialize state sequence */
   sss->nstate = HTS_ModelSet_get_nstate(ms);
   sss->nstream = HTS_ModelSet_get_nstream(ms);
   sss->total_frame = 0;
   sss->total_state = HTS_Label_get_size(label) * sss->nstate;
   sss->duration = (int *) HTS_calloc(sss->total_state, sizeof(int));
   sss->sstream = (HTS_SStream *) HTS_calloc(sss->nstream, sizeof(HTS_SStream));
   for (i = 0; i < sss->nstream; i++) {
      sst = &sss->sstream[i];
      sst->vector_length = HTS_ModelSet_get_vector_length(ms, i);
      sst->mean = (double **) HTS_calloc(sss->total_state, sizeof(double *));
      sst->vari = (double **) HTS_calloc(sss->total_state, sizeof(double *));
      if (HTS_ModelSet_is_msd(ms, i))
         sst->msd = (double *) HTS_calloc(sss->total_state, sizeof(double));
      else
         sst->msd = NULL;
      for (j = 0; j < sss->total_state; j++) {
         sst->mean[j] = (double *) HTS_calloc(sst->vector_length, sizeof(double));
         sst->vari[j] = (double *) HTS_calloc(sst->vector_length, sizeof(double));
      }
      sst->gv_switch = (HTS_Boolean *) HTS_calloc(sss->total_state, sizeof(HTS_Boolean));
      for (j = 0; j < sss->total_state; j++)
         sst->gv_switch[j] = TRUE;
   }

   /* normalise interpolation weights (sums were verified non-zero above) */
   for (i = 0, temp = 0.0; i < HTS_ModelSet_get_duration_interpolation_size(ms); i++)
      temp += duration_iw[i];
   for (i = 0; i < HTS_ModelSet_get_duration_interpolation_size(ms); i++)
      if (duration_iw[i] != 0.0)
         duration_iw[i] /= temp;
   for (i = 0; i < sss->nstream; i++) {
      for (j = 0, temp = 0.0; j < HTS_ModelSet_get_parameter_interpolation_size(ms, i); j++)
         temp += parameter_iw[i][j];
      for (j = 0; j < HTS_ModelSet_get_parameter_interpolation_size(ms, i); j++)
         if (parameter_iw[i][j] != 0.0)
            parameter_iw[i][j] /= temp;
      if (HTS_ModelSet_use_gv(ms, i)) {
         for (j = 0, temp = 0.0; j < HTS_ModelSet_get_gv_interpolation_size(ms, i); j++)
            temp += gv_iw[i][j];
         for (j = 0; j < HTS_ModelSet_get_gv_interpolation_size(ms, i); j++)
            if (gv_iw[i][j] != 0.0)
               gv_iw[i][j] /= temp;
      }
   }

   /* determine state duration */
   duration_mean = (double *) HTS_calloc(sss->nstate * HTS_Label_get_size(label), sizeof(double));
   duration_vari = (double *) HTS_calloc(sss->nstate * HTS_Label_get_size(label), sizeof(double));
   for (i = 0; i < HTS_Label_get_size(label); i++)
      HTS_ModelSet_get_duration(ms, HTS_Label_get_string(label, i), &duration_mean[i * sss->nstate], &duration_vari[i * sss->nstate], duration_iw);
   if (HTS_Label_get_frame_specified_flag(label)) {
      /* use duration set by user */
      next_time = 0;
      next_state = 0;
      state = 0;
      for (i = 0; i < HTS_Label_get_size(label); i++) {
         temp = HTS_Label_get_end_frame(label, i);
         if (temp >= 0) {
            /* distribute the frames up to this end time over every state
               accumulated since the last label with a specified end */
            next_time += HTS_set_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, temp - next_time);
            next_state = state + sss->nstate;
         } else if (i + 1 == HTS_Label_get_size(label)) {
            HTS_error(-1, "HTS_SStreamSet_create: The time of final label is not specified.\n");
            HTS_set_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, 0.0);
         }
         state += sss->nstate;
      }
   } else {
      /* determine frame length */
      if (HTS_Label_get_speech_speed(label) != 1.0) {
         temp = 0.0;
         for (i = 0; i < HTS_Label_get_size(label) * sss->nstate; i++) {
            temp += duration_mean[i];
         }
         frame_length = temp / HTS_Label_get_speech_speed(label);
      } else {
         frame_length = 0.0;
      }
      /* set state duration */
      HTS_set_duration(sss->duration, duration_mean, duration_vari, HTS_Label_get_size(label) * sss->nstate, frame_length);
   }
   HTS_free(duration_mean);
   HTS_free(duration_vari);

   /* get parameter (state indices passed to the model set run 2..nstate+1) */
   for (i = 0, state = 0; i < HTS_Label_get_size(label); i++) {
      for (j = 2; j <= sss->nstate + 1; j++) {
         sss->total_frame += sss->duration[state];
         for (k = 0; k < sss->nstream; k++) {
            sst = &sss->sstream[k];
            if (sst->msd)
               HTS_ModelSet_get_parameter(ms, HTS_Label_get_string(label, i), sst->mean[state], sst->vari[state], &sst->msd[state], k, j, parameter_iw[k]);
            else
               HTS_ModelSet_get_parameter(ms, HTS_Label_get_string(label, i), sst->mean[state], sst->vari[state], NULL, k, j, parameter_iw[k]);
         }
         state++;
      }
   }

   /* copy dynamic window */
   for (i = 0; i < sss->nstream; i++) {
      sst = &sss->sstream[i];
      sst->win_size = HTS_ModelSet_get_window_size(ms, i);
      sst->win_max_width = HTS_ModelSet_get_window_max_width(ms, i);
      sst->win_l_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
      sst->win_r_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
      /* array of row pointers: element size is sizeof(double *) */
      sst->win_coefficient = (double **) HTS_calloc(sst->win_size, sizeof(double *));
      for (j = 0; j < sst->win_size; j++) {
         sst->win_l_width[j] = HTS_ModelSet_get_window_left_width(ms, i, j);
         sst->win_r_width[j] = HTS_ModelSet_get_window_right_width(ms, i, j);
         /* the copy loop below writes indices l..r inclusive, so the
            buffer needs exactly r - l + 1 elements */
         sst->win_coefficient[j] = (double *) HTS_calloc(sst->win_r_width[j] - sst->win_l_width[j] + 1, sizeof(double));
         /* shift the base so the row can be indexed with l..r directly */
         sst->win_coefficient[j] -= sst->win_l_width[j];
         for (k = sst->win_l_width[j]; k <= sst->win_r_width[j]; k++)
            sst->win_coefficient[j][k] = HTS_ModelSet_get_window_coefficient(ms, i, j, k);
      }
   }

   /* determine GV */
   for (i = 0; i < sss->nstream; i++) {
      sst = &sss->sstream[i];
      if (HTS_ModelSet_use_gv(ms, i)) {
         sst->gv_mean = (double *) HTS_calloc(sst->vector_length / sst->win_size, sizeof(double));
         sst->gv_vari = (double *) HTS_calloc(sst->vector_length / sst->win_size, sizeof(double));
         HTS_ModelSet_get_gv(ms, HTS_Label_get_string(label, 0), sst->gv_mean, sst->gv_vari, i, gv_iw[i]);
      } else {
         sst->gv_mean = NULL;
         sst->gv_vari = NULL;
      }
   }

   /* switch GV off for every state of the labels the model set flags */
   if (HTS_ModelSet_have_gv_switch(ms) == TRUE)
      for (i = 0; i < HTS_Label_get_size(label); i++)
         if (HTS_ModelSet_get_gv_switch(ms, HTS_Label_get_string(label, i)) == FALSE)
            for (j = 0; j < sss->nstream; j++)
               for (k = 0; k < sss->nstate; k++)
                  sss->sstream[j].gv_switch[i * sss->nstate + k] = FALSE;

   return TRUE;
}
/*
 * HTS_Engine_save_information: output trace information
 *
 * Prints a report to fp in three sections: global parameters, per-stream
 * parameters, and the generated sequence (per-HMM durations plus tree and
 * PDF indices).
 *
 * Side effect: the interpolation weights in engine->global (duration_iw,
 * parameter_iw, gv_iw) are divided in place by their sum before they are
 * printed.
 */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
   HTS_Global *global = &engine->global;
   HTS_ModelSet *ms = &engine->ms;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   HTS_PStreamSet *pss = &engine->pss;
   int hmm, state, stream, mix;
   int tree_index, pdf_index;
   double sum;

   /* ---- global parameter ---- */
   fprintf(fp, "[Global parameter]\n");
   fprintf(fp, "Sampring frequency -> %8d(Hz)\n", global->sampling_rate);
   fprintf(fp, "Frame period -> %8d(point)\n", global->fperiod);
   fprintf(fp, " %8.5f(msec)\n", 1e+3 * global->fperiod / global->sampling_rate);
   fprintf(fp, "All-pass constant -> %8.5f\n", (float) global->alpha);
   fprintf(fp, "Gamma -> %8.5f\n", (float) (global->stage == 0 ? 0.0 : -1.0 / global->stage));
   if (global->stage != 0) {
      const char *log_gain = global->use_log_gain ? "TRUE" : "FALSE";

      fprintf(fp, "Log gain flag -> %s\n", log_gain);
   }
   fprintf(fp, "Postfiltering coefficient -> %8.5f\n", (float) global->beta);
   fprintf(fp, "Audio buffer size -> %8d(sample)\n", global->audio_buff_size);
   fprintf(fp, "\n");

   /* ---- duration parameter ---- */
   fprintf(fp, "[Duration parameter]\n");
   fprintf(fp, "Number of states -> %8d\n", HTS_ModelSet_get_nstate(ms));
   fprintf(fp, " Interpolation -> %8d\n", HTS_ModelSet_get_duration_interpolation_size(ms));
   /* normalise the non-zero duration weights by their sum, then print */
   sum = 0.0;
   for (mix = 0; mix < HTS_ModelSet_get_duration_interpolation_size(ms); mix++)
      sum += global->duration_iw[mix];
   for (mix = 0; mix < HTS_ModelSet_get_duration_interpolation_size(ms); mix++) {
      if (global->duration_iw[mix] == 0.0)
         continue;
      global->duration_iw[mix] /= sum;
   }
   for (mix = 0; mix < HTS_ModelSet_get_duration_interpolation_size(ms); mix++)
      fprintf(fp, " Interpolation weight[%2d] -> %8.0f(%%)\n", mix, (float) (100 * global->duration_iw[mix]));
   fprintf(fp, "\n");

   /* ---- stream parameter ---- */
   fprintf(fp, "[Stream parameter]\n");
   for (stream = 0; stream < HTS_ModelSet_get_nstream(ms); stream++) {
      fprintf(fp, "Stream[%2d] vector length -> %8d\n", stream, HTS_ModelSet_get_vector_length(ms, stream));
      fprintf(fp, " Dynamic window size -> %8d\n", HTS_ModelSet_get_window_size(ms, stream));

      /* parameter interpolation weights, indexed [stream][mix] */
      fprintf(fp, " Interpolation -> %8d\n", HTS_ModelSet_get_parameter_interpolation_size(ms, stream));
      sum = 0.0;
      for (mix = 0; mix < HTS_ModelSet_get_parameter_interpolation_size(ms, stream); mix++)
         sum += global->parameter_iw[stream][mix];
      for (mix = 0; mix < HTS_ModelSet_get_parameter_interpolation_size(ms, stream); mix++) {
         if (global->parameter_iw[stream][mix] == 0.0)
            continue;
         global->parameter_iw[stream][mix] /= sum;
      }
      for (mix = 0; mix < HTS_ModelSet_get_parameter_interpolation_size(ms, stream); mix++)
         fprintf(fp, " Interpolation weight[%2d] -> %8.0f(%%)\n", mix, (float) (100 * global->parameter_iw[stream][mix]));

      /* MSD */
      if (!HTS_ModelSet_is_msd(ms, stream)) {
         fprintf(fp, " MSD flag -> FALSE\n");
      } else {
         fprintf(fp, " MSD flag -> TRUE\n");
         fprintf(fp, " MSD threshold -> %8.5f\n", global->msd_threshold[stream]);
      }

      /* GV */
      if (!HTS_ModelSet_use_gv(ms, stream)) {
         fprintf(fp, " GV flag -> FALSE\n");
         continue;
      }
      fprintf(fp, " GV flag -> TRUE\n");
      if (HTS_ModelSet_have_gv_switch(ms)) {
         if (!HTS_ModelSet_have_gv_tree(ms, stream)) {
            fprintf(fp, " GV type -> SWITCH\n");
         } else {
            fprintf(fp, " GV type -> CDGV\n");
            fprintf(fp, " -> +SWITCH\n");
         }
      } else {
         if (!HTS_ModelSet_have_gv_tree(ms, stream))
            fprintf(fp, " GV type -> NORMAL\n");
         else
            fprintf(fp, " GV type -> CDGV\n");
      }
      fprintf(fp, " GV weight -> %8.0f(%%)\n", (float) (100 * global->gv_weight[stream]));
      fprintf(fp, " GV interpolation size -> %8d\n", HTS_ModelSet_get_gv_interpolation_size(ms, stream));
      /* GV interpolation weights, same normalise-then-print scheme */
      sum = 0.0;
      for (mix = 0; mix < HTS_ModelSet_get_gv_interpolation_size(ms, stream); mix++)
         sum += global->gv_iw[stream][mix];
      for (mix = 0; mix < HTS_ModelSet_get_gv_interpolation_size(ms, stream); mix++) {
         if (global->gv_iw[stream][mix] == 0.0)
            continue;
         global->gv_iw[stream][mix] /= sum;
      }
      for (mix = 0; mix < HTS_ModelSet_get_gv_interpolation_size(ms, stream); mix++)
         fprintf(fp, " GV interpolation weight[%2d] -> %8.0f(%%)\n", mix, (float) (100 * global->gv_iw[stream][mix]));
   }
   fprintf(fp, "\n");

   /* ---- generated sequence ---- */
   fprintf(fp, "[Generated sequence]\n");
   fprintf(fp, "Number of HMMs -> %8d\n", HTS_Label_get_size(label));
   fprintf(fp, "Number of stats -> %8d\n", HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms));
   fprintf(fp, "Length of this speech -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * global->fperiod / global->sampling_rate));
   fprintf(fp, " -> %8.3d(frames)\n", HTS_PStreamSet_get_total_frame(pss) * global->fperiod);

   for (hmm = 0; hmm < HTS_Label_get_size(label); hmm++) {
      fprintf(fp, "HMM[%2d]\n", hmm);
      fprintf(fp, " Name -> %s\n", HTS_Label_get_string(label, hmm));
      fprintf(fp, " Duration\n");
      for (mix = 0; mix < HTS_ModelSet_get_duration_interpolation_size(ms); mix++) {
         fprintf(fp, " Interpolation[%2d]\n", mix);
         HTS_ModelSet_get_duration_index(ms, HTS_Label_get_string(label, hmm), &tree_index, &pdf_index, mix);
         fprintf(fp, " Tree index -> %8d\n", tree_index);
         fprintf(fp, " PDF index -> %8d\n", pdf_index);
      }
      for (state = 0; state < HTS_ModelSet_get_nstate(ms); state++) {
         /* states are reported (and looked up) with an offset of 2 */
         fprintf(fp, " State[%2d]\n", state + 2);
         fprintf(fp, " Length -> %8d(frames)\n", HTS_SStreamSet_get_duration(sss, hmm * HTS_ModelSet_get_nstate(ms) + state));
         for (stream = 0; stream < HTS_ModelSet_get_nstream(ms); stream++) {
            fprintf(fp, " Stream[%2d]\n", stream);
            if (HTS_ModelSet_is_msd(ms, stream)) {
               if (HTS_SStreamSet_get_msd(sss, stream, hmm * HTS_ModelSet_get_nstate(ms) + state) > global->msd_threshold[stream])
                  fprintf(fp, " MSD flag -> TRUE\n");
               else
                  fprintf(fp, " MSD flag -> FALSE\n");
            }
            for (mix = 0; mix < HTS_ModelSet_get_parameter_interpolation_size(ms, stream); mix++) {
               fprintf(fp, " Interpolation[%2d]\n", mix);
               HTS_ModelSet_get_parameter_index(ms, HTS_Label_get_string(label, hmm), &tree_index, &pdf_index, stream, state + 2, mix);
               fprintf(fp, " Tree index -> %8d\n", tree_index);
               fprintf(fp, " PDF index -> %8d\n", pdf_index);
            }
         }
      }
   }
}
static void Run(const SUttProcessor *self, SUtterance *utt, s_erc *error) { SHTSEngineSynthUttProc104 *HTSsynth = (SHTSEngineSynthUttProc104*)self; SPlugin *audioPlugin; const SRelation *segmentRel; SAudio *audio = NULL; s_bool is_present; char **label_data = NULL; int label_size; const SItem *item; const SItem *itemItr; int counter; uint i; int frame; int state; S_CLR_ERR(error); /* we require the segment relation */ is_present = SUtteranceRelationIsPresent(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceRelationIsPresent\" failed")) goto quit_error; if (!is_present) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to find 'Segment' relation in utterance"); goto quit_error; } segmentRel = SUtteranceGetRelation(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceGetRelation\" failed")) goto quit_error; item = SRelationHead(segmentRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationHead\" failed")) goto quit_error; itemItr = item; label_size = 0; while (itemItr != NULL) { label_size++; itemItr = SItemNext(itemItr, error); } label_data = S_CALLOC(char*, label_size); itemItr = item; counter = 0; while (itemItr != NULL) { SObject *dFeat; const char *tmp; dFeat = SItemPathToFeatProc(itemItr, "hts_labels", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPathToFeatProc\" failed")) goto quit_error; if (dFeat == NULL) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to generate hts labels for segment item"); goto quit_error; } tmp = SObjectGetString(dFeat, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectGetString\" failed")) goto quit_error; label_data[counter++] = s_strdup(tmp, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"s_strdup\" failed")) goto quit_error; SItemSetObject((SItem*)itemItr, "hts_label", dFeat, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetObject\" failed")) goto quit_error; itemItr = SItemNext(itemItr, error); } 
/* speech synthesis part */ HTS_Engine_load_label_from_string_list(&(HTSsynth->engine), label_data, label_size); HTS_Engine_create_sstream(&(HTSsynth->engine)); HTS_Engine_create_pstream(&(HTSsynth->engine)); HTS_Engine_create_gstream(&(HTSsynth->engine)); itemItr = item; counter = 0; frame = 0; state = 0; while (itemItr != NULL) { int j; int duration; HTS_SStreamSet *sss = &(HTSsynth->engine.sss); const int nstate = HTS_ModelSet_get_nstate(&(HTSsynth->engine.ms)); const double rate = HTSsynth->engine.global.fperiod * 1e+7 / HTSsynth->engine.global.sampling_rate; float tmp; for (j = 0, duration = 0; j < nstate; j++) duration += HTS_SStreamSet_get_duration(sss, state++); tmp = frame * rate; SItemSetFloat((SItem*)itemItr, "start", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; tmp = (frame + duration) * rate; SItemSetFloat((SItem*)itemItr, "end", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; frame += duration; itemItr = SItemNext(itemItr, error); counter++; } /* create an audio object */ audio = S_NEW(SAudio, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Failed to create new 'SAudio' object")) goto quit_error; /* set audio feature in utterance */ SUtteranceSetFeature(utt, "audio", S_OBJECT(audio), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) goto quit_error; /* We need to give the utterance the audio plug-in. If we don't do * this and the voice is deleted before the utterance, then the * utterance can't do *anything* with the audio. Not even delete * it (segfault). This should be fast because it is already * loaded. * Note that this happens after the audio is set. This is because * utt features are a list implementation. 
*/ audioPlugin = s_pm_load_plugin("audio.spi", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) goto quit_error; SUtteranceSetFeature(utt, "audio_plugin", S_OBJECT(audioPlugin), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) { S_DELETE(audioPlugin, "Run", error); goto quit_error; } audio->sample_rate = HTSsynth->engine.global.sampling_rate; audio->num_samples = (uint32)HTS_GStreamSet_get_total_nsample(&(HTSsynth->engine).gss); audio->samples = S_MALLOC(float, audio->num_samples); if (audio->samples == NULL) { S_FTL_ERR(error, S_MEMERROR, "Run", "Failed to allocate memory for 'float' object"); goto quit_error; } /* write data */ for (i = 0; i < audio->num_samples; i++) audio->samples[i] = (float)(HTS_GStreamSet_get_speech(&(HTSsynth->engine).gss, i) * 1.0); for (counter = 0; counter < label_size; counter++) S_FREE(label_data[counter]); S_FREE(label_data); HTS_Engine_refresh(&(HTSsynth->engine)); /* all OK here */ return; /* error clean-up code */ quit_error: if (label_data != NULL) { for (counter = 0; counter < label_size; counter++) { if (label_data[counter] != NULL) S_FREE(label_data[counter]); } S_FREE(label_data); } return; }