/*
 * HTS_Engine_save_label: save label with time
 *
 * Writes one line per label entry to fp in the form "<start> <end> <label>".
 * The start/end values are frame counts scaled by
 * fperiod * 1.0e+07 / sampling_frequency.
 *
 * engine: engine whose label, state stream set and condition are read
 * fp:     output stream the label lines are written to
 */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
   HTS_Label *labels = &engine->label;
   HTS_SStreamSet *state_streams = &engine->sss;
   size_t states_per_model = HTS_ModelSet_get_nstate(&engine->ms);
   double rate = engine->condition.fperiod * 1.0e+07 / engine->condition.sampling_frequency;
   size_t next_state = 0;       /* running index into the flat state sequence */
   size_t begin_frame = 0;      /* first frame of the current label */
   size_t model;

   for (model = 0; model < HTS_Label_get_size(labels); model++) {
      size_t end_frame = begin_frame;
      size_t remaining;

      /* a label ends where the last of its states ends */
      for (remaining = states_per_model; remaining > 0; remaining--) {
         end_frame += HTS_SStreamSet_get_duration(state_streams, next_state);
         next_state++;
      }

      fprintf(fp, "%lu %lu %s\n",
              (unsigned long) (begin_frame * rate),
              (unsigned long) (end_frame * rate),
              HTS_Label_get_string(labels, model));

      begin_frame = end_frame;
   }
}
/*
 * HTS_Engine_save_label: output label with time
 *
 * Writes one line per label entry to fp in the form "<start> <end> <label>"
 * (HTK & HTS label format).  Start and end are frame counts scaled by
 * fperiod * 1e+7 / sampling_rate.
 *
 * engine: engine whose label, state stream set and global settings are read
 * fp:     output stream the label lines are written to
 */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
   int i, j;
   int frame, state, duration;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   const int nstate = HTS_ModelSet_get_nstate(&engine->ms);
   const double rate = engine->global.fperiod * 1e+7 / engine->global.sampling_rate;

   for (i = 0, state = 0, frame = 0; i < HTS_Label_get_size(label); i++) {
      /* duration of a label is the sum of the durations of its states */
      for (j = 0, duration = 0; j < nstate; j++)
         duration += HTS_SStreamSet_get_duration(sss, state++);

      /*
       * in HTK & HTS format
       *
       * The scaled time grows by `rate` per frame and leaves the range of
       * int after a few minutes of speech; converting an out-of-range
       * double to int is undefined.  Print through unsigned long instead;
       * the text is identical for every value that used to fit in an int.
       */
      fprintf(fp, "%lu %lu %s\n",
              (unsigned long) (frame * rate),
              (unsigned long) ((frame + duration) * rate),
              HTS_Label_get_string(label, i));

      frame += duration;
   }
}
/*
 * HTS_Engine_save_information: save trace information
 *
 * Writes a human-readable trace to fp in four sections: global parameters,
 * duration parameters, per-stream parameters and the generated sequence
 * (per HMM: duration tree/PDF indices, per-state length, per-stream MSD
 * decision and parameter tree/PDF indices).
 *
 * Side effect: the duration, parameter and GV interpolation weights stored
 * in engine->condition are normalised in place (each non-zero weight is
 * divided by the sum of the weights) before they are printed.
 *
 * engine: engine to describe
 * fp:     output stream
 */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
   size_t i, j, k, l, m, n;
   double temp;
   HTS_Condition *condition = &engine->condition;
   HTS_ModelSet *ms = &engine->ms;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   HTS_PStreamSet *pss = &engine->pss;

   /* global parameter */
   fprintf(fp, "[Global parameter]\n");
   fprintf(fp, "Sampring frequency -> %8lu(Hz)\n", (unsigned long) condition->sampling_frequency);
   fprintf(fp, "Frame period -> %8lu(point)\n", (unsigned long) condition->fperiod);
   fprintf(fp, " %8.5f(msec)\n", 1e+3 * condition->fperiod / condition->sampling_frequency);
   fprintf(fp, "All-pass constant -> %8.5f\n", (float) condition->alpha);
   fprintf(fp, "Gamma -> %8.5f\n", (float) (condition->stage == 0 ? 0.0 : -1.0 / condition->stage));
   if (condition->stage != 0) {
      if (condition->use_log_gain == TRUE)
         fprintf(fp, "Log gain flag -> TRUE\n");
      else
         fprintf(fp, "Log gain flag -> FALSE\n");
   }
   fprintf(fp, "Postfiltering coefficient -> %8.5f\n", (float) condition->beta);
   fprintf(fp, "Audio buffer size -> %8lu(sample)\n", (unsigned long) condition->audio_buff_size);
   fprintf(fp, "\n");

   /* duration parameter */
   fprintf(fp, "[Duration parameter]\n");
   fprintf(fp, "Number of states -> %8lu\n", (unsigned long) HTS_ModelSet_get_nstate(ms));
   fprintf(fp, " Interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
   /* check interpolation */
   for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
      temp += condition->duration_iw[i];
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      if (condition->duration_iw[i] != 0.0)
         condition->duration_iw[i] /= temp;
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) i, (float) (100 * condition->duration_iw[i]));
   fprintf(fp, "\n");

   fprintf(fp, "[Stream parameter]\n");
   for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
      /* stream parameter */
      fprintf(fp, "Stream[%2lu] vector length -> %8lu\n", (unsigned long) i, (unsigned long) HTS_ModelSet_get_vector_length(ms, i));
      fprintf(fp, " Dynamic window size -> %8lu\n", (unsigned long) HTS_ModelSet_get_window_size(ms, i));
      /* interpolation */
      fprintf(fp, " Interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
      for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
         temp += condition->parameter_iw[j][i];
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         if (condition->parameter_iw[j][i] != 0.0)
            condition->parameter_iw[j][i] /= temp;
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->parameter_iw[j][i]));
      /* MSD */
      if (HTS_ModelSet_is_msd(ms, i)) {  /* for MSD */
         fprintf(fp, " MSD flag -> TRUE\n");
         fprintf(fp, " MSD threshold -> %8.5f\n", condition->msd_threshold[i]);
      } else {                  /* for non MSD */
         fprintf(fp, " MSD flag -> FALSE\n");
      }
      /* GV */
      if (HTS_ModelSet_use_gv(ms, i)) {
         fprintf(fp, " GV flag -> TRUE\n");
         fprintf(fp, " GV weight -> %8.0f(%%)\n", (float) (100 * condition->gv_weight[i]));
         fprintf(fp, " GV interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
         /* interpolation */
         for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
            temp += condition->gv_iw[j][i];
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            if (condition->gv_iw[j][i] != 0.0)
               condition->gv_iw[j][i] /= temp;
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            fprintf(fp, " GV interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->gv_iw[j][i]));
      } else {
         fprintf(fp, " GV flag -> FALSE\n");
      }
   }
   fprintf(fp, "\n");

   /* generated sequence */
   fprintf(fp, "[Generated sequence]\n");
   fprintf(fp, "Number of HMMs -> %8lu\n", (unsigned long) HTS_Label_get_size(label));
   /*
    * The cast must cover the whole product: a cast binds tighter than `*`,
    * so without the parentheses the argument has the type of the product,
    * which need not be the unsigned long that %lu requires.
    */
   fprintf(fp, "Number of stats -> %8lu\n", (unsigned long) (HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms)));
   fprintf(fp, "Length of this speech -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod / condition->sampling_frequency));
   fprintf(fp, " -> %8lu(frames)\n", (unsigned long) (HTS_PStreamSet_get_total_frame(pss) * condition->fperiod));

   for (i = 0; i < HTS_Label_get_size(label); i++) {
      fprintf(fp, "HMM[%2lu]\n", (unsigned long) i);
      fprintf(fp, " Name -> %s\n", HTS_Label_get_string(label, i));
      fprintf(fp, " Duration\n");
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) {
         fprintf(fp, " Interpolation[%2lu]\n", (unsigned long) j);
         /* k and l receive the tree and PDF index of the duration model */
         HTS_ModelSet_get_duration_index(ms, j, HTS_Label_get_string(label, i), &k, &l);
         fprintf(fp, " Tree index -> %8lu\n", (unsigned long) k);
         fprintf(fp, " PDF index -> %8lu\n", (unsigned long) l);
      }
      for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
         /* states are reported (and looked up) with an offset of 2 */
         fprintf(fp, " State[%2lu]\n", (unsigned long) (j + 2));
         fprintf(fp, " Length -> %8lu(frames)\n", (unsigned long) HTS_SStreamSet_get_duration(sss, i * HTS_ModelSet_get_nstate(ms) + j));
         for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
            fprintf(fp, " Stream[%2lu]\n", (unsigned long) k);
            if (HTS_ModelSet_is_msd(ms, k)) {
               if (HTS_SStreamSet_get_msd(sss, k, i * HTS_ModelSet_get_nstate(ms) + j) > condition->msd_threshold[k])
                  fprintf(fp, " MSD flag -> TRUE\n");
               else
                  fprintf(fp, " MSD flag -> FALSE\n");
            }
            for (l = 0; l < HTS_ModelSet_get_nvoices(ms); l++) {
               fprintf(fp, " Interpolation[%2lu]\n", (unsigned long) l);
               /* m and n receive the tree and PDF index of the parameter model */
               HTS_ModelSet_get_parameter_index(ms, l, k, j + 2, HTS_Label_get_string(label, i), &m, &n);
               fprintf(fp, " Tree index -> %8lu\n", (unsigned long) m);
               fprintf(fp, " PDF index -> %8lu\n", (unsigned long) n);
            }
         }
      }
   }
}
/*
 * HTS_Engine_get_state_duration: get state duration
 *
 * Returns what HTS_SStreamSet_get_duration() reports for state_index in
 * the engine's state stream set.
 */
size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index)
{
   HTS_SStreamSet *state_streams = &engine->sss;

   return HTS_SStreamSet_get_duration(state_streams, state_index);
}
/*
 * HTS_PStreamSet_create: parameter generation using GV weight
 *
 * Builds one HTS_PStream per stream of sss and runs HTS_PStream_mlpg() on it.
 * For every stream this
 *   - derives the number of generated frames (for an MSD stream only the
 *     frames of states whose MSD value exceeds msd_threshold[stream]) and,
 *     for MSD streams, a per-frame msd_flag array,
 *   - allocates the working matrices,
 *   - copies the dynamic windows,
 *   - copies the GV mean (scaled by gv_weight[stream]), GV variance and
 *     per-frame GV switch when the stream uses GV,
 *   - copies the state means and inverse variances frame by frame.
 *
 * pss:           destination; must be clear (pss->nstream == 0), otherwise
 *                HTS_error() is called
 * sss:           source state stream set
 * msd_threshold: per-stream MSD threshold
 * gv_weight:     per-stream GV weight
 */
void HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight)
{
   int i, j, k, l, m;
   int frame, msd_frame, state;
   int win_span, win_legacy;

   HTS_PStream *pst;
   HTS_Boolean not_bound;

   if (pss->nstream)
      HTS_error(1, "HTS_PstreamSet_create: HTS_PStreamSet should be clear.\n");

   /* initialize */
   pss->nstream = HTS_SStreamSet_get_nstream(sss);
   pss->pstream = (HTS_PStream *) HTS_calloc(pss->nstream, sizeof(HTS_PStream));
   pss->total_frame = HTS_SStreamSet_get_total_frame(sss);

   /* create */
   for (i = 0; i < pss->nstream; i++) {
      pst = &pss->pstream[i];
      if (HTS_SStreamSet_is_msd(sss, i)) {      /* for MSD */
         /* only frames of states above the threshold are generated */
         pst->length = 0;
         for (state = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
            if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i])
               pst->length += HTS_SStreamSet_get_duration(sss, state);
         pst->msd_flag = (HTS_Boolean *) HTS_calloc(pss->total_frame, sizeof(HTS_Boolean));
         for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
            if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i]) {
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
                  pst->msd_flag[frame] = TRUE;
                  frame++;
               }
            } else {
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
                  pst->msd_flag[frame] = FALSE;
                  frame++;
               }
            }
         }
      } else {                  /* for non MSD */
         pst->length = pss->total_frame;
         pst->msd_flag = NULL;
      }
      pst->vector_length = HTS_SStreamSet_get_vector_length(sss, i);
      pst->width = HTS_SStreamSet_get_window_max_width(sss, i) * 2 + 1; /* band width of R */
      pst->win_size = HTS_SStreamSet_get_window_size(sss, i);
      pst->static_length = pst->vector_length / pst->win_size;
      pst->sm.mean = HTS_alloc_matrix(pst->length, pst->vector_length);
      pst->sm.ivar = HTS_alloc_matrix(pst->length, pst->vector_length);
      pst->sm.wum = (double *) HTS_calloc(pst->length, sizeof(double));
      pst->sm.wuw = HTS_alloc_matrix(pst->length, pst->width);
      pst->sm.g = (double *) HTS_calloc(pst->length, sizeof(double));
      pst->par = HTS_alloc_matrix(pst->length, pst->static_length);

      /* copy dynamic window */
      pst->win_l_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
      pst->win_r_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
      /* table of row pointers: the element type is double *, not double */
      pst->win_coefficient = (double **) HTS_calloc(pst->win_size, sizeof(double *));
      for (j = 0; j < pst->win_size; j++) {
         pst->win_l_width[j] = HTS_SStreamSet_get_window_left_width(sss, i, j);
         pst->win_r_width[j] = HTS_SStreamSet_get_window_right_width(sss, i, j);
         /*
          * The copy loop below writes indices win_l_width..win_r_width, so
          * the buffer needs r - l + 1 elements.  The historical size
          * (-2 * l, or -2 * l + 1 for a symmetric window) is too small
          * whenever r > -l; keep it as a lower bound so the buffer never
          * becomes smaller than it used to be.
          */
         win_span = pst->win_r_width[j] - pst->win_l_width[j] + 1;
         if (pst->win_l_width[j] + pst->win_r_width[j] == 0)
            win_legacy = -2 * pst->win_l_width[j] + 1;
         else
            win_legacy = -2 * pst->win_l_width[j];
         if (win_span < win_legacy)
            win_span = win_legacy;
         pst->win_coefficient[j] = (double *) HTS_calloc(win_span, sizeof(double));
         /* shift the base so the row can be indexed with win_l_width..win_r_width */
         pst->win_coefficient[j] -= pst->win_l_width[j];
         for (k = pst->win_l_width[j]; k <= pst->win_r_width[j]; k++)
            pst->win_coefficient[j][k] = HTS_SStreamSet_get_window_coefficient(sss, i, j, k);
      }

      /* copy GV */
      if (HTS_SStreamSet_use_gv(sss, i)) {
         pst->gv_mean = (double *) HTS_calloc(pst->static_length, sizeof(double));
         pst->gv_vari = (double *) HTS_calloc(pst->static_length, sizeof(double));
         for (j = 0; j < pst->static_length; j++) {
            pst->gv_mean[j] = HTS_SStreamSet_get_gv_mean(sss, i, j) * gv_weight[i];
            pst->gv_vari[j] = HTS_SStreamSet_get_gv_vari(sss, i, j);
         }
         pst->gv_switch = (HTS_Boolean *) HTS_calloc(pst->length, sizeof(HTS_Boolean));
         if (HTS_SStreamSet_is_msd(sss, i)) {   /* for MSD */
            /* gv_switch is indexed by generated (MSD) frame, not by absolute frame */
            for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++, frame++)
                  if (pst->msd_flag[frame])
                     pst->gv_switch[msd_frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
         } else {               /* for non MSD */
            for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
               for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++)
                  pst->gv_switch[frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
         }
         /* gv_length counts the frames whose GV switch is on */
         for (j = 0, pst->gv_length = 0; j < pst->length; j++)
            if (pst->gv_switch[j])
               pst->gv_length++;
      } else {
         pst->gv_switch = NULL;
         pst->gv_length = 0;
         pst->gv_mean = NULL;
         pst->gv_vari = NULL;
      }

      /* copy pdfs */
      if (HTS_SStreamSet_is_msd(sss, i)) {      /* for MSD */
         for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
            for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
               if (pst->msd_flag[frame]) {
                  /* check current frame is MSD boundary or not */
                  for (k = 0; k < pst->win_size; k++) {
                     /* not_bound: every frame covered by window k exists and is flagged */
                     not_bound = TRUE;
                     for (l = pst->win_l_width[k]; l <= pst->win_r_width[k]; l++)
                        if (frame + l < 0 || pss->total_frame <= frame + l || !pst->msd_flag[frame + l]) {
                           not_bound = FALSE;
                           break;
                        }
                     for (l = 0; l < pst->static_length; l++) {
                        m = pst->static_length * k + l;
                        pst->sm.mean[msd_frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
                        /* windows other than the first get zero inverse variance at a boundary */
                        if (not_bound || k == 0)
                           pst->sm.ivar[msd_frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
                        else
                           pst->sm.ivar[msd_frame][m] = 0.0;
                     }
                  }
                  msd_frame++;
               }
               frame++;
            }
         }
      } else {                  /* for non MSD */
         for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
            for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
               for (k = 0; k < pst->win_size; k++) {
                  /* not_bound: every frame covered by window k lies inside the utterance */
                  not_bound = TRUE;
                  for (l = pst->win_l_width[k]; l <= pst->win_r_width[k]; l++)
                     if (frame + l < 0 || pss->total_frame <= frame + l) {
                        not_bound = FALSE;
                        break;
                     }
                  for (l = 0; l < pst->static_length; l++) {
                     m = pst->static_length * k + l;
                     pst->sm.mean[frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
                     if (not_bound || k == 0)
                        pst->sm.ivar[frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
                     else
                        pst->sm.ivar[frame][m] = 0.0;
                  }
               }
               frame++;
            }
         }
      }

      /* parameter generation */
      HTS_PStream_mlpg(pst);
   }
}
/*
 * HTS_Engine_save_information: output trace information
 *
 * Prints a textual trace of the engine to fp: global settings, duration
 * model settings, per-stream settings (interpolation, MSD, GV) and, for
 * each generated HMM, its duration and parameter tree/PDF indices together
 * with per-state lengths and MSD decisions.
 *
 * Note that the duration, parameter and GV interpolation weights held in
 * engine->global are normalised in place (non-zero weights are divided by
 * the weight sum) as part of printing them.
 *
 * engine: engine to describe
 * fp:     output stream
 */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
   HTS_Global *settings = &engine->global;
   HTS_ModelSet *models = &engine->ms;
   HTS_Label *labels = &engine->label;
   HTS_SStreamSet *state_streams = &engine->sss;
   HTS_PStreamSet *param_streams = &engine->pss;
   int stream, voice, hmm, state;
   int tree_index, pdf_index;
   int uses_switch, uses_tree;
   double weight_sum;
   double gamma;
   const char *flag_text;

   /* ---- global parameter ---- */
   fprintf(fp, "[Global parameter]\n");
   fprintf(fp, "Sampring frequency -> %8d(Hz)\n", settings->sampling_rate);
   fprintf(fp, "Frame period -> %8d(point)\n", settings->fperiod);
   fprintf(fp, " %8.5f(msec)\n", 1e+3 * settings->fperiod / settings->sampling_rate);
   fprintf(fp, "All-pass constant -> %8.5f\n", (float) settings->alpha);
   if (settings->stage == 0)
      gamma = 0.0;
   else
      gamma = -1.0 / settings->stage;
   fprintf(fp, "Gamma -> %8.5f\n", (float) gamma);
   if (settings->stage != 0) {
      if (settings->use_log_gain)
         flag_text = "TRUE";
      else
         flag_text = "FALSE";
      fprintf(fp, "Log gain flag -> %s\n", flag_text);
   }
   fprintf(fp, "Postfiltering coefficient -> %8.5f\n", (float) settings->beta);
   fprintf(fp, "Audio buffer size -> %8d(sample)\n", settings->audio_buff_size);
   fprintf(fp, "\n");

   /* ---- duration parameter ---- */
   fprintf(fp, "[Duration parameter]\n");
   fprintf(fp, "Number of states -> %8d\n", HTS_ModelSet_get_nstate(models));
   fprintf(fp, " Interpolation -> %8d\n", HTS_ModelSet_get_duration_interpolation_size(models));

   /* normalise the duration weights, then report them as percentages */
   weight_sum = 0.0;
   for (voice = 0; voice < HTS_ModelSet_get_duration_interpolation_size(models); voice++)
      weight_sum += settings->duration_iw[voice];
   for (voice = 0; voice < HTS_ModelSet_get_duration_interpolation_size(models); voice++) {
      if (settings->duration_iw[voice] != 0.0)
         settings->duration_iw[voice] /= weight_sum;
   }
   for (voice = 0; voice < HTS_ModelSet_get_duration_interpolation_size(models); voice++)
      fprintf(fp, " Interpolation weight[%2d] -> %8.0f(%%)\n", voice, (float) (100 * settings->duration_iw[voice]));
   fprintf(fp, "\n");

   /* ---- stream parameter ---- */
   fprintf(fp, "[Stream parameter]\n");
   for (stream = 0; stream < HTS_ModelSet_get_nstream(models); stream++) {
      fprintf(fp, "Stream[%2d] vector length -> %8d\n", stream, HTS_ModelSet_get_vector_length(models, stream));
      fprintf(fp, " Dynamic window size -> %8d\n", HTS_ModelSet_get_window_size(models, stream));

      /* parameter interpolation weights */
      fprintf(fp, " Interpolation -> %8d\n", HTS_ModelSet_get_parameter_interpolation_size(models, stream));
      weight_sum = 0.0;
      for (voice = 0; voice < HTS_ModelSet_get_parameter_interpolation_size(models, stream); voice++)
         weight_sum += settings->parameter_iw[stream][voice];
      for (voice = 0; voice < HTS_ModelSet_get_parameter_interpolation_size(models, stream); voice++) {
         if (settings->parameter_iw[stream][voice] != 0.0)
            settings->parameter_iw[stream][voice] /= weight_sum;
      }
      for (voice = 0; voice < HTS_ModelSet_get_parameter_interpolation_size(models, stream); voice++)
         fprintf(fp, " Interpolation weight[%2d] -> %8.0f(%%)\n", voice, (float) (100 * settings->parameter_iw[stream][voice]));

      /* MSD */
      if (HTS_ModelSet_is_msd(models, stream)) {
         fprintf(fp, " MSD flag -> TRUE\n");
         fprintf(fp, " MSD threshold -> %8.5f\n", settings->msd_threshold[stream]);
      } else {
         fprintf(fp, " MSD flag -> FALSE\n");
      }

      /* GV: nothing more to report for a stream without it */
      if (!HTS_ModelSet_use_gv(models, stream)) {
         fprintf(fp, " GV flag -> FALSE\n");
         continue;
      }
      fprintf(fp, " GV flag -> TRUE\n");
      uses_switch = HTS_ModelSet_have_gv_switch(models) ? 1 : 0;
      uses_tree = HTS_ModelSet_have_gv_tree(models, stream) ? 1 : 0;
      if (uses_tree) {
         fprintf(fp, " GV type -> CDGV\n");
         if (uses_switch)
            fprintf(fp, " -> +SWITCH\n");
      } else if (uses_switch) {
         fprintf(fp, " GV type -> SWITCH\n");
      } else {
         fprintf(fp, " GV type -> NORMAL\n");
      }
      fprintf(fp, " GV weight -> %8.0f(%%)\n", (float) (100 * settings->gv_weight[stream]));
      fprintf(fp, " GV interpolation size -> %8d\n", HTS_ModelSet_get_gv_interpolation_size(models, stream));

      /* GV interpolation weights */
      weight_sum = 0.0;
      for (voice = 0; voice < HTS_ModelSet_get_gv_interpolation_size(models, stream); voice++)
         weight_sum += settings->gv_iw[stream][voice];
      for (voice = 0; voice < HTS_ModelSet_get_gv_interpolation_size(models, stream); voice++) {
         if (settings->gv_iw[stream][voice] != 0.0)
            settings->gv_iw[stream][voice] /= weight_sum;
      }
      for (voice = 0; voice < HTS_ModelSet_get_gv_interpolation_size(models, stream); voice++)
         fprintf(fp, " GV interpolation weight[%2d] -> %8.0f(%%)\n", voice, (float) (100 * settings->gv_iw[stream][voice]));
   }
   fprintf(fp, "\n");

   /* ---- generated sequence ---- */
   fprintf(fp, "[Generated sequence]\n");
   fprintf(fp, "Number of HMMs -> %8d\n", HTS_Label_get_size(labels));
   fprintf(fp, "Number of stats -> %8d\n", HTS_Label_get_size(labels) * HTS_ModelSet_get_nstate(models));
   fprintf(fp, "Length of this speech -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(param_streams) * settings->fperiod / settings->sampling_rate));
   fprintf(fp, " -> %8.3d(frames)\n", HTS_PStreamSet_get_total_frame(param_streams) * settings->fperiod);

   for (hmm = 0; hmm < HTS_Label_get_size(labels); hmm++) {
      fprintf(fp, "HMM[%2d]\n", hmm);
      fprintf(fp, " Name -> %s\n", HTS_Label_get_string(labels, hmm));

      /* duration model indices, one pair per interpolated voice */
      fprintf(fp, " Duration\n");
      for (voice = 0; voice < HTS_ModelSet_get_duration_interpolation_size(models); voice++) {
         fprintf(fp, " Interpolation[%2d]\n", voice);
         HTS_ModelSet_get_duration_index(models, HTS_Label_get_string(labels, hmm), &tree_index, &pdf_index, voice);
         fprintf(fp, " Tree index -> %8d\n", tree_index);
         fprintf(fp, " PDF index -> %8d\n", pdf_index);
      }

      /* states are reported (and looked up) with an offset of 2 */
      for (state = 0; state < HTS_ModelSet_get_nstate(models); state++) {
         fprintf(fp, " State[%2d]\n", state + 2);
         fprintf(fp, " Length -> %8d(frames)\n", HTS_SStreamSet_get_duration(state_streams, hmm * HTS_ModelSet_get_nstate(models) + state));
         for (stream = 0; stream < HTS_ModelSet_get_nstream(models); stream++) {
            fprintf(fp, " Stream[%2d]\n", stream);
            if (HTS_ModelSet_is_msd(models, stream)) {
               if (HTS_SStreamSet_get_msd(state_streams, stream, hmm * HTS_ModelSet_get_nstate(models) + state) > settings->msd_threshold[stream])
                  fprintf(fp, " MSD flag -> TRUE\n");
               else
                  fprintf(fp, " MSD flag -> FALSE\n");
            }
            for (voice = 0; voice < HTS_ModelSet_get_parameter_interpolation_size(models, stream); voice++) {
               fprintf(fp, " Interpolation[%2d]\n", voice);
               HTS_ModelSet_get_parameter_index(models, HTS_Label_get_string(labels, hmm), &tree_index, &pdf_index, stream, state + 2, voice);
               fprintf(fp, " Tree index -> %8d\n", tree_index);
               fprintf(fp, " PDF index -> %8d\n", pdf_index);
            }
         }
      }
   }
}
static void Run(const SUttProcessor *self, SUtterance *utt, s_erc *error) { SHTSEngineSynthUttProc104 *HTSsynth = (SHTSEngineSynthUttProc104*)self; SPlugin *audioPlugin; const SRelation *segmentRel; SAudio *audio = NULL; s_bool is_present; char **label_data = NULL; int label_size; const SItem *item; const SItem *itemItr; int counter; uint i; int frame; int state; S_CLR_ERR(error); /* we require the segment relation */ is_present = SUtteranceRelationIsPresent(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceRelationIsPresent\" failed")) goto quit_error; if (!is_present) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to find 'Segment' relation in utterance"); goto quit_error; } segmentRel = SUtteranceGetRelation(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceGetRelation\" failed")) goto quit_error; item = SRelationHead(segmentRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationHead\" failed")) goto quit_error; itemItr = item; label_size = 0; while (itemItr != NULL) { label_size++; itemItr = SItemNext(itemItr, error); } label_data = S_CALLOC(char*, label_size); itemItr = item; counter = 0; while (itemItr != NULL) { SObject *dFeat; const char *tmp; dFeat = SItemPathToFeatProc(itemItr, "hts_labels", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPathToFeatProc\" failed")) goto quit_error; if (dFeat == NULL) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to generate hts labels for segment item"); goto quit_error; } tmp = SObjectGetString(dFeat, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectGetString\" failed")) goto quit_error; label_data[counter++] = s_strdup(tmp, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"s_strdup\" failed")) goto quit_error; SItemSetObject((SItem*)itemItr, "hts_label", dFeat, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetObject\" failed")) goto quit_error; itemItr = SItemNext(itemItr, error); } 
/* speech synthesis part */ HTS_Engine_load_label_from_string_list(&(HTSsynth->engine), label_data, label_size); HTS_Engine_create_sstream(&(HTSsynth->engine)); HTS_Engine_create_pstream(&(HTSsynth->engine)); HTS_Engine_create_gstream(&(HTSsynth->engine)); itemItr = item; counter = 0; frame = 0; state = 0; while (itemItr != NULL) { int j; int duration; HTS_SStreamSet *sss = &(HTSsynth->engine.sss); const int nstate = HTS_ModelSet_get_nstate(&(HTSsynth->engine.ms)); const double rate = HTSsynth->engine.global.fperiod * 1e+7 / HTSsynth->engine.global.sampling_rate; float tmp; for (j = 0, duration = 0; j < nstate; j++) duration += HTS_SStreamSet_get_duration(sss, state++); tmp = frame * rate; SItemSetFloat((SItem*)itemItr, "start", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; tmp = (frame + duration) * rate; SItemSetFloat((SItem*)itemItr, "end", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; frame += duration; itemItr = SItemNext(itemItr, error); counter++; } /* create an audio object */ audio = S_NEW(SAudio, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Failed to create new 'SAudio' object")) goto quit_error; /* set audio feature in utterance */ SUtteranceSetFeature(utt, "audio", S_OBJECT(audio), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) goto quit_error; /* We need to give the utterance the audio plug-in. If we don't do * this and the voice is deleted before the utterance, then the * utterance can't do *anything* with the audio. Not even delete * it (segfault). This should be fast because it is already * loaded. * Note that this happens after the audio is set. This is because * utt features are a list implementation. 
*/ audioPlugin = s_pm_load_plugin("audio.spi", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) goto quit_error; SUtteranceSetFeature(utt, "audio_plugin", S_OBJECT(audioPlugin), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) { S_DELETE(audioPlugin, "Run", error); goto quit_error; } audio->sample_rate = HTSsynth->engine.global.sampling_rate; audio->num_samples = (uint32)HTS_GStreamSet_get_total_nsample(&(HTSsynth->engine).gss); audio->samples = S_MALLOC(float, audio->num_samples); if (audio->samples == NULL) { S_FTL_ERR(error, S_MEMERROR, "Run", "Failed to allocate memory for 'float' object"); goto quit_error; } /* write data */ for (i = 0; i < audio->num_samples; i++) audio->samples[i] = (float)(HTS_GStreamSet_get_speech(&(HTSsynth->engine).gss, i) * 1.0); for (counter = 0; counter < label_size; counter++) S_FREE(label_data[counter]); S_FREE(label_data); HTS_Engine_refresh(&(HTSsynth->engine)); /* all OK here */ return; /* error clean-up code */ quit_error: if (label_data != NULL) { for (counter = 0; counter < label_size; counter++) { if (label_data[counter] != NULL) S_FREE(label_data[counter]); } S_FREE(label_data); } return; }