/* HTS_freqt: frequency transformation
 *
 * Transforms the m1 + 1 input coefficients c1[0..m1] into the m2 + 1 output
 * coefficients c2[0..m2]; a is the coefficient used by the recursion.
 *
 * The work area v->freqt_buff is grown on demand to 2 * (m2 + 1) doubles.
 * Its first part keeps the previous state of the recursion and the part
 * starting at offset v->freqt_size + 1 keeps the current state, which is
 * finally copied to c2.
 */
static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a)
{
   const double b = 1 - a * a;
   double *prev;
   double *cur;
   int n, j;

   /* (re)allocate the work area when a larger output order is requested */
   if (v->freqt_size < m2) {
      if (v->freqt_buff != NULL)
         HTS_free(v->freqt_buff);
      v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double));
      v->freqt_size = m2;
   }
   prev = v->freqt_buff;
   cur = v->freqt_buff + v->freqt_size + 1;

   for (j = 0; j <= m2; j++)
      cur[j] = 0.0;

   /* feed the input from its highest-order coefficient down to c1[0] */
   for (n = m1; n >= 0; n--) {
      if (m2 >= 0) {
         prev[0] = cur[0];
         cur[0] = c1[n] + a * prev[0];
      }
      if (m2 >= 1) {
         prev[1] = cur[1];
         cur[1] = b * prev[0] + a * prev[1];
      }
      for (j = 2; j <= m2; j++) {
         /* save the old value first: the update needs both old and new */
         prev[j] = cur[j];
         cur[j] = prev[j - 1] + a * (prev[j] - cur[j - 1]);
      }
   }

   HTS_movem(cur, c2, m2 + 1);
}
/* HTS_gc2gc: generalized cepstral transformation
 *
 * Converts the coefficients c1[0..m1], associated with parameter g1, into
 * c2[0..m2], associated with parameter g2.
 *
 * The input is first copied into v->gc2gc_buff (grown on demand to m1 + 1
 * doubles) and every later read of the input goes through that copy.
 */
static void HTS_gc2gc(HTS_Vocoder * v, double *c1, const int m1, const double g1, double *c2, const int m2, const double g2)
{
   const double *src;
   double acc1, acc2, term;
   int n, k, rest, upper;

   if (v->gc2gc_size < m1) {
      if (v->gc2gc_buff != NULL)
         HTS_free(v->gc2gc_buff);
      v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double));
      v->gc2gc_size = m1;
   }

   HTS_movem(c1, v->gc2gc_buff, m1 + 1);
   src = v->gc2gc_buff;

   c2[0] = src[0];
   for (n = 1; n <= m2; n++) {
      acc1 = 0.0;
      acc2 = 0.0;

      /* the sums only use input terms that exist and outputs already computed */
      upper = (m1 < n) ? m1 : n - 1;
      for (k = 1; k <= upper; k++) {
         rest = n - k;
         term = src[k] * c2[rest];
         acc2 += k * term;
         acc1 += rest * term;
      }

      /* beyond the input order there is no direct input term to add */
      if (n <= m1)
         c2[n] = src[n] + (g2 * acc2 - g1 * acc1) / n;
      else
         c2[n] = (g2 * acc2 - g1 * acc1) / n;
   }
}
/* HTS_gnorm: gain normalization
 *
 * c1: input coefficients c1[0..m]
 * c2: output coefficients c2[0..m]; callers in this file pass c2 == c1
 * m:  order
 * g:  normalization parameter
 *
 * g != 0: with k = 1 + g * c1[0], c2[i] = c1[i] / k for i = m..1 and
 *         c2[0] = k^(1/g).
 * g == 0: c2[1..m] is a copy of c1[1..m] and c2[0] = exp(c1[0]).
 */
static void HTS_gnorm(double *c1, double *c2, int m, const double g)
{
   double gain;
   int i;

   if (g == 0.0) {
      HTS_movem(&c1[1], &c2[1], m);
      c2[0] = exp(c1[0]);
      return;
   }

   /* read c1[0] before anything is written, so in-place use is safe */
   gain = 1.0 + g * c1[0];
   for (i = m; i >= 1; i--)
      c2[i] = c1[i] / gain;
   c2[0] = pow(gain, 1.0 / g);
}
/* HTS_ignorm: inverse gain normalization
 *
 * c1: input coefficients c1[0..m]
 * c2: output coefficients c2[0..m]
 * m:  order
 * g:  normalization parameter
 *
 * g != 0: with k = c1[0]^g, c2[i] = k * c1[i] for i = m..1 and
 *         c2[0] = (k - 1) / g.
 * g == 0: c2[1..m] is a copy of c1[1..m] and c2[0] = log(c1[0]).
 */
static void HTS_ignorm(double *c1, double *c2, int m, const double g)
{
   double gain;
   int i;

   if (g == 0.0) {
      HTS_movem(&c1[1], &c2[1], m);
      c2[0] = log(c1[0]);
      return;
   }

   /* read c1[0] before anything is written to c2 */
   gain = pow(c1[0], g);
   for (i = m; i >= 1; i--)
      c2[i] = gain * c1[i];
   c2[0] = (gain - 1.0) / g;
}
/* HTS_mc2b: transform mel-cepstrum to MLSA digital filter coefficients
 *
 * mc: input coefficients mc[0..m]
 * b:  output coefficients b[0..m]; may be the same array as mc
 * m:  order
 * a:  coefficient of the recursion
 *
 * Computes b[m] = mc[m] and b[i] = mc[i] - a * b[i + 1] for i = m-1..0.
 * When a is zero the output is simply a copy of the input (nothing at all
 * is done if the call is in place).
 */
static void HTS_mc2b(double *mc, double *b, int m, const double a)
{
   int i;

   if (a == 0.0) {
      if (mc != b)
         HTS_movem(mc, b, m + 1);
      return;
   }

   if (mc != b) {
      b[m] = mc[m];
      for (i = m - 1; i >= 0; i--)
         b[i] = mc[i] - a * b[i + 1];
   } else {
      /* in place: b[m] already holds mc[m] */
      for (i = m - 1; i >= 0; i--)
         b[i] -= a * b[i + 1];
   }
}
/* HTS_Vocoder_postfilter_lsp: postfilter for LSP
 *
 * v:     vocoder; its postfilter_buff scratch area is grown to m + 1 doubles
 * lsp:   lsp[0..m], modified in place
 * m:     order
 * alpha: passed through to HTS_lsp2en
 * beta:  postfilter strength; nothing is done unless beta > 0 and m > 1
 *
 * Entries 2..m-1 are recomputed from their two neighbours, entries 0, 1
 * and m are kept.  Afterwards lsp[0] is corrected by the ratio of the
 * values HTS_lsp2en returned before and after filtering.
 */
static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta)
{
   double en_before, en_after;
   double gap_up, gap_down;
   double *out;
   size_t k;

   if (!(beta > 0.0 && m > 1))
      return;

   if (v->postfilter_size < m) {
      if (v->postfilter_buff != NULL)
         HTS_free(v->postfilter_buff);
      v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
      v->postfilter_size = m;
   }

   en_before = HTS_lsp2en(v, lsp, m, alpha);

   /* postfiltering: results go to the scratch buffer so that every entry
      is computed from the unmodified neighbours */
   out = v->postfilter_buff;
   for (k = 0; k <= m; k++) {
      if (k <= 1 || k >= m) {
         out[k] = lsp[k];
         continue;
      }
      gap_up = beta * (lsp[k + 1] - lsp[k]);
      gap_down = beta * (lsp[k] - lsp[k - 1]);
      out[k] = lsp[k - 1] + gap_down + (gap_down * gap_down * ((lsp[k + 1] - lsp[k - 1]) - (gap_up + gap_down))) / ((gap_down * gap_down) + (gap_up * gap_up));
   }
   HTS_movem(out, lsp, m + 1);

   en_after = HTS_lsp2en(v, lsp, m, alpha);
   if (en_before == en_after)
      return;

   /* gain correction, in the log or the linear domain */
   if (v->use_log_gain)
      lsp[0] += 0.5 * log(en_before / en_after);
   else
      lsp[0] *= sqrt(en_before / en_after);
}
/* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter based waveform synthesis
 *
 * Generates v->fprd output samples for one frame.
 *
 * v:        vocoder state (filter coefficients c, target cc, increment cinc)
 * m:        order of the spectral parameters
 * lf0:      log F0 of the frame; LZERO selects p = 0.0
 * spectrum: spectrum[0..m]; modified by the postfilters
 * nlpf:     passed to HTS_Vocoder_initialize_excitation on the first call
 * lpf:      passed to HTS_Vocoder_get_excitation
 * alpha:    passed to the transformation and filter routines
 * beta:     postfilter coefficient
 * volume:   factor applied to every output sample
 * rawdata:  if not NULL, receives the samples starting at index 0
 * audio:    if not NULL, receives the samples saturated to the short range
 */
void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio)
{
   double p;
   double x;
   short xs;
   int i, j;
   int rawidx = 0;

   /* lf0 -> pitch, with lf0 limited to [MIN_LF0, MAX_LF0] */
   if (lf0 == LZERO) {
      p = 0.0;
   } else if (lf0 <= MIN_LF0) {
      p = v->rate / MIN_F0;
   } else if (lf0 >= MAX_LF0) {
      p = v->rate / MAX_F0;
   } else {
      p = v->rate / exp(lf0);
   }

   /* first time: start the filter coefficients from this frame */
   if (v->is_first == TRUE) {
      HTS_Vocoder_initialize_excitation(v, p, nlpf);
      if (v->stage != 0) {
         /* for LSP */
         HTS_movem(spectrum, v->c, m + 1);
         HTS_lsp2mgc(v, v->c, v->c, m, alpha);
         HTS_mc2b(v->c, v->c, m, alpha);
         HTS_gnorm(v->c, v->c, m, v->gamma);
         for (i = 1; i <= m; i++)
            v->c[i] *= v->gamma;
      } else {
         /* for MCP */
         HTS_mc2b(spectrum, v->c, m, alpha);
      }
      v->is_first = FALSE;
   }

   HTS_Vocoder_start_excitation(v, p);

   /* target coefficients of this frame */
   if (v->stage != 0) {
      /* for LSP */
      HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta);
      HTS_check_lsp_stability(spectrum, m);
      HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
      HTS_mc2b(v->cc, v->cc, m, alpha);
      HTS_gnorm(v->cc, v->cc, m, v->gamma);
      for (i = 1; i <= m; i++)
         v->cc[i] *= v->gamma;
   } else {
      /* for MCP */
      HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
      HTS_mc2b(spectrum, v->cc, m, alpha);
   }

   /* per-sample step that moves c to cc over the frame */
   for (i = 0; i <= m; i++)
      v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;

   for (j = 0; j < v->fprd; j++) {
      x = HTS_Vocoder_get_excitation(v, lpf);
      if (v->stage != 0) {
         /* for LSP */
         if (!NGAIN)
            x *= v->c[0];
         x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
      } else {
         /* for MCP */
         if (x != 0.0)
            x *= exp(v->c[0]);
         x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1);
      }
      x *= volume;

      /* output */
      if (rawdata)
         rawdata[rawidx++] = x;
      if (audio) {
         xs = (x > 32767.0) ? 32767 : (x < -32768.0) ? -32768 : (short) x;
         HTS_Audio_write(audio, xs);
      }

      for (i = 0; i <= m; i++)
         v->c[i] += v->cinc[i];
   }

   HTS_Vocoder_end_excitation(v, p);

   /* finish exactly on the target */
   HTS_movem(v->cc, v->c, m + 1);
}
/* HTS_lsp2lpc: transform LSP to LPC
 *
 * v:   vocoder; its lsp2lpc_buff work area is grown to 5 * m + 6 doubles
 * lsp: input, lsp[0..m-1]
 * a:   output, a[0..m], with a[0] set to 1.0
 * m:   order
 *
 * Layout of the work area: copy of lsp (m), p (mh1), q (mh2), then the
 * arrays a0, a1, a2 (mh1 + 1 each) and b0, b1, b2 (mh2 + 1 each), where
 * mh1 and mh2 are the numbers of even- and odd-indexed LSP entries.
 */
static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m)
{
   const int is_odd = (m % 2 != 0);
   const int np = is_odd ? (m + 1) / 2 : m / 2;
   const int nq = is_odd ? (m - 1) / 2 : m / 2;
   double *w, *p, *q;
   double *a0, *a1, *a2, *b0, *b1, *b2;
   double in, in1, in2;
   int i, k;

   if (v->lsp2lpc_size < m) {
      if (v->lsp2lpc_buff != NULL)
         HTS_free(v->lsp2lpc_buff);
      v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double));
      v->lsp2lpc_size = m;
   }

   /* carve the work area into its parts */
   w = v->lsp2lpc_buff;
   p = w + m;
   q = p + np;
   a0 = q + nq;
   a1 = a0 + (np + 1);
   a2 = a1 + (np + 1);
   b0 = a2 + (np + 1);
   b1 = b0 + (nq + 1);
   b2 = b1 + (nq + 1);

   HTS_movem(lsp, w, m);

   /* clear the filter memories */
   for (i = 0; i <= np; i++) {
      a0[i] = 0.0;
      a1[i] = 0.0;
      a2[i] = 0.0;
   }
   for (i = 0; i <= nq; i++) {
      b0[i] = 0.0;
      b1[i] = 0.0;
      b2[i] = 0.0;
   }

   /* lsp filter parameters: even-indexed entries feed p, odd-indexed q */
   for (i = 0; i < np; i++)
      p[i] = -2.0 * cos(w[2 * i]);
   for (i = 0; i < nq; i++)
      q[i] = -2.0 * cos(w[2 * i + 1]);

   /* impulse response of analysis filter: the input is 1.0 for the first
      step and 0.0 afterwards; in1 and in2 are the delayed inputs */
   in = 1.0;
   in1 = 0.0;
   in2 = 0.0;
   for (k = 0; k <= m; k++) {
      if (is_odd) {
         a0[0] = in;
         b0[0] = in - in2;
         in2 = in1;
         in1 = in;
      } else {
         a0[0] = in + in1;
         b0[0] = in - in1;
         in1 = in;
      }

      for (i = 0; i < np; i++) {
         a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i];
         a2[i] = a1[i];
         a1[i] = a0[i];
      }

      for (i = 0; i < nq; i++) {
         b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i];
         b2[i] = b1[i];
         b1[i] = b0[i];
      }

      if (k > 0)
         a[k - 1] = -0.5 * (a0[np] + b0[nq]);
      in = 0.0;
   }

   /* shift up by one with the sign flipped, then set the leading 1.0 */
   for (i = m - 1; i >= 0; i--)
      a[i + 1] = -a[i];
   a[0] = 1.0;
}
/* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter based waveform synthesis
 *
 * Generates v->fprd output samples for one frame.
 *
 * v:        vocoder state (filter coefficients c, target cc, increment cinc)
 * m:        order of the spectral parameters
 * lf0:      log F0 of the frame; LZERO selects p = 0.0
 * spectrum: spectrum[0..m]
 * alpha:    passed to the transformation and filter routines
 * beta:     postfilter coefficient (MCP path only)
 * rawdata:  if not NULL, receives the samples starting at index 0
 *
 * Samples are also written to v->audio when it is set.  The filter
 * coefficients are moved towards the target every v->iprd samples.
 */
void HTS_Vocoder_synthesize(HTS_Vocoder *v, const int m, double lf0, double *spectrum, double alpha, double beta, short *rawdata)
{
   double x;
   int i, j, k;
   short xs;
   int rawidx = 0;
   double p;

   /* lf0 -> pitch */
   if (lf0 == LZERO)
      p = 0.0;
   else
      p = v->rate / exp(lf0);

   /* first time */
   if (v->p1 < 0.0) {
      /* logical AND: both operands are conditions, a bitwise AND would
         ignore any non-zero flag value whose lowest bit is clear */
      if (v->gauss && v->seed != 1)
         v->next = HTS_srnd((unsigned) v->seed);
      HTS_Vocoder_initialize_excitation(v);
      if (v->stage != 0) {      /* for LSP */
         if (v->use_log_gain)
            v->c[0] = LZERO;
         else
            v->c[0] = ZERO;
         /* start at 1: c[0] holds the gain set just above and must not
            be overwritten by the evenly spaced initial values */
         for (i = 1; i <= m; i++)
            v->c[i] = i * PI / (m + 1);
         HTS_lsp2mgc(v, v->c, v->c, m, alpha);
         HTS_mc2b(v->c, v->c, m, alpha);
         HTS_gnorm(v->c, v->c, m, v->gamma);
         for (i = 1; i <= m; i++)
            v->c[i] *= v->gamma;
      }
   }

   HTS_Vocoder_start_excitation(v, p);

   /* target coefficients of this frame */
   if (v->stage == 0) {         /* for MCP */
      HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
      HTS_mc2b(spectrum, v->cc, m, alpha);
   } else {                     /* for LSP */
      HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
      HTS_mc2b(v->cc, v->cc, m, alpha);
      HTS_gnorm(v->cc, v->cc, m, v->gamma);
      for (i = 1; i <= m; i++)
         v->cc[i] *= v->gamma;
   }

   /* step applied at every interpolation point */
   for (i = 0; i <= m; i++)
      v->cinc[i] = (v->cc[i] - v->c[i]) * v->iprd / v->fprd;

   /* i counts down the samples left until the next interpolation point */
   for (j = 0, i = (v->iprd + 1) / 2; j < v->fprd; j++) {
      x = HTS_Vocoder_get_excitation(v, j, i);
      if (v->stage == 0) {      /* for MCP */
         if (x != 0.0)
            x *= exp(v->c[0]);
         x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1, v->pade);
      } else {                  /* for LSP */
         if (!NGAIN)
            x *= v->c[0];
         x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
      }

      /* output: saturate, converting an out-of-range double to short is
         undefined behaviour */
      if (x > 32767.0)
         xs = 32767;
      else if (x < -32768.0)
         xs = -32768;
      else
         xs = (short) x;
      if (rawdata)
         rawdata[rawidx++] = xs;
      if (v->audio)
         HTS_Audio_write(v->audio, xs);

      if (!--i) {
         for (k = 0; k <= m; k++)
            v->c[k] += v->cinc[k];
         i = v->iprd;
      }
   }

   HTS_Vocoder_end_excitation(v);

   /* finish exactly on the target */
   HTS_movem(v->cc, v->c, m + 1);
}