static YAP_Bool Q(void) { YAP_Term arg1, arg2, arg3, arg4, out, out1, term, nodesTerm, ruleTerm, tail, pair, compoundTerm; DdNode *node1, **nodes_ex; int r, lenNodes, i; double p1, p0, **eta_rule, CLL; arg1 = YAP_ARG1; arg2 = YAP_ARG2; arg3 = YAP_ARG3; arg4 = YAP_ARG4; nodesTerm = arg1; lenNodes = YAP_IntOfTerm(arg2); nodes_ex = (DdNode **)malloc(lenNodes * sizeof(DdNode *)); example_prob = (double *)malloc(lenNodes * sizeof(double)); for (i = 0; i < lenNodes; i++) { pair = YAP_HeadOfTerm(nodesTerm); node1 = (DdNode *)YAP_IntOfTerm(YAP_HeadOfTerm(pair)); nodes_ex[i] = node1; pair = YAP_TailOfTerm(pair); example_prob[i] = YAP_FloatOfTerm(YAP_HeadOfTerm(pair)); nodesTerm = YAP_TailOfTerm(nodesTerm); } for (r = 0; r < nRules; r++) { for (i = 0; i < rules[r] - 1; i++) { eta_rule = eta[r]; eta_rule[i][0] = 0; eta_rule[i][1] = 0; } } CLL = Expectation(nodes_ex, lenNodes); out = YAP_TermNil(); for (r = 0; r < nRules; r++) { tail = YAP_TermNil(); eta_rule = eta[r]; for (i = 0; i < rules[r] - 1; i++) { p0 = eta_rule[i][0]; p1 = eta_rule[i][1]; term = YAP_MkPairTerm(YAP_MkFloatTerm(p0), YAP_MkPairTerm(YAP_MkFloatTerm(p1), YAP_TermNil())); tail = YAP_MkPairTerm(term, tail); } ruleTerm = YAP_MkIntTerm(r); compoundTerm = YAP_MkPairTerm(ruleTerm, YAP_MkPairTerm(tail, YAP_TermNil())); out = YAP_MkPairTerm(compoundTerm, out); } free(nodes_ex); free(example_prob); out1 = YAP_MkFloatTerm(CLL); YAP_Unify(out1, arg4); return (YAP_Unify(out, arg3)); }
/* Zero the per-mode accumulators (Newc, Newmean, Newvar) before an EM pass. */
static void
emm_reset_accumulators(float *Newc, float **Newmean, float **Newvar,
                       int K, int Ndim)
{
    int j, k;

    for (k = 0; k < K; ++k) {
        Newc[k] = 0;
        for (j = 0; j < Ndim; ++j) {
            Newmean[k][j] = 0;
            Newvar[k][j] = 0;
        }
    }
}

/*
 * Precompute the per-mode terms handed to Expectation():
 *   corprod[k]      = log(c[k]) - 0.5 * sum_j log(var[k][j])
 *   hafinvvar[k][j] = 1 / (2 * var[k][j]), or 1e+20 where var[k][j] <= 0
 * The work is done in the log domain to keep the dynamic range in check.
 */
static void
emm_update_gaussian_terms(float *c, float **var, float *corprod,
                          float **hafinvvar, int K, int Ndim)
{
    int j, k;

    for (k = 0; k < K; ++k) {
        corprod[k] = log((double) c[k]);
        for (j = 0; j < Ndim; ++j) {
            corprod[k] -= 0.5 * log((double) var[k][j]);
            if (var[k][j] > 0)
                hafinvvar[k][j] = 1.0 / (2.0 * var[k][j]);
            else
                hafinvvar[k][j] = 1e+20;
        }
    }
}

/*
 * Iteratively re-estimate the means, variances and mixing proportions of a
 * K-mode mixture from N observation vectors. mean, var and c are read as the
 * starting point and overwritten in place on every iteration.
 *
 * Iteration stops when the relative log-probability improvement drops to
 * Threshold or below, or after numiters iterations. After each update the
 * current distribution is written to tempfile; if that file cannot be
 * opened a warning goes to stderr and estimation continues.
 */
void
estimate_multi_modals(float **x,       /* The observation vectors */
                      int N,           /* Number of observation vectors */
                      int Ndim,        /* Dimensionality of observations */
                      int K,           /* Number of modes in the distribution */
                      float **mean,    /* The means of the various modes */
                      float **var,     /* The variances of all the modes */
                      float *c,        /* A-priori probabilities of each of the
                                          modes, or mixing proportion */
                      char *tempfile,  /* File to store temporary distributions */
                      int numiters,    /* Number of iterations of EM to run */
                      float Threshold  /* Convergence ratio */
    )
{
    float **Newvar, **hafinvvar, **Newmean, *Newc, *Tau, *corprod,
        SumNewc, Prevlogprob, LogProb, Improvement;
    int i, j, k, iter = 0;
    FILE *temp;

    /* Start above any sensible threshold so the first iteration runs. */
    Improvement = 100;

    /*
     * Allocate spaces for the local parameter arrays
     */
    Newmean = (float **) ckd_calloc_2d(K, Ndim, sizeof(float));
    Newvar = (float **) ckd_calloc_2d(K, Ndim, sizeof(float));
    hafinvvar = (float **) ckd_calloc_2d(K, Ndim, sizeof(float));
    Newc = (float *) ckd_calloc(K, sizeof(float));
    Tau = (float *) ckd_calloc(K, sizeof(float));
    corprod = (float *) ckd_calloc(K, sizeof(float));

    emm_reset_accumulators(Newc, Newmean, Newvar, K, Ndim);
    emm_update_gaussian_terms(c, var, corprod, hafinvvar, K, Ndim);

    /*
     * Estimate means and variances and priors while computing overall
     * likelihood.
     * Note: variance estimated as Sum((x-mean)*(x-mean))/count
     * rather than Sum(x*x)/count - mean*mean
     * because the latter is an unstable formula and tends to give -ve
     * variances due to numerical errors.
     */
    Prevlogprob = 0;
    for (i = 0; i < N; ++i) {
        Prevlogprob +=
            Expectation(Tau, x[i], c, mean, hafinvvar, corprod, K, Ndim);
        for (k = 0; k < K; ++k) {
            if (Tau[k] > 0) {
                Newc[k] += Tau[k];
                for (j = 0; j < Ndim; ++j)
                    Newmean[k][j] += Tau[k] * x[i][j];
            }
        }
    }
    for (k = 0; k < K; ++k)
        for (j = 0; j < Ndim; ++j)
            Newmean[k][j] /= Newc[k];

    /* Second sweep: the initial variances are taken around the new means. */
    for (i = 0; i < N; ++i) {
        Expectation(Tau, x[i], c, mean, hafinvvar, corprod, K, Ndim);
        for (k = 0; k < K; ++k) {
            if (Tau[k] > 0) {
                for (j = 0; j < Ndim; ++j)
                    Newvar[k][j] += Tau[k] * (x[i][j] - Newmean[k][j])
                        * (x[i][j] - Newmean[k][j]);
            }
        }
    }
    printf("EM : Initial log probablity = %f \n", Prevlogprob);

    while ((Improvement > Threshold) && (iter < numiters)) {
        /*
         * We use SumNewc instead of N as in the formula because the
         * Newc's may not sum to N, because of accuracy errors of the
         * computer for large N.
         */
        SumNewc = 0;
        for (k = 0; k < K; ++k)
            SumNewc += Newc[k];

        for (k = 0; k < K; ++k) {
            for (j = 0; j < Ndim; ++j) {
                mean[k][j] = Newmean[k][j];
                var[k][j] = Newvar[k][j] / Newc[k];
            }
            c[k] = Newc[k] / SumNewc;
        }

        /*
         * Store partially converged distribution. Failing to open the
         * file must not abort the estimation, so warn and carry on.
         */
        temp = (tempfile != NULL) ? fopen(tempfile, "w") : NULL;
        if (temp == NULL) {
            fprintf(stderr,
                    "EM : cannot write partial distribution to %s\n",
                    tempfile != NULL ? tempfile : "(null)");
        }
        else {
            fprintf(temp, "%d %d\n", K, Ndim);
            for (i = 0; i < K; ++i) {
                fprintf(temp, "%f\n", c[i]);
                for (j = 0; j < Ndim; ++j)
                    fprintf(temp, "%f ", mean[i][j]);
                fprintf(temp, "\n");
                for (j = 0; j < Ndim; ++j)
                    fprintf(temp, "%f ", var[i][j]);
                fprintf(temp, "\n");
            }
            fclose(temp);
        }

        emm_reset_accumulators(Newc, Newmean, Newvar, K, Ndim);
        emm_update_gaussian_terms(c, var, corprod, hafinvvar, K, Ndim);

        /*
         * Single sweep per iteration: the variance sums here are taken
         * around the current means (mean), not around Newmean.
         */
        LogProb = 0;
        for (i = 0; i < N; ++i) {
            LogProb +=
                Expectation(Tau, x[i], c, mean, hafinvvar, corprod, K, Ndim);
            for (k = 0; k < K; ++k) {
                if (Tau[k] > 0) {
                    Newc[k] += Tau[k];
                    for (j = 0; j < Ndim; ++j) {
                        Newmean[k][j] += Tau[k] * x[i][j];
                        Newvar[k][j] += Tau[k] * (x[i][j] - mean[k][j])
                            * (x[i][j] - mean[k][j]);
                    }
                }
            }
        }
        for (k = 0; k < K; ++k)
            for (j = 0; j < Ndim; ++j)
                Newmean[k][j] /= Newc[k];

        /* Relative improvement, sign-corrected for negative LogProb. */
        Improvement = (LogProb - Prevlogprob) / LogProb;
        if (LogProb < 0)
            Improvement = -Improvement;
        ++iter;
        printf
            ("EM : Log Prob = %f, improvement = %f after %d iterations\n",
             LogProb, Improvement, iter);
        fflush(stdout);
        Prevlogprob = LogProb;
    }

    /*
     * Free local arrays (hafinvvar included; it was previously leaked).
     */
    ckd_free(Tau);
    ckd_free(corprod);
    ckd_free_2d((void **) Newvar);
    ckd_free_2d((void **) Newmean);
    ckd_free_2d((void **) hafinvvar);
    ckd_free(Newc);

    return;
}
static YAP_Bool EM(void) { YAP_Term arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, out1, out2, out3, nodesTerm, ruleTerm, tail, pair, compoundTerm; DdNode *node1, **nodes_ex; int r, lenNodes, i, iter; long iter1; double CLL0 = -2.2 * pow(10, 10); //-inf double CLL1 = -1.7 * pow(10, 8); //+inf double p, p0, **eta_rule, ea, er; double ratio, diff; arg1 = YAP_ARG1; arg2 = YAP_ARG2; arg3 = YAP_ARG3; arg4 = YAP_ARG4; arg5 = YAP_ARG5; arg6 = YAP_ARG6; arg7 = YAP_ARG7; arg8 = YAP_ARG8; nodesTerm = arg1; ea = YAP_FloatOfTerm(arg2); er = YAP_FloatOfTerm(arg3); lenNodes = YAP_IntOfTerm(arg4); iter = YAP_IntOfTerm(arg5); nodes_ex = (DdNode **)malloc(lenNodes * sizeof(DdNode *)); nodes_probs_ex = (double *)malloc(lenNodes * sizeof(double)); example_prob = (double *)malloc(lenNodes * sizeof(double)); for (i = 0; i < lenNodes; i++) { pair = YAP_HeadOfTerm(nodesTerm); node1 = (DdNode *)YAP_IntOfTerm(YAP_HeadOfTerm(pair)); nodes_ex[i] = node1; pair = YAP_TailOfTerm(pair); example_prob[i] = YAP_FloatOfTerm(YAP_HeadOfTerm(pair)); nodesTerm = YAP_TailOfTerm(nodesTerm); } diff = CLL1 - CLL0; ratio = diff / fabs(CLL0); if (iter == -1) iter1 = 2147000000; else iter1 = iter; while ((diff > ea) && (ratio > er) && (cycle < iter1)) { cycle++; for (r = 0; r < nRules; r++) { for (i = 0; i < rules[r] - 1; i++) { eta_rule = eta[r]; eta_rule[i][0] = 0; eta_rule[i][1] = 0; } } CLL0 = CLL1; CLL1 = Expectation(nodes_ex, lenNodes); Maximization(); diff = CLL1 - CLL0; ratio = diff / fabs(CLL0); } out2 = YAP_TermNil(); for (r = 0; r < nRules; r++) { tail = YAP_TermNil(); p0 = 1; for (i = 0; i < rules[r] - 1; i++) { p = arrayprob[r][i] * p0; tail = YAP_MkPairTerm(YAP_MkFloatTerm(p), tail); p0 = p0 * (1 - arrayprob[r][i]); } tail = YAP_MkPairTerm(YAP_MkFloatTerm(p0), tail); ruleTerm = YAP_MkIntTerm(r); compoundTerm = YAP_MkPairTerm(ruleTerm, YAP_MkPairTerm(tail, YAP_TermNil())); out2 = YAP_MkPairTerm(compoundTerm, out2); } out3 = YAP_TermNil(); for (i = 0; i < lenNodes; i++) { out3 = 
YAP_MkPairTerm(YAP_MkFloatTerm(nodes_probs_ex[i]), out3); } YAP_Unify(out3, arg8); out1 = YAP_MkFloatTerm(CLL1); YAP_Unify(out1, arg6); free(nodes_ex); free(example_prob); free(nodes_probs_ex); return (YAP_Unify(out2, arg7)); }
double SpeechKMeans::Run(int rounds) { vector<vector<vector<DataPoint> > > phoneme_states(num_types_); int num_modes = cluster_problems_.num_modes(); vector<vector<DataPoint> > center_estimators(num_modes); vector<vector<double> > center_counts(num_modes); if (use_unsupervised_ && !unsup_initialized_) { vector<double> weights; vector<DataPoint> points; for (int utterance_index = 0; utterance_index < problems_.utterance_size(); ++utterance_index) { ClusterSegmentsExpectation(utterance_index, &points, &weights); } ClusterSegmentsMaximization(&points, &weights); unsup_initialized_ = true; } double round_score = 0.0; for (int round = 0; round < rounds; ++round) { if (use_gmm_) { for (int mode = 0; mode < num_modes; ++mode) { center_estimators[mode].resize(num_types_); center_counts[mode].resize(num_types_); for (int type = 0; type < num_types_; ++type) { center_estimators[mode][type].resize(problems_.num_features(), 0.0); for (int feat = 0; feat < problems_.num_features(); ++feat) { center_estimators[mode][type][feat] = 0.0; } center_counts[mode][type] = 0.0; } } } round_score = 0.0; int total_correctness = 0; for (int utterance_index = 0; utterance_index < problems_.utterance_size(); ++utterance_index) { int correctness; if (use_unsupervised_) { round_score += UnsupExpectation(utterance_index, &correctness, &phoneme_states); total_correctness += correctness; } else if (!use_gmm_) { round_score += Expectation(utterance_index, &correctness, &phoneme_states); total_correctness += correctness; } else { round_score += GMMExpectation(utterance_index, center_estimators, center_counts); Expectation(utterance_index, &correctness, &phoneme_states); total_correctness += correctness; } } if (use_unsupervised_) { UnsupMaximization(phoneme_states); } else if (!use_gmm_) { Maximization(phoneme_states); } else { GMMMaximization(center_estimators, center_counts); } cerr << "SCORE: Round score: " << round << " " << round_score << " " << total_correctness << endl; } return round_score; }