Exemple #1
0
/*
 * Foreign predicate with four arguments: performs one Expectation() pass
 * over the supplied examples and hands back the accumulated eta counts
 * together with the value Expectation() returned.
 *
 *   ARG1 (in)  list whose elements are [Node, Prob] lists; Node is an
 *              integer term that is cast to a DdNode pointer, Prob is a
 *              float term stored in the global example_prob[].
 *   ARG2 (in)  integer: how many elements of ARG1 are read.
 *   ARG3 (out) list of [RuleIndex, [[Eta0, Eta1], ...]] entries taken from
 *              the global eta[][][] table.  Both the outer and the inner
 *              lists are built by prepending, so they come out in
 *              descending index order.
 *   ARG4 (out) float: the value returned by Expectation().
 *
 * Uses the file-level globals nRules, rules[], eta and example_prob, which
 * are defined outside this function.
 *
 * Returns true only when BOTH output arguments unify; fails (returns 0)
 * when ARG2 is negative or the work arrays cannot be allocated.
 * NOTE(review): ARG1 is not checked to really contain ARG2 well-formed
 * elements - the caller must guarantee that.
 */
static YAP_Bool Q(void) {
  YAP_Term arg1, arg2, arg3, arg4, out, out1, term, nodesTerm, ruleTerm, tail,
      pair, compoundTerm;
  DdNode **nodes_ex;
  int r, lenNodes, i, ok;
  double p1, p0, **eta_rule, CLL;
  size_t nAlloc;

  arg1 = YAP_ARG1;
  arg2 = YAP_ARG2;
  arg3 = YAP_ARG3;
  arg4 = YAP_ARG4;

  nodesTerm = arg1;
  lenNodes = YAP_IntOfTerm(arg2);

  /* A negative count would turn into a huge size_t in the size computation */
  if (lenNodes < 0)
    return 0;

  /* Ask for at least one element so that NULL always means "out of memory"
     (malloc(0) is allowed to return NULL) */
  nAlloc = lenNodes > 0 ? (size_t)lenNodes : 1;
  nodes_ex = (DdNode **)malloc(nAlloc * sizeof(DdNode *));
  example_prob = (double *)malloc(nAlloc * sizeof(double));
  if (nodes_ex == NULL || example_prob == NULL) {
    free(nodes_ex);
    free(example_prob);
    example_prob = NULL;
    return 0;
  }

  /* Unpack each [Node, Prob] element of the input list */
  for (i = 0; i < lenNodes; i++) {
    pair = YAP_HeadOfTerm(nodesTerm);
    nodes_ex[i] = (DdNode *)YAP_IntOfTerm(YAP_HeadOfTerm(pair));
    pair = YAP_TailOfTerm(pair);
    example_prob[i] = YAP_FloatOfTerm(YAP_HeadOfTerm(pair));
    nodesTerm = YAP_TailOfTerm(nodesTerm);
  }

  /* Clear the eta counters before the pass */
  for (r = 0; r < nRules; r++) {
    eta_rule = eta[r];
    for (i = 0; i < rules[r] - 1; i++) {
      eta_rule[i][0] = 0;
      eta_rule[i][1] = 0;
    }
  }

  CLL = Expectation(nodes_ex, lenNodes);

  /* Convert the eta table into a Prolog term */
  out = YAP_TermNil();
  for (r = 0; r < nRules; r++) {
    tail = YAP_TermNil();
    eta_rule = eta[r];
    for (i = 0; i < rules[r] - 1; i++) {
      p0 = eta_rule[i][0];
      p1 = eta_rule[i][1];
      term = YAP_MkPairTerm(YAP_MkFloatTerm(p0),
                            YAP_MkPairTerm(YAP_MkFloatTerm(p1), YAP_TermNil()));
      tail = YAP_MkPairTerm(term, tail);
    }

    ruleTerm = YAP_MkIntTerm(r);
    compoundTerm =
        YAP_MkPairTerm(ruleTerm, YAP_MkPairTerm(tail, YAP_TermNil()));
    out = YAP_MkPairTerm(compoundTerm, out);
  }

  free(nodes_ex);
  free(example_prob);
  example_prob = NULL; /* do not leave the global pointing at freed memory */

  /* The predicate must fail if either output fails to unify */
  out1 = YAP_MkFloatTerm(CLL);
  ok = YAP_Unify(out1, arg4) && YAP_Unify(out, arg3);
  return ok;
}
Exemple #2
0
/* Zero the per-mode accumulators used by one EM pass. */
static void emm_clear_accumulators(float *Newc, float **Newmean,
				   float **Newvar, int K, int Ndim)
{
	int k, j;

	for (k = 0; k < K; ++k) {
		Newc[k] = 0;
		for (j = 0; j < Ndim; ++j) {
			Newmean[k][j] = 0;
			Newvar[k][j] = 0;
		}
	}
}

/*
 * Precompute, for every mode, the log-domain term
 *     corprod[k] = log(c[k]) - 0.5 * sum_j log(var[k][j])
 * and the per-dimension factor hafinvvar[k][j] = 1 / (2 * var[k][j]).
 * A non-positive variance gets the large stand-in factor 1e+20.
 * NOTE(review): log() is still applied to such a variance, so corprod[k]
 * becomes infinite/NaN in that case; left as in the original.
 */
static void emm_prepare_gaussians(const float *c, float **var,
				  float *corprod, float **hafinvvar,
				  int K, int Ndim)
{
	int k, j;

	for (k = 0; k < K; ++k) {
		corprod[k] = log((double) c[k]);
		for (j = 0; j < Ndim; ++j) {
			corprod[k] -= 0.5 * log((double) var[k][j]);
			if (var[k][j] > 0)
				hafinvvar[k][j] = 1.0 / (2.0 * var[k][j]);
			else
				hafinvvar[k][j] = 1e+20;
		}
	}
}

/*
 * Write the current mixture (K, Ndim, then weight / means / variances for
 * each mode) to tempfile.  Does nothing when tempfile is NULL; prints a
 * warning on stderr and returns when the file cannot be opened.
 */
static void emm_store_checkpoint(const char *tempfile, const float *c,
				 float **mean, float **var, int K, int Ndim)
{
	FILE *temp;
	int i, j;

	if (tempfile == NULL)
		return;

	temp = fopen(tempfile, "w");
	if (temp == NULL) {
		fprintf(stderr,
			"EM : cannot open %s for writing, distribution not stored\n",
			tempfile);
		return;
	}

	fprintf(temp, "%d %d\n", K, Ndim);
	for (i = 0; i < K; ++i) {
		fprintf(temp, "%f\n", c[i]);
		for (j = 0; j < Ndim; ++j)
			fprintf(temp, "%f ", mean[i][j]);
		fprintf(temp, "\n");
		for (j = 0; j < Ndim; ++j)
			fprintf(temp, "%f ", var[i][j]);
		fprintf(temp, "\n");
	}
	fclose(temp);
}

/*
 * EM re-estimation of a K-mode mixture with per-dimension variances.
 *
 * mean, var and c are read as the starting point and overwritten in place
 * on every iteration.  Iteration stops when the relative log-probability
 * improvement is no longer above Threshold or after numiters iterations.
 * Before each re-estimation the current parameters are written to tempfile.
 *
 * Expectation() is defined elsewhere; from its use here it is expected to
 * fill Tau[0..K-1] for one observation and return that observation's
 * contribution to the log probability.
 *
 * NOTE(review): a mode whose accumulated weight Newc[k] is zero causes a
 * division by zero in the mean/variance updates; this is not guarded so
 * that numerical results stay exactly as before.
 */
void estimate_multi_modals(float **x,	/* The observation vectors */
			   int N,	/* Number of observation vectors */
			   int Ndim,	/* Dimensionality of observations */
			   int K,	/* Number of modes in the distribution */
			   float **mean,	/* The means of the various modes */
			   float **var,	/* The variances of all the modes */
			   float *c,	/* A-priori probabilities of each of the
					   modes, or mixing proportion */
			   char *tempfile,	/* File to store temporary distributions */
			   int numiters,	/* Number of iterations of EM to run */
			   float Threshold      /* Convergence ratio */
    )
{
	float **Newvar, **hafinvvar,
	    **Newmean,
	    *Newc,
	    *Tau,
	    *corprod,
	    SumNewc, Prevlogprob, LogProb, Improvement;

	int i, j, k, iter = 0;

	/*
	 * Start above any sensible threshold so the loop is entered
	 */
	Improvement = 100;

	/*
	 * Allocate spaces for the local parameter arrays
	 */
	Newmean = (float **) ckd_calloc_2d(K, Ndim, sizeof(float));
	Newvar = (float **) ckd_calloc_2d(K, Ndim, sizeof(float));
	hafinvvar = (float **) ckd_calloc_2d(K, Ndim, sizeof(float));
	Newc = (float *) ckd_calloc(K, sizeof(float));
	Tau = (float *) ckd_calloc(K, sizeof(float));
	corprod = (float *) ckd_calloc(K, sizeof(float));

	emm_clear_accumulators(Newc, Newmean, Newvar, K, Ndim);
	emm_prepare_gaussians(c, var, corprod, hafinvvar, K, Ndim);

	/*
	 * Estimate means and variances and priors while computing overall
	 * Likelihood
	 * Note: variance estimated as Sum((x-mean)*(x-mean))/count
	 * rather than Sum(x*x)/count - mean*mean
	 * because the latter is an unstable formula and tends to give -ve
	 * variances due to numerical errors
	 */
	Prevlogprob = 0;
	for (i = 0; i < N; ++i) {
		Prevlogprob +=
		    Expectation(Tau, x[i], c, mean, hafinvvar, corprod, K,
				Ndim);
		for (k = 0; k < K; ++k) {
			if (Tau[k] > 0) {
				Newc[k] += Tau[k];
				for (j = 0; j < Ndim; ++j)
					Newmean[k][j] += Tau[k] * x[i][j];
			}
		}
	}
	for (k = 0; k < K; ++k)
		for (j = 0; j < Ndim; ++j)
			Newmean[k][j] /= Newc[k];

	/*
	 * Second pass over the data: the initial variance sums are taken
	 * around the freshly computed means
	 */
	for (i = 0; i < N; ++i) {
		Expectation(Tau, x[i], c, mean, hafinvvar, corprod, K,
			    Ndim);
		for (k = 0; k < K; ++k) {
			if (Tau[k] > 0) {
				for (j = 0; j < Ndim; ++j)
					Newvar[k][j] +=
					    Tau[k] * (x[i][j] -
						      Newmean[k][j])
					    * (x[i][j] - Newmean[k][j]);
			}
		}
	}
	printf("EM : Initial log probablity = %f \n", Prevlogprob);

	while ((Improvement > Threshold) && (iter < numiters)) {
		/* 
		 * We use SumNewc instead of N as in the formula because the
		 * Newc's may not sum to N, because of accuracy errors of the 
		 * computer for large N.
		 */
		SumNewc = 0;
		for (k = 0; k < K; ++k)
			SumNewc += Newc[k];

		for (k = 0; k < K; ++k) {
			for (j = 0; j < Ndim; ++j) {
				mean[k][j] = Newmean[k][j];
				var[k][j] = Newvar[k][j] / Newc[k];
			}
			c[k] = Newc[k] / SumNewc;
		}

		/*
		 * Store partially converged distribution
		 */
		emm_store_checkpoint(tempfile, c, mean, var, K, Ndim);

		emm_clear_accumulators(Newc, Newmean, Newvar, K, Ndim);
		emm_prepare_gaussians(c, var, corprod, hafinvvar, K, Ndim);

		/*
		 * Single pass: inside the loop the variance sums are taken
		 * around the means of the previous iteration (mean[][]),
		 * not around the means being accumulated in Newmean[][]
		 */
		LogProb = 0;
		for (i = 0; i < N; ++i) {
			LogProb +=
			    Expectation(Tau, x[i], c, mean, hafinvvar,
					corprod, K, Ndim);
			for (k = 0; k < K; ++k) {
				if (Tau[k] > 0) {
					Newc[k] += Tau[k];
					for (j = 0; j < Ndim; ++j) {
						Newmean[k][j] +=
						    Tau[k] * x[i][j];
						Newvar[k][j] +=
						    Tau[k] * (x[i][j] -
							      mean[k][j]) *
						    (x[i][j] - mean[k][j]);
					}
				}
			}
		}
		for (k = 0; k < K; ++k)
			for (j = 0; j < Ndim; ++j)
				Newmean[k][j] /= Newc[k];

		/*
		 * Relative change of the log probability; the sign flip makes
		 * the divisor effectively |LogProb|
		 */
		Improvement = (LogProb - Prevlogprob) / LogProb;
		if (LogProb < 0)
			Improvement = -Improvement;
		++iter;
		printf
		    ("EM : Log Prob = %f, improvement = %f after %d iterations\n",
		     LogProb, Improvement, iter);
		fflush(stdout);
		Prevlogprob = LogProb;
	}
	/*
	 * Free local arrays (hafinvvar included - it used to be leaked)
	 */
	ckd_free(Tau);
	ckd_free(corprod);
	ckd_free_2d((void **)Newvar);
	ckd_free_2d((void **)Newmean);
	ckd_free_2d((void **)hafinvvar);
	ckd_free(Newc);
	return;
}
Exemple #3
0
/*
 * Foreign predicate with eight arguments: repeats Expectation() and
 * Maximization() until the log-likelihood stops improving or the cycle
 * limit is reached, then returns the resulting parameters.
 *
 *   ARG1 (in)  list whose elements are [Node, Prob] lists; Node is an
 *              integer term that is cast to a DdNode pointer, Prob is a
 *              float term stored in the global example_prob[].
 *   ARG2 (in)  float ea: the loop continues only while the absolute
 *              improvement CLL1 - CLL0 is greater than ea.
 *   ARG3 (in)  float er: ... and while the improvement divided by
 *              |CLL0| is greater than er.
 *   ARG4 (in)  integer: how many elements of ARG1 are read.
 *   ARG5 (in)  integer: upper bound for the global counter `cycle`;
 *              -1 is replaced by 2147000000.
 *   ARG6 (out) float: last value returned by Expectation() (or the
 *              initial placeholder when the loop never ran).
 *   ARG7 (out) list of [RuleIndex, [P, ...]] entries derived from the
 *              global arrayprob[][]; for each rule the list head is the
 *              leftover mass prod(1 - arrayprob[r][i]), followed by the
 *              per-index values in descending index order.
 *   ARG8 (out) list of the nodes_probs_ex[] values, in descending index
 *              order (the list is built by prepending).
 *
 * Uses file-level globals defined elsewhere: nRules, rules[], eta,
 * arrayprob, cycle, example_prob and nodes_probs_ex.  nodes_probs_ex is
 * only allocated and read here - presumably Expectation() fills it; it is
 * zero-initialised so that ARG8 is well defined even if the loop body
 * never executes.
 *
 * Returns true only when ALL three output arguments unify; fails
 * (returns 0) when ARG4 is negative or an allocation fails.
 * NOTE(review): ARG1 is not checked to really contain ARG4 well-formed
 * elements - the caller must guarantee that.
 */
static YAP_Bool EM(void) {
  YAP_Term arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, out1, out2, out3,
      nodesTerm, ruleTerm, tail, pair, compoundTerm;
  DdNode **nodes_ex;
  int r, lenNodes, i, iter, ok;
  long iter1;
  /* Large negative placeholders (not infinities), CLL0 well below CLL1, so
     the first convergence test sees a big positive improvement */
  double CLL0 = -2.2e10;
  double CLL1 = -1.7e8;
  double p, p0, **eta_rule, ea, er;
  double ratio, diff;
  size_t nAlloc;

  arg1 = YAP_ARG1;
  arg2 = YAP_ARG2;
  arg3 = YAP_ARG3;
  arg4 = YAP_ARG4;
  arg5 = YAP_ARG5;
  arg6 = YAP_ARG6;
  arg7 = YAP_ARG7;
  arg8 = YAP_ARG8;

  nodesTerm = arg1;
  ea = YAP_FloatOfTerm(arg2);
  er = YAP_FloatOfTerm(arg3);
  lenNodes = YAP_IntOfTerm(arg4);
  iter = YAP_IntOfTerm(arg5);

  /* A negative count would turn into a huge size_t in the size computation */
  if (lenNodes < 0)
    return 0;

  /* Ask for at least one element so that NULL always means "out of memory" */
  nAlloc = lenNodes > 0 ? (size_t)lenNodes : 1;
  nodes_ex = (DdNode **)malloc(nAlloc * sizeof(DdNode *));
  nodes_probs_ex = (double *)calloc(nAlloc, sizeof(double));
  example_prob = (double *)malloc(nAlloc * sizeof(double));
  if (nodes_ex == NULL || nodes_probs_ex == NULL || example_prob == NULL) {
    free(nodes_ex);
    free(nodes_probs_ex);
    free(example_prob);
    nodes_probs_ex = NULL;
    example_prob = NULL;
    return 0;
  }

  /* Unpack each [Node, Prob] element of the input list */
  for (i = 0; i < lenNodes; i++) {
    pair = YAP_HeadOfTerm(nodesTerm);
    nodes_ex[i] = (DdNode *)YAP_IntOfTerm(YAP_HeadOfTerm(pair));
    pair = YAP_TailOfTerm(pair);
    example_prob[i] = YAP_FloatOfTerm(YAP_HeadOfTerm(pair));
    nodesTerm = YAP_TailOfTerm(nodesTerm);
  }

  diff = CLL1 - CLL0;
  ratio = diff / fabs(CLL0);
  if (iter == -1)
    iter1 = 2147000000;
  else
    iter1 = iter;

  while ((diff > ea) && (ratio > er) && (cycle < iter1)) {
    cycle++;
    /* Clear the eta counters before every expectation step */
    for (r = 0; r < nRules; r++) {
      eta_rule = eta[r];
      for (i = 0; i < rules[r] - 1; i++) {
        eta_rule[i][0] = 0;
        eta_rule[i][1] = 0;
      }
    }
    CLL0 = CLL1;
    CLL1 = Expectation(nodes_ex, lenNodes);
    Maximization();
    diff = CLL1 - CLL0;
    ratio = diff / fabs(CLL0);
  }

  /* Per-rule parameter list: p0 carries the product of (1 - arrayprob)
     seen so far and ends up as the leftover mass at the list head */
  out2 = YAP_TermNil();
  for (r = 0; r < nRules; r++) {
    tail = YAP_TermNil();
    p0 = 1;
    for (i = 0; i < rules[r] - 1; i++) {
      p = arrayprob[r][i] * p0;
      tail = YAP_MkPairTerm(YAP_MkFloatTerm(p), tail);
      p0 = p0 * (1 - arrayprob[r][i]);
    }
    tail = YAP_MkPairTerm(YAP_MkFloatTerm(p0), tail);
    ruleTerm = YAP_MkIntTerm(r);
    compoundTerm =
        YAP_MkPairTerm(ruleTerm, YAP_MkPairTerm(tail, YAP_TermNil()));
    out2 = YAP_MkPairTerm(compoundTerm, out2);
  }

  out3 = YAP_TermNil();
  for (i = 0; i < lenNodes; i++) {
    out3 = YAP_MkPairTerm(YAP_MkFloatTerm(nodes_probs_ex[i]), out3);
  }
  out1 = YAP_MkFloatTerm(CLL1);

  /* The predicate must fail if any of the outputs fails to unify */
  ok = YAP_Unify(out3, arg8) && YAP_Unify(out1, arg6) &&
       YAP_Unify(out2, arg7);

  free(nodes_ex);
  free(example_prob);
  free(nodes_probs_ex);
  /* do not leave the globals pointing at freed memory */
  example_prob = NULL;
  nodes_probs_ex = NULL;

  return ok;
}
Exemple #4
0
double SpeechKMeans::Run(int rounds) {
  vector<vector<vector<DataPoint> > > phoneme_states(num_types_);
  int num_modes = cluster_problems_.num_modes();
  vector<vector<DataPoint> > center_estimators(num_modes);  
  vector<vector<double> > center_counts(num_modes);  

  if (use_unsupervised_ && !unsup_initialized_) {
    vector<double> weights;
    vector<DataPoint> points;
    for (int utterance_index = 0; 
         utterance_index < problems_.utterance_size(); 
         ++utterance_index) {
      ClusterSegmentsExpectation(utterance_index, &points, &weights); 
    }
    ClusterSegmentsMaximization(&points, &weights);
    unsup_initialized_ = true;
  }

  double round_score = 0.0;
  for (int round = 0; round < rounds; ++round) {
    if (use_gmm_) {
      for (int mode = 0; mode < num_modes; ++mode) {
        center_estimators[mode].resize(num_types_);
        center_counts[mode].resize(num_types_);
        for (int type = 0; type < num_types_; ++type) {
          center_estimators[mode][type].resize(problems_.num_features(), 0.0);
          for (int feat = 0; feat < problems_.num_features(); ++feat) {
            center_estimators[mode][type][feat] = 0.0;
          }
          center_counts[mode][type] = 0.0;
        }
      }
    }

    round_score = 0.0;
    int total_correctness = 0;
    for (int utterance_index = 0; 
         utterance_index < problems_.utterance_size(); 
         ++utterance_index) {
      int correctness; 
      if (use_unsupervised_) {
        round_score += UnsupExpectation(utterance_index, &correctness, &phoneme_states);
        total_correctness += correctness;
      } else if (!use_gmm_) {
        round_score += Expectation(utterance_index, &correctness, &phoneme_states);
        total_correctness += correctness;
      } else {
        round_score += GMMExpectation(utterance_index, center_estimators, center_counts);
        Expectation(utterance_index, &correctness, &phoneme_states);
        total_correctness += correctness;
      }
    }
    if (use_unsupervised_) {
      UnsupMaximization(phoneme_states);
    } else if (!use_gmm_) {
      Maximization(phoneme_states);
    } else {
      GMMMaximization(center_estimators, center_counts);
    }
    cerr << "SCORE: Round score: " << round << " " <<  round_score << " " << total_correctness << endl;
  }
  return round_score;
}