Example #1
void mixMarkov(vector< vector<int> >& X, int trainingNum, int numOfState, 
        int K, vector<double>& pi, vector< vector<double> >& thetaInit,
        vector< vector< vector<double> > >& thetaTrans) {
    // sigma holds per-sequence count tables (filled by setSigma); condProb
    // holds the per-sequence cluster responsibilities computed in the E-step.
    vector< vector< map<int, int> > > sigma;
    vector< vector<double> > condProb(trainingNum);

    // Initialize the mixture parameters and build the count tables.
    initVariable(pi, thetaInit, thetaTrans);
    setSigma(X, sigma, trainingNum);

    // Initial E-step and log-likelihood before the EM loop starts.
    estep(X, pi, thetaInit, thetaTrans, condProb);
    vector<double> ll_variables = computeLL(X, thetaInit, thetaTrans);
    
    for (int iter = 1; iter <= MAX_ITER; ++iter) {
        // M-step: re-estimate the parameters from the current
        // responsibilities; E-step: recompute the responsibilities.
        mstep(pi, thetaInit, thetaTrans, condProb);
        estep(X, pi, thetaInit, thetaTrans, condProb);

        vector<double> new_ll_variables = computeLL(X, thetaInit, thetaTrans);

        // Check for convergence: continue only while the total log-likelihood
        // still improves by more than TOL.
        double oldLL = ll_variables[0] + ll_variables[1] + ll_variables[2];
        double newLL = new_ll_variables[0] + new_ll_variables[1] + new_ll_variables[2];
        if (newLL - oldLL > TOL) {
            ll_variables = new_ll_variables; 
        } else {
            break;
        }
    }
}
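
The helpers used above (initVariable, setSigma, estep, mstep, computeLL) are not shown, but the signature suggests how the parameter containers are shaped. Below is a minimal, hypothetical caller sketched under those assumptions: K mixture components over numOfState states, pi of length K, thetaInit as K initial-state distributions, and thetaTrans as K transition matrices. The sizes and the toy data are assumptions for illustration, not part of the original example.

#include <vector>
using std::vector;

// Hypothetical driver; assumes mixMarkov() and its helpers above are linked in.
int main() {
    const int K = 2;              // assumed number of mixture components
    const int numOfState = 3;     // assumed number of Markov states

    // Each training example is a sequence of state indices in [0, numOfState).
    vector< vector<int> > X = { {0, 1, 2, 1}, {2, 2, 0, 1}, {1, 0, 0, 2} };
    const int trainingNum = static_cast<int>(X.size());

    // Mixture weights, per-component initial-state distributions, and
    // per-component transition matrices (shapes assumed from the signature).
    vector<double> pi(K);
    vector< vector<double> > thetaInit(K, vector<double>(numOfState));
    vector< vector< vector<double> > > thetaTrans(
        K, vector< vector<double> >(numOfState, vector<double>(numOfState)));

    mixMarkov(X, trainingNum, numOfState, K, pi, thetaInit, thetaTrans);
    return 0;
}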
Example #2
void EMFit<InitialClusteringType, CovarianceConstraintPolicy>::Estimate(
    const arma::mat& observations,
    std::vector<arma::vec>& means,
    std::vector<arma::mat>& covariances,
    arma::vec& weights,
    const bool useInitialModel)
{
  // Only perform initial clustering if the user wanted it.
  if (!useInitialModel)
    InitialClustering(observations, means, covariances, weights);

  double l = LogLikelihood(observations, means, covariances, weights);

  Log::Debug << "EMFit::Estimate(): initial clustering log-likelihood: "
      << l << std::endl;

  double lOld = -DBL_MAX;
  arma::mat condProb(observations.n_cols, means.size());

  // Iterate to update the model until no more improvement is found.
  size_t iteration = 1;
  while (std::abs(l - lOld) > tolerance && iteration != maxIterations)
  {
    Log::Info << "EMFit::Estimate(): iteration " << iteration << ", "
        << "log-likelihood " << l << "." << std::endl;

    // Calculate the conditional probabilities of choosing a particular
    // Gaussian given the observations and the present theta value.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Store the conditional probabilities for Gaussian i in column i of
      // condProb.  First make an alias of that column.
      arma::vec condProbAlias = condProb.unsafe_col(i);
      phi(observations, means[i], covariances[i], condProbAlias);
      condProbAlias *= weights[i];
    }

    // Normalize row-wise.
    for (size_t i = 0; i < condProb.n_rows; i++)
    {
      // Avoid dividing by zero; if the probability for everything is 0, we
      // don't want to make it NaN.
      const double probSum = accu(condProb.row(i));
      if (probSum != 0.0)
        condProb.row(i) /= probSum;
    }

    // Store the sum of the probability of each state over all the observations.
    arma::vec probRowSums = trans(arma::sum(condProb, 0 /* columnwise */));

    // Calculate the new value of the means using the updated conditional
    // probabilities.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Don't update if there's no probability of the Gaussian having points.
      if (probRowSums[i] != 0)
        means[i] = (observations * condProb.col(i)) / probRowSums[i];

      // Calculate the new value of the covariances using the updated
      // conditional probabilities and the updated means.
      arma::mat tmp = observations - (means[i] *
          arma::ones<arma::rowvec>(observations.n_cols));
      arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
          trans(condProb.col(i)));

      // Don't update if there's no probability of the Gaussian having points.
      if (probRowSums[i] != 0.0)
        covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];

      // Apply covariance constraint.
      constraint.ApplyConstraint(covariances[i]);
    }

    // Calculate the new values for omega using the updated conditional
    // probabilities.
    weights = probRowSums / observations.n_cols;

    // Update values of l; calculate new log-likelihood.
    lOld = l;
    l = LogLikelihood(observations, means, covariances, weights);

    iteration++;
  }
}
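
Both mlpack-style examples delegate the per-component density evaluation to phi(). As a rough standalone sketch of the assumed behavior (not mlpack's implementation), the call fills the output vector with the multivariate Gaussian density of every observation column under the given mean and covariance, which the surrounding loop then scales by the component weight.

#include <armadillo>
#include <cmath>

// Sketch of the assumed behavior of phi(): evaluate the multivariate Gaussian
// density of each observation (one per column) for a single component.
void gaussianDensity(const arma::mat& observations,
                     const arma::vec& mean,
                     const arma::mat& covariance,
                     arma::vec& densities)
{
  const double d = static_cast<double>(observations.n_rows);
  const arma::mat invCov = arma::inv(covariance);
  const double normConst = 1.0 /
      std::sqrt(std::pow(2.0 * arma::datum::pi, d) * arma::det(covariance));

  densities.set_size(observations.n_cols);
  for (size_t i = 0; i < observations.n_cols; ++i)
  {
    const arma::vec diff = observations.col(i) - mean;
    densities[i] = normConst *
        std::exp(-0.5 * arma::as_scalar(diff.t() * invCov * diff));
  }
}

Scaling these densities by weights[i] and then normalizing each row of condProb is what turns them into the responsibilities consumed by the M-step updates.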
Example #3
void EMFit<InitialClusteringType>::Estimate(const arma::mat& observations,
                                            const arma::vec& probabilities,
                                            std::vector<arma::vec>& means,
                                            std::vector<arma::mat>& covariances,
                                            arma::vec& weights)
{
  InitialClustering(observations, means, covariances, weights);

  double l = LogLikelihood(observations, means, covariances, weights);

  Log::Debug << "EMFit::Estimate(): initial clustering log-likelihood: "
      << l << std::endl;

  double lOld = -DBL_MAX;
  arma::mat condProb(observations.n_cols, means.size());

  // Iterate to update the model until no more improvement is found.
  size_t iteration = 1;
  while (std::abs(l - lOld) > tolerance && iteration != maxIterations)
  {
    // Calculate the conditional probabilities of choosing a particular
    // Gaussian given the observations and the present theta value.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Store the conditional probabilities for Gaussian i in column i of
      // condProb.  First make an alias of that column.
      arma::vec condProbAlias = condProb.unsafe_col(i);
      phi(observations, means[i], covariances[i], condProbAlias);
      condProbAlias *= weights[i];
    }

    // Normalize row-wise.
    for (size_t i = 0; i < condProb.n_rows; i++)
    {
      // Avoid dividing by zero; if the probability for everything is 0, we
      // don't want to make it NaN.
      const double probSum = accu(condProb.row(i));
      if (probSum != 0.0)
        condProb.row(i) /= probSum;
    }

    // This will store the sum of probabilities of each state over all the
    // observations.
    arma::vec probRowSums(means.size());

    // Calculate the new value of the means using the updated conditional
    // probabilities.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Calculate the sum of probabilities of points, which is the
      // conditional probability of each point being from Gaussian i
      // multiplied by the probability of the point being from this mixture
      // model.
      probRowSums[i] = accu(condProb.col(i) % probabilities);

      means[i] = (observations * (condProb.col(i) % probabilities)) /
        probRowSums[i];

      // Calculate the new value of the covariances using the updated
      // conditional probabilities and the updated means.
      arma::mat tmp = observations - (means[i] *
          arma::ones<arma::rowvec>(observations.n_cols));
      arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
          trans(condProb.col(i) % probabilities));

      covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];

      // Ensure positive-definiteness.  TODO: make this more efficient.
      if (forcePositive && det(covariances[i]) <= 1e-50)
      {
        Log::Debug << "Covariance matrix " << i << " is not positive definite. "
            << "Adding perturbation." << std::endl;

        double perturbation = 1e-30;
        while (det(covariances[i]) <= 1e-50)
        {
          covariances[i].diag() += perturbation;
          perturbation *= 10; // Slow, but we don't want to add too much.
        }
      }
    }

    // Calculate the new values for omega using the updated conditional
    // probabilities.
    weights = probRowSums / accu(probabilities);

    // Update values of l; calculate new log-likelihood.
    lOld = l;
    l = LogLikelihood(observations, means, covariances, weights);

    iteration++;
  }
}
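
The positive-definiteness fix in the third example is inlined in the update loop. Factored out as a standalone helper it reads as follows; this is a sketch that mirrors the logic shown above (the 1e-50 determinant threshold and the growing diagonal perturbation), not a separate mlpack API.

#include <armadillo>

// Mirror of the perturbation loop above: if the determinant is numerically
// non-positive, add an increasingly large value to the diagonal until the
// matrix is safely invertible again.
void forcePositiveDefinite(arma::mat& covariance)
{
  const double threshold = 1e-50;
  if (arma::det(covariance) > threshold)
    return;

  double perturbation = 1e-30;
  while (arma::det(covariance) <= threshold)
  {
    covariance.diag() += perturbation;
    perturbation *= 10; // Slow, but we don't want to add too much.
  }
}

Growing the perturbation geometrically keeps the added diagonal mass as small as possible while still escaping near-singular covariances quickly.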