void mixMarkov(vector< vector<int> >& X, int trainingNum, int numOfState, int K,
               vector<double>& pi, vector< vector<double> >& thetaInit,
               vector< vector< vector<double> > >& thetaTrans)
{
  vector< vector< map<int, int> > > sigma;
  vector<double> ll_variables(3);
  vector< vector<double> > condProb(trainingNum);

  initVariable(pi, thetaInit, thetaTrans);
  setSigma(X, sigma, trainingNum);

  // Initial E-step and log-likelihood.
  estep(X, pi, thetaInit, thetaTrans, condProb);
  ll_variables = computeLL(X, thetaInit, thetaTrans);

  for (int iter = 1; iter <= MAX_ITER; ++iter)
  {
    mstep(pi, thetaInit, thetaTrans, condProb);
    estep(X, pi, thetaInit, thetaTrans, condProb);

    vector<double> new_ll_variables = computeLL(X, thetaInit, thetaTrans);

    // Check for convergence: keep iterating only while the log-likelihood
    // improves by more than TOL.
    if (new_ll_variables[0] + new_ll_variables[1] + new_ll_variables[2] -
        (ll_variables[0] + ll_variables[1] + ll_variables[2]) > TOL)
    {
      ll_variables = new_ll_variables;
    }
    else
    {
      break;
    }
  }
}
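// Usage sketch for mixMarkov() -- not part of the original source.  It assumes
// the layout pi[k], thetaInit[k][state], thetaTrans[k][from][to], that
// initVariable() fills these with starting values, and that MAX_ITER, TOL, and
// the estep/mstep/computeLL/setSigma helpers are defined elsewhere; any of
// these details may differ in the real code.
int exampleMixMarkov()
{
  const int trainingNum = 100;  // number of training sequences
  const int numOfState = 4;     // size of the state alphabet
  const int K = 3;              // number of mixture components

  // Toy data: each sequence is a list of state indices in [0, numOfState).
  vector< vector<int> > X(trainingNum, vector<int>(20, 0));

  vector<double> pi(K);
  vector< vector<double> > thetaInit(K, vector<double>(numOfState));
  vector< vector< vector<double> > > thetaTrans(K,
      vector< vector<double> >(numOfState, vector<double>(numOfState)));

  mixMarkov(X, trainingNum, numOfState, K, pi, thetaInit, thetaTrans);
  // pi, thetaInit, and thetaTrans now hold the EM estimates.
  return 0;
}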
template<typename InitialClusteringType, typename CovarianceConstraintPolicy>
void EMFit<InitialClusteringType, CovarianceConstraintPolicy>::Estimate(
    const arma::mat& observations,
    std::vector<arma::vec>& means,
    std::vector<arma::mat>& covariances,
    arma::vec& weights,
    const bool useInitialModel)
{
  // Only perform initial clustering if the user wanted it.
  if (!useInitialModel)
    InitialClustering(observations, means, covariances, weights);

  double l = LogLikelihood(observations, means, covariances, weights);

  Log::Debug << "EMFit::Estimate(): initial clustering log-likelihood: "
      << l << std::endl;

  double lOld = -DBL_MAX;
  arma::mat condProb(observations.n_cols, means.size());

  // Iterate to update the model until no more improvement is found.
  size_t iteration = 1;
  while (std::abs(l - lOld) > tolerance && iteration != maxIterations)
  {
    Log::Info << "EMFit::Estimate(): iteration " << iteration << ", "
        << "log-likelihood " << l << "." << std::endl;

    // Calculate the conditional probabilities of choosing a particular
    // Gaussian given the observations and the present theta value.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Store conditional probabilities into condProb vector for each
      // Gaussian.  First we make an alias of the condProb vector.
      arma::vec condProbAlias = condProb.unsafe_col(i);
      phi(observations, means[i], covariances[i], condProbAlias);
      condProbAlias *= weights[i];
    }

    // Normalize row-wise.
    for (size_t i = 0; i < condProb.n_rows; i++)
    {
      // Avoid dividing by zero; if the probability for everything is 0, we
      // don't want to make it NaN.
      const double probSum = accu(condProb.row(i));
      if (probSum != 0.0)
        condProb.row(i) /= probSum;
    }

    // Store the sum of the probability of each state over all the
    // observations.
    arma::vec probRowSums = trans(arma::sum(condProb, 0 /* columnwise */));

    // Calculate the new value of the means using the updated conditional
    // probabilities.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Don't update if there's no probability of the Gaussian having points.
      if (probRowSums[i] != 0)
        means[i] = (observations * condProb.col(i)) / probRowSums[i];

      // Calculate the new value of the covariances using the updated
      // conditional probabilities and the updated means.
      arma::mat tmp = observations - (means[i] *
          arma::ones<arma::rowvec>(observations.n_cols));
      arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
          trans(condProb.col(i)));

      // Don't update if there's no probability of the Gaussian having points.
      if (probRowSums[i] != 0.0)
        covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];

      // Apply covariance constraint.
      constraint.ApplyConstraint(covariances[i]);
    }

    // Calculate the new values for omega using the updated conditional
    // probabilities.
    weights = probRowSums / observations.n_cols;

    // Update values of l; calculate new log-likelihood.
    lOld = l;
    l = LogLikelihood(observations, means, covariances, weights);

    iteration++;
  }
}
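// Standalone sketch (not mlpack code) of the E-step normalization and M-step
// mean/weight updates that Estimate() performs above.  phi() is replaced by
// random positive values, so the numbers are meaningless; only the Armadillo
// linear algebra is illustrated.  The data layout (one observation per
// column) is assumed from the code above.
#include <armadillo>
#include <vector>

arma::vec emIterationSketch()
{
  const size_t dims = 2, points = 500, gaussians = 3;
  arma::mat observations = arma::randu<arma::mat>(dims, points);

  // Stand-in for phi(): one column of unnormalized responsibilities per
  // Gaussian.
  arma::mat condProb = arma::randu<arma::mat>(points, gaussians);

  // E-step: normalize each row so each point's responsibilities sum to one.
  for (size_t i = 0; i < condProb.n_rows; i++)
  {
    const double probSum = arma::accu(condProb.row(i));
    if (probSum != 0.0)
      condProb.row(i) /= probSum;
  }

  // M-step: effective point count, mean, and mixing weight per Gaussian.
  arma::vec probRowSums = arma::trans(arma::sum(condProb, 0));
  std::vector<arma::vec> means(gaussians);
  for (size_t i = 0; i < gaussians; i++)
    if (probRowSums[i] != 0.0)
      means[i] = (observations * condProb.col(i)) / probRowSums[i];

  // New mixing weights.
  return probRowSums / observations.n_cols;
}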
template<typename InitialClusteringType>
void EMFit<InitialClusteringType>::Estimate(const arma::mat& observations,
                                            const arma::vec& probabilities,
                                            std::vector<arma::vec>& means,
                                            std::vector<arma::mat>& covariances,
                                            arma::vec& weights)
{
  InitialClustering(observations, means, covariances, weights);

  double l = LogLikelihood(observations, means, covariances, weights);

  Log::Debug << "EMFit::Estimate(): initial clustering log-likelihood: "
      << l << std::endl;

  double lOld = -DBL_MAX;
  arma::mat condProb(observations.n_cols, means.size());

  // Iterate to update the model until no more improvement is found.
  size_t iteration = 1;
  while (std::abs(l - lOld) > tolerance && iteration != maxIterations)
  {
    // Calculate the conditional probabilities of choosing a particular
    // Gaussian given the observations and the present theta value.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Store conditional probabilities into condProb vector for each
      // Gaussian.  First we make an alias of the condProb vector.
      arma::vec condProbAlias = condProb.unsafe_col(i);
      phi(observations, means[i], covariances[i], condProbAlias);
      condProbAlias *= weights[i];
    }

    // Normalize row-wise.
    for (size_t i = 0; i < condProb.n_rows; i++)
    {
      // Avoid dividing by zero; if the probability for everything is 0, we
      // don't want to make it NaN.
      const double probSum = accu(condProb.row(i));
      if (probSum != 0.0)
        condProb.row(i) /= probSum;
    }

    // This will store the sum of probabilities of each state over all the
    // observations.
    arma::vec probRowSums(means.size());

    // Calculate the new value of the means using the updated conditional
    // probabilities.
    for (size_t i = 0; i < means.size(); i++)
    {
      // Calculate the sum of probabilities of points, which is the
      // conditional probability of each point being from Gaussian i
      // multiplied by the probability of the point being from this mixture
      // model.
      probRowSums[i] = accu(condProb.col(i) % probabilities);

      means[i] = (observations * (condProb.col(i) % probabilities)) /
          probRowSums[i];

      // Calculate the new value of the covariances using the updated
      // conditional probabilities and the updated means.
      arma::mat tmp = observations - (means[i] *
          arma::ones<arma::rowvec>(observations.n_cols));
      arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
          trans(condProb.col(i) % probabilities));

      covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];

      // Ensure positive-definiteness.  TODO: make this more efficient.
      if (forcePositive && det(covariances[i]) <= 1e-50)
      {
        Log::Debug << "Covariance matrix " << i << " is not positive definite. "
            << "Adding perturbation." << std::endl;

        double perturbation = 1e-30;
        while (det(covariances[i]) <= 1e-50)
        {
          covariances[i].diag() += perturbation;
          perturbation *= 10;  // Slow, but we don't want to add too much.
        }
      }
    }

    // Calculate the new values for omega using the updated conditional
    // probabilities.
    weights = probRowSums / accu(probabilities);

    // Update values of l; calculate new log-likelihood.
    lOld = l;
    l = LogLikelihood(observations, means, covariances, weights);

    iteration++;
  }
}
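// Sketch (not mlpack code) isolating what this weighted overload changes: each
// point's responsibility is scaled by its probability of belonging to the
// mixture before the mean and covariance are recomputed.  Random data, a
// single Gaussian, and the one-observation-per-column layout are assumed.
arma::mat weightedUpdateSketch()
{
  const size_t dims = 2, points = 500;
  arma::mat observations = arma::randu<arma::mat>(dims, points);

  // Per-point mixture-membership probabilities and one Gaussian's (already
  // row-normalized) responsibilities.
  arma::vec probabilities = arma::randu<arma::vec>(points);
  arma::vec condProbCol = arma::randu<arma::vec>(points);

  // Effective weight of this Gaussian once each responsibility is scaled.
  const double probRowSum = arma::accu(condProbCol % probabilities);

  // Weighted mean, as in the loop above.
  arma::vec mean = (observations * (condProbCol % probabilities)) / probRowSum;

  // Weighted covariance, as in the loop above.
  arma::mat tmp = observations -
      (mean * arma::ones<arma::rowvec>(observations.n_cols));
  arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
      arma::trans(condProbCol % probabilities));

  return (tmp * arma::trans(tmpB)) / probRowSum;
}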