/**
 * Takes one gradient step on theta, choosing the step size by repeated halving.
 *
 * The gradient is evaluated once at the incoming theta. Starting from
 * alpha_start, the candidate theta + alpha * gradient is formed and alpha is
 * halved after every trial. The search ends at the first candidate whose
 * log-likelihood is not lower than that of the incoming theta, or as soon as
 * alpha is no longer greater than alpha_stop.
 *
 * NOTE: the last candidate tried is written to theta even if the search ran
 * out of step sizes without finding a non-decreasing step.
 *
 * @param X            Feature lists, forwarded to Gradient() and LogLikelihood().
 * @param theta        Parameter vector; overwritten with the accepted candidate.
 * @param Y            Labels, forwarded to Gradient() and LogLikelihood().
 * @param alpha_start  First step size tried.
 * @param alpha_stop   The search stops once alpha <= alpha_stop; if alpha_start
 *                     is not greater than alpha_stop, theta is left untouched.
 */
void UpdateTheta( std::vector< std::list< Node<T1,T2> > > const& X,
                  std::vector< double >& theta,
                  std::vector<int> const& Y,
                  const double alpha_start,
                  const double alpha_stop ){
    const std::vector<double> gradient = Gradient( X, theta, Y );

    // No step size is allowed: nothing to try, theta stays as it is.
    if( !( alpha_start > alpha_stop ) ){
        return;
    }

    // theta does not change during the search, so its log-likelihood is
    // evaluated once instead of once per trial step.
    // (Assumes LogLikelihood() has no side effects -- TODO confirm.)
    const double logLikelihood = LogLikelihood( X, theta, Y );

    std::vector<double> new_theta = theta;
    double alpha = alpha_start;
    double newLogLikelihood;

    do{
        for( unsigned int k = 0; k < new_theta.size(); ++k ){
            new_theta[k] = theta[k] + alpha * gradient[k];
        }
        newLogLikelihood = LogLikelihood( X, new_theta, Y );
        alpha = alpha/2;
    } while( logLikelihood > newLogLikelihood && alpha > alpha_stop );

    theta = new_theta;
}
///////////////////////////////////////////////////////////////////////////// // Likelihood distribution of the eloDraw parameter ///////////////////////////////////////////////////////////////////////////// void CBradleyTerry::GetDrawEloDist(CCDistribution &cdist) const { double eloPrevious = eloDraw; for (int i = cdist.GetSize(); --i >= 0;) { eloDraw = cdist.ValueFromIndex(i); cdist.SetProbability(i, LogLikelihood()); } cdist.LogNormalize(); eloDraw = eloPrevious; }
///////////////////////////////////////////////////////////////////////////// // Get the likelihood distribution of one player // The ratings of opponents are supposed to be exact ///////////////////////////////////////////////////////////////////////////// void CBradleyTerry::GetPlayerDist(int Player, CCDistribution &cdist) const { std::vector<double> veloBackup(velo); for (int i = cdist.GetSize(); --i >= 0;) { velo[Player] = cdist.ValueFromIndex(i); if (crs.GetPlayers() > 1) { double Delta = (velo[Player] - veloBackup[Player]) / (crs.GetPlayers() - 1); for (int j = crs.GetPlayers(); --j >= 0;) if (j != Player) velo[j] = veloBackup[j] - Delta; } cdist.SetProbability(i, LogLikelihood(Player)); } cdist.LogNormalize(); velo = veloBackup; }
void TrainLogistic( std::vector< int > const& Training_Y, std::vector< std::list< Node<T1,T2> > > const& Training_X, std::vector< double >& theta, const double alpha_start, const double alpha_stop, const double epsilon, const unsigned int maxits ){ double logLikelihood, newLogLikelihood; unsigned int iter = 0; logLikelihood = 999999; newLogLikelihood = -9999999; while( fabs( newLogLikelihood - logLikelihood ) > epsilon && iter < maxits ){ logLikelihood = newLogLikelihood; UpdateTheta( Training_X, theta, Training_Y, alpha_start, alpha_stop ); newLogLikelihood = LogLikelihood( Training_X, theta, Training_Y ); if( iter % 100 == 0 ){ std::cout << "Logistic Regression Iteration: " << std::setw(5) << iter << "\t" << "Log-Likelihood: " << std::setw(8) << newLogLikelihood << "\t" << "abs( Change in Log-Likelihood ): " << std::setw(12) << fabs( newLogLikelihood - logLikelihood ) << std::endl; } iter++; } std::cout << "Logistic Regression Iteration: " << std::setw(5) << iter << "\t" << "Log-Likelihood: " << std::setw(8) << newLogLikelihood << "\t" << "abs( Change in Log-Likelihood ): " << std::setw(12) << fabs( newLogLikelihood - logLikelihood ) << std::endl; if( iter == maxits ){ std::cout << "WARNING: Algorithm did not converge." << std::endl; } }
/**
 * Fits the mixture parameters (means, covariances, weights) to the
 * observations by iterating expectation and maximization steps.
 *
 * Iteration continues while the absolute change in log-likelihood exceeds
 * `tolerance` and the iteration counter (which starts at 1) differs from
 * `maxIterations`.
 *
 * @param observations     Data matrix; condProb is sized with one row per
 *                         column of this matrix.
 * @param means            Component means; updated in place.
 * @param covariances      Component covariances; updated in place.
 * @param weights          Component weights; updated in place.
 * @param useInitialModel  When false, InitialClustering() is run first to
 *                         produce the starting parameters; when true, the
 *                         incoming parameters are used as the start.
 */
void EMFit<InitialClusteringType, CovarianceConstraintPolicy>::Estimate(
    const arma::mat& observations,
    std::vector<arma::vec>& means,
    std::vector<arma::mat>& covariances,
    arma::vec& weights,
    const bool useInitialModel)
{
  // Only perform initial clustering if the user wanted it.
  if (!useInitialModel)
    InitialClustering(observations, means, covariances, weights);

  double l = LogLikelihood(observations, means, covariances, weights);

  Log::Debug << "EMFit::Estimate(): initial clustering log-likelihood: "
      << l << std::endl;

  double lOld = -DBL_MAX;

  // condProb(j, i): conditional probability of Gaussian i for column j of
  // observations.
  arma::mat condProb(observations.n_cols, means.size());

  // Iterate to update the model until no more improvement is found.
  size_t iteration = 1;
  while (std::abs(l - lOld) > tolerance && iteration != maxIterations)
  {
    Log::Info << "EMFit::Estimate(): iteration " << iteration << ", "
        << "log-likelihood " << l << "." << std::endl;

    // Calculate the conditional probabilities of choosing a particular
    // Gaussian given the observations and the present parameter values.
    for (size_t i = 0; i < means.size(); i++)
    {
      // unsafe_col() gives a vector that shares memory with column i of
      // condProb, so phi() and the scaling below write into condProb directly.
      arma::vec condProbAlias = condProb.unsafe_col(i);
      phi(observations, means[i], covariances[i], condProbAlias);
      condProbAlias *= weights[i];
    }

    // Normalize row-wise.
    for (size_t i = 0; i < condProb.n_rows; i++)
    {
      // Avoid dividing by zero; if the probability for everything is 0, we
      // don't want to make it NaN.
      const double probSum = accu(condProb.row(i));
      if (probSum != 0.0)
        condProb.row(i) /= probSum;
    }

    // Sum of the probability of each Gaussian over all the observations.
    arma::vec probRowSums = trans(arma::sum(condProb, 0 /* columnwise */));

    for (size_t i = 0; i < means.size(); i++)
    {
      // Don't update if there's no probability of the Gaussian having points.
      // The centred matrices are only needed for the covariance update, so
      // they are built inside the guard instead of being computed and then
      // thrown away for an empty Gaussian.
      if (probRowSums[i] != 0.0)
      {
        // New mean from the updated conditional probabilities.
        means[i] = (observations * condProb.col(i)) / probRowSums[i];

        // New covariance from the updated conditional probabilities and the
        // updated mean.
        arma::mat tmp = observations -
            (means[i] * arma::ones<arma::rowvec>(observations.n_cols));
        arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
            trans(condProb.col(i)));

        covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];
      }

      // Apply covariance constraint (also to covariances left unchanged).
      constraint.ApplyConstraint(covariances[i]);
    }

    // Calculate the new weights using the updated conditional probabilities.
    weights = probRowSums / observations.n_cols;

    // Update values of l; calculate new log-likelihood.
    lOld = l;
    l = LogLikelihood(observations, means, covariances, weights);

    iteration++;
  }
}
/**
 * Fits the mixture parameters (means, covariances, weights) to the
 * observations by iterating expectation and maximization steps, with each
 * observation's contribution scaled by its entry in `probabilities`.
 *
 * InitialClustering() always provides the starting parameters. Iteration
 * continues while the absolute change in log-likelihood exceeds `tolerance`
 * and the iteration counter (which starts at 1) differs from `maxIterations`.
 *
 * A Gaussian whose weighted probability mass is zero keeps its previous mean
 * and covariance, and the weights are left untouched when `probabilities`
 * sums to zero; both cases would otherwise divide by zero and fill the model
 * with NaN.
 *
 * @param observations   Data matrix; condProb is sized with one row per
 *                       column of this matrix.
 * @param probabilities  Per-observation factor multiplied element-wise into
 *                       each column of condProb.
 * @param means          Component means; updated in place.
 * @param covariances    Component covariances; updated in place.
 * @param weights        Component weights; updated in place.
 */
void EMFit<InitialClusteringType>::Estimate(const arma::mat& observations,
                                            const arma::vec& probabilities,
                                            std::vector<arma::vec>& means,
                                            std::vector<arma::mat>& covariances,
                                            arma::vec& weights)
{
  InitialClustering(observations, means, covariances, weights);

  double l = LogLikelihood(observations, means, covariances, weights);

  Log::Debug << "EMFit::Estimate(): initial clustering log-likelihood: "
      << l << std::endl;

  double lOld = -DBL_MAX;

  // condProb(j, i): conditional probability of Gaussian i for column j of
  // observations.
  arma::mat condProb(observations.n_cols, means.size());

  // The probabilities never change, so their total is computed only once.
  const double probabilityTotal = accu(probabilities);

  // Iterate to update the model until no more improvement is found.
  size_t iteration = 1;
  while (std::abs(l - lOld) > tolerance && iteration != maxIterations)
  {
    // Calculate the conditional probabilities of choosing a particular
    // Gaussian given the observations and the present parameter values.
    for (size_t i = 0; i < means.size(); i++)
    {
      // unsafe_col() gives a vector that shares memory with column i of
      // condProb, so phi() and the scaling below write into condProb directly.
      arma::vec condProbAlias = condProb.unsafe_col(i);
      phi(observations, means[i], covariances[i], condProbAlias);
      condProbAlias *= weights[i];
    }

    // Normalize row-wise.
    for (size_t i = 0; i < condProb.n_rows; i++)
    {
      // Avoid dividing by zero; if the probability for everything is 0, we
      // don't want to make it NaN.
      const double probSum = accu(condProb.row(i));
      if (probSum != 0.0)
        condProb.row(i) /= probSum;
    }

    // Sum of weighted probabilities of each Gaussian over all observations.
    arma::vec probRowSums(means.size());

    for (size_t i = 0; i < means.size(); i++)
    {
      // Conditional probability of each point being from Gaussian i
      // multiplied by the probability of the point being from this mixture
      // model. Computed once and reused for the sum, mean and covariance.
      const arma::vec weightedProb = condProb.col(i) % probabilities;
      probRowSums[i] = accu(weightedProb);

      // Don't update if there's no probability of the Gaussian having points;
      // dividing by a zero sum would turn the mean and covariance into NaN.
      if (probRowSums[i] != 0.0)
      {
        means[i] = (observations * weightedProb) / probRowSums[i];

        // New covariance from the updated conditional probabilities and the
        // updated mean.
        arma::mat tmp = observations -
            (means[i] * arma::ones<arma::rowvec>(observations.n_cols));
        arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
            trans(weightedProb));

        covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];
      }

      // Ensure positive-definiteness.  TODO: make this more efficient.
      if (forcePositive && det(covariances[i]) <= 1e-50)
      {
        Log::Debug << "Covariance matrix " << i << " is not positive definite. "
            << "Adding perturbation." << std::endl;

        double perturbation = 1e-30;
        while (det(covariances[i]) <= 1e-50)
        {
          covariances[i].diag() += perturbation;
          perturbation *= 10; // Slow, but we don't want to add too much.
        }
      }
    }

    // Calculate the new weights using the updated conditional probabilities.
    // Skipped when the probabilities carry no mass at all (0 / 0 otherwise).
    if (probabilityTotal != 0.0)
      weights = probRowSums / probabilityTotal;

    // Update values of l; calculate new log-likelihood.
    lOld = l;
    l = LogLikelihood(observations, means, covariances, weights);

    iteration++;
  }
}
/**
 * Trains this GMM on the observations, weighting each observation by its
 * entry in `probabilities`, and keeps the best model out of `trials` runs.
 *
 * The first run trains directly into this object's means, covariances and
 * weights. Every further run trains a temporary model, which replaces the
 * stored one only if its log-likelihood is strictly higher.
 *
 * @param observations      Data to fit.
 * @param probabilities     Per-observation probabilities, forwarded to the
 *                          fitter for every trial.
 * @param trials            Number of training runs; 0 performs no training.
 * @param useExistingModel  Forwarded to the fitter. When true and more than
 *                          one trial is run, every trial starts from a copy of
 *                          the model as it was on entry.
 * @return Log-likelihood of the retained model, or -DBL_MAX if trials is 0.
 */
double GMM<FittingType>::Estimate(const arma::mat& observations,
                                  const arma::vec& probabilities,
                                  const size_t trials,
                                  const bool useExistingModel)
{
  if (trials == 0)
    return -DBL_MAX; // It's what they asked for...

  // If each trial must start from the same initial location, we must save it
  // before the first trial overwrites the model. Not needed for one trial.
  std::vector<arma::vec> meansOrig;
  std::vector<arma::mat> covariancesOrig;
  arma::vec weightsOrig;
  if (trials > 1 && useExistingModel)
  {
    meansOrig = means;
    covariancesOrig = covariances;
    weightsOrig = weights;
  }

  // Train the first trial into the actual model position, so that if it's the
  // best we don't need to copy it. The user will have been warned earlier if
  // the GMM was initialized with no parameters (0 gaussians, dimensionality
  // of 0).
  fitter.Estimate(observations, probabilities, means, covariances, weights,
      useExistingModel);

  double bestLikelihood = LogLikelihood(observations, means, covariances,
      weights);

  if (trials > 1)
  {
    Rcpp::Rcout << "GMM::Estimate(): Log-likelihood of trial 0 is "
        << bestLikelihood << "." << std::endl;

    // Now the temporary model.
    std::vector<arma::vec> meansTrial(gaussians, arma::vec(dimensionality));
    std::vector<arma::mat> covariancesTrial(gaussians,
        arma::mat(dimensionality, dimensionality));
    arma::vec weightsTrial(gaussians);

    for (size_t trial = 1; trial < trials; ++trial)
    {
      if (useExistingModel)
      {
        meansTrial = meansOrig;
        covariancesTrial = covariancesOrig;
        weightsTrial = weightsOrig;
      }

      // The probabilities must be passed here as well; leaving them out
      // would train these trials without the observation probabilities and
      // compare them against the probability-weighted trial 0.
      fitter.Estimate(observations, probabilities, meansTrial,
          covariancesTrial, weightsTrial, useExistingModel);

      // Check to see if the log-likelihood of this one is better.
      const double newLikelihood = LogLikelihood(observations, meansTrial,
          covariancesTrial, weightsTrial);

      Rcpp::Rcout << "GMM::Estimate(): Log-likelihood of trial " << trial
          << " is " << newLikelihood << "." << std::endl;

      if (newLikelihood > bestLikelihood)
      {
        // Save new likelihood and copy new model.
        bestLikelihood = newLikelihood;

        means = meansTrial;
        covariances = covariancesTrial;
        weights = weightsTrial;
      }
    }
  }

  // Report final log-likelihood and return it.
  Rcpp::Rcout << "GMM::Estimate(): log-likelihood of trained GMM is "
      << bestLikelihood << "." << std::endl;
  return bestLikelihood;
}