void uDIIS::solve_P(arma::mat & Pa, arma::mat & Pb) {
  arma::vec sol=get_w();

  // Form weighted density matrix
  Pa.zeros();
  Pb.zeros();
  for(size_t i=0;i<stack.size();i++) {
    Pa+=sol(i)*stack[i].Pa;
    Pb+=sol(i)*stack[i].Pb;
  }
}
void uDIIS::solve_F(arma::mat & Fa, arma::mat & Fb) {
  arma::vec sol=get_w();

  // Form weighted Fock matrix
  Fa.zeros();
  Fb.zeros();
  for(size_t i=0;i<stack.size();i++) {
    Fa+=sol(i)*stack[i].Fa;
    Fb+=sol(i)*stack[i].Fb;
  }
}
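// Both solve_P and solve_F above, like the rDIIS::solve_P and rDIIS::solve_F
// routines further down, follow one pattern: extrapolate a matrix as the
// weighted sum of the matrices stored on the stack, using the DIIS weights
// from get_w(). A minimal standalone sketch of that pattern, assuming only
// Armadillo; the helper name weighted_combination is illustrative and is not
// part of the class above.
arma::mat weighted_combination(const std::vector<arma::mat> & mats, const arma::vec & w) {
  arma::mat M(mats[0].n_rows, mats[0].n_cols, arma::fill::zeros);
  for(size_t i=0;i<mats.size();i++)
    M+=w(i)*mats[i];
  return M;
}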
void HMM<Distribution>::Backward(const arma::mat& dataSeq,
                                 const arma::vec& scales,
                                 arma::mat& backwardProb) const
{
  // Our goal is to calculate the backward probabilities:
  // P(X_k | o_{k + 1:T}) for all possible states X_k, for each time point k.
  backwardProb.zeros(transition.n_rows, dataSeq.n_cols);

  // The last element probability is 1.
  backwardProb.col(dataSeq.n_cols - 1).fill(1);

  // Now step backwards through all other observations.
  for (size_t t = dataSeq.n_cols - 2; t + 1 > 0; t--)
  {
    for (size_t j = 0; j < transition.n_rows; j++)
    {
      // The backward probability of state j at time t is the sum over all
      // states of the probability of the next state having been a transition
      // from the current state multiplied by the probability of each of those
      // states emitting the given observation.
      for (size_t state = 0; state < transition.n_rows; state++)
        backwardProb(j, t) += transition(state, j) * backwardProb(state, t + 1)
            * emission[state].Probability(dataSeq.unsafe_col(t + 1));

      // Normalize by the weights from the forward algorithm.
      backwardProb(j, t) /= scales[t + 1];
    }
  }
}
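// With the scaling convention above (forward columns normalized by their sums,
// backward entries divided by the same scales), the smoothed posteriors
// P(X_t | o_{1:T}) come from the elementwise product of the two tables. A
// minimal sketch, assuming forwardProb and backwardProb were produced by the
// Forward()/Backward() routines in this file; the helper name is illustrative,
// and the explicit column normalization is kept for safety even though the
// scaling should already make each column sum to one.
arma::mat StatePosteriors(const arma::mat& forwardProb,
                          const arma::mat& backwardProb)
{
  arma::mat stateProb = forwardProb % backwardProb;
  for (size_t t = 0; t < stateProb.n_cols; t++)
    stateProb.col(t) /= arma::accu(stateProb.col(t));
  return stateProb;
}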
void SGDTestFunction::Gradient(const arma::mat& coordinates,
                               const size_t begin,
                               arma::mat& gradient,
                               const size_t batchSize) const
{
  gradient.zeros(3);

  for (size_t i = begin; i < begin + batchSize; ++i)
  {
    switch (visitationOrder(i))
    {
      case 0:
        if (coordinates[0] >= 0)
          gradient[0] += std::exp(-coordinates[0]);
        else
          gradient[0] += -std::exp(coordinates[0]);
        break;

      case 1:
        gradient[1] += 2 * coordinates[1];
        break;

      case 2:
        gradient[2] += 4 * std::pow(coordinates[2], 3) + 6 * coordinates[2];
        break;
    }
  }

  gradient /= batchSize;
}
inline static void Initialize(const MatType& V,
                              const size_t r,
                              arma::mat& W,
                              arma::mat& H)
{
  const size_t n = V.n_rows;
  const size_t m = V.n_cols;

  if (columnsToAverage > m)
  {
    Log::Warn << "Number of random columns (columnsToAverage) is more than "
        << "the number of columns available in the V matrix; weird results "
        << "may ensue!" << std::endl;
  }

  W.zeros(n, r);

  // Initialize each column of W as a sum of randomly chosen columns of V.
  for (size_t col = 0; col < r; col++)
  {
    for (size_t randCol = 0; randCol < columnsToAverage; randCol++)
    {
      // .col() does not work in this case, as of Armadillo 3.920.
      W.unsafe_col(col) += V.col(math::RandInt(0, m));
    }
  }

  // Now divide by the number of columns averaged.
  W /= columnsToAverage;

  // Initialize H to random values.
  H.randu(r, m);
}
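// The initializer above depends on the enclosing class (columnsToAverage,
// math::RandInt), so a minimal sketch of the same random-column-averaging rule
// is restated below as a free function with the averaging count passed
// explicitly. The name RandomAcolInit and the parameter names are assumptions
// for illustration, not the library's API.
void RandomAcolInit(const arma::mat& V, const size_t r,
                    const size_t columnsToAverage,
                    arma::mat& W, arma::mat& H)
{
  W.zeros(V.n_rows, r);
  for (size_t col = 0; col < r; col++)
  {
    for (size_t a = 0; a < columnsToAverage; a++)
    {
      // Pick a random column index in [0, n_cols - 1] and accumulate it.
      const arma::uword c = arma::as_scalar(arma::randi<arma::uvec>(1,
          arma::distr_param(0, (int) V.n_cols - 1)));
      W.col(col) += V.col(c);
    }
  }
  W /= columnsToAverage;   // Each column of W is now an average of columns of V.
  H.randu(r, V.n_cols);
}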
void LovaszThetaSDP::GradientConstraint(const size_t index,
                                        const arma::mat& coordinates,
                                        arma::mat& gradient)
{
  // Log::Debug << "Gradient of constraint " << index << " is " << std::endl;
  if (index == 0) // This is the constraint Tr(X) = 1.
  {
    gradient = 2 * coordinates; // d/dR (Tr(R R^T)) = 2 R.
    // std::cout << gradient;
    return;
  }

  // Log::Debug << "Evaluating gradient of constraint " << index << " with ";
  size_t i = edges(0, index - 1);
  size_t j = edges(1, index - 1);
  // Log::Debug << "i = " << i << " and j = " << j << "." << std::endl;

  // Since the constraint is (R^T R)_ij, the gradient for (x, y) will be (I
  // derived this for one of the MVU constraints):
  //   0     , y != i, y != j
  //   2 R_xj, y  = i, y != j
  //   2 R_xi, y != i, y  = j
  //   4 R_xy, y  = i, y  = j
  // This results in the gradient matrix having two nonzero columns; in column
  // i the elements are R_nj, where n is the row, and in column j the elements
  // are R_ni.
  gradient.zeros(coordinates.n_rows, coordinates.n_cols);

  gradient.col(i) = coordinates.col(j);
  gradient.col(j) += coordinates.col(i); // In case j = i (shouldn't happen).

  // std::cout << gradient;
}
void RegularizedSVDFunction::Gradient(const arma::mat& parameters,
                                      arma::mat& gradient) const
{
  // For an example with rating corresponding to user 'i' and item 'j', the
  // gradients for the parameters are as follows:
  //   grad(u(i)) = lambda * u(i) - error * v(j)
  //   grad(v(j)) = lambda * v(j) - error * u(i)
  // 'error' is the prediction error for that example, which is:
  //   rating(i, j) - u(i).t() * v(j)
  // The full gradient is calculated by summing the contributions over all the
  // training examples.
  gradient.zeros(rank, numUsers + numItems);

  for (size_t i = 0; i < data.n_cols; i++)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, i);
    double ratingError = rating - arma::dot(parameters.col(user),
                                            parameters.col(item));

    // Gradient is non-zero only for the parameter columns corresponding to the
    // example.
    gradient.col(user) += 2 * (lambda * parameters.col(user) -
                               ratingError * parameters.col(item));
    gradient.col(item) += 2 * (lambda * parameters.col(item) -
                               ratingError * parameters.col(user));
  }
}
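// The gradient above is consistent with a squared-error objective plus L2
// regularization on the two parameter columns touched by each example. A
// minimal sketch of that objective is given below; it is inferred from the
// gradient rather than copied from the library, and the member names (data,
// numUsers, lambda) are assumed to match the function above.
double RegularizedSVDObjective(const arma::mat& data,
                               const arma::mat& parameters,
                               const size_t numUsers,
                               const double lambda)
{
  double objective = 0.0;
  for (size_t i = 0; i < data.n_cols; i++)
  {
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Squared prediction error plus regularization for this example; its
    // derivative with respect to the user column is 2 * (lambda * u - error * v),
    // matching the gradient computed above.
    const double error = data(2, i) -
        arma::dot(parameters.col(user), parameters.col(item));
    objective += error * error +
        lambda * (arma::dot(parameters.col(user), parameters.col(user)) +
                  arma::dot(parameters.col(item), parameters.col(item)));
  }
  return objective;
}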
void GockenbachFunction::GradientConstraint(const size_t index,
                                            const arma::mat& coordinates,
                                            arma::mat& gradient)
{
  gradient.zeros(3, 1);

  switch (index)
  {
    case 0:
      // g'_x1(x) = -1
      // g'_x2(x) = -1
      // g'_x3(x) = 1
      gradient[0] = -1;
      gradient[1] = -1;
      gradient[2] = 1;
      break;

    case 1:
      // h'_x1(x) = -2 x_1
      // h'_x2(x) = 0
      // h'_x3(x) = 1
      gradient[0] = -2 * coordinates[0];
      gradient[2] = 1;
      break;
  }
}
void rDIIS::solve_P(arma::mat & P) {
  arma::vec sol=get_w();

  // Form weighted density matrix
  P.zeros();
  for(size_t i=0;i<stack.size();i++)
    P+=sol(i)*stack[i].P;
}
void rDIIS::solve_F(arma::mat & F) {
  arma::vec sol=get_w();

  // Form weighted Fock matrix
  F.zeros();
  for(size_t i=0;i<stack.size();i++)
    F+=sol(i)*stack[i].F;
}
//! Calculate the gradient of one of the individual functions.
void GeneralizedRosenbrockFunction::Gradient(const arma::mat& coordinates,
                                             const size_t i,
                                             arma::mat& gradient) const
{
  gradient.zeros(n);
  gradient[i] = 400 * (std::pow(coordinates[i], 3) - coordinates[i] *
      coordinates[i + 1]) + 2 * (coordinates[i] - 1);
  gradient[i + 1] = 200 * (coordinates[i + 1] - std::pow(coordinates[i], 2));
}
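// The gradient above matches the i-th term of the standard generalized
// Rosenbrock sum, f_i(x) = 100 (x_{i+1} - x_i^2)^2 + (1 - x_i)^2: its partial
// derivatives are 400 (x_i^3 - x_i x_{i+1}) + 2 (x_i - 1) and
// 200 (x_{i+1} - x_i^2). A minimal sketch of that per-term objective,
// reconstructed from the gradient rather than copied from the library:
double GeneralizedRosenbrockTerm(const arma::mat& coordinates, const size_t i)
{
  const double a = coordinates[i + 1] - std::pow(coordinates[i], 2);
  const double b = 1.0 - coordinates[i];
  return 100.0 * a * a + b * b;
}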
void HMM<Distribution>::Smooth(const arma::mat& dataSeq,
                               arma::mat& smoothSeq) const
{
  // First run the forward-backward algorithm (via Estimate()) to get the
  // smoothed state probabilities.
  arma::mat stateProb;
  Estimate(dataSeq, stateProb);

  // Compute expected emissions.
  // Will not work for distributions without a Mean() function.
  smoothSeq.zeros(dimensionality, dataSeq.n_cols);
  for (size_t i = 0; i < emission.size(); i++)
    smoothSeq += emission[i].Mean() * stateProb.row(i);
}
void RNN<LayerTypes, OutputLayerType, InitializationRuleType,
         PerformanceFunction
>::Gradient(const arma::mat& /* unused */,
            const size_t i,
            arma::mat& gradient)
{
  if (gradient.is_empty())
  {
    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
  }
  else
  {
    gradient.zeros();
  }

  Evaluate(parameter, i, false);

  arma::mat currentGradient = arma::mat(gradient.n_rows, gradient.n_cols);
  NetworkGradients(currentGradient, network);

  const arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows,
      1, false, true);

  // Iterate through the input sequence and perform the feed backward pass.
  for (seqNum = seqLen - 1; seqNum >= 0; seqNum--)
  {
    // Load the network activation for the upcoming backward pass.
    LoadActivations(input.rows(seqNum * inputSize,
        (seqNum + 1) * inputSize - 1), network);

    // Perform the backward pass.
    if (seqOutput)
    {
      arma::mat seqError = error.unsafe_col(seqNum);
      Backward(seqError, network);
    }
    else
    {
      Backward(error, network);
    }

    // Link the parameters and update the gradients.
    LinkParameter(network);
    UpdateGradients<>(network);

    // Update the overall gradient.
    gradient += currentGradient;

    if (seqNum == 0)
      break;
  }
}
void AugLagrangianTestFunction::GradientConstraint(const size_t index,
    const arma::mat& /* coordinates */,
    arma::mat& gradient)
{
  // If the user passed an invalid index (not 0), we will return a zero
  // gradient.
  gradient.zeros(2, 1);

  if (index == 0)
  {
    // c'_x1(x) = 1
    // c'_x2(x) = 1
    gradient.ones(2, 1); // Use a shortcut instead of assigning individually.
  }
}
//! Calculate the gradient of one of the individual functions.
void GeneralizedRosenbrockFunction::Gradient(const arma::mat& coordinates,
                                             const size_t i,
                                             arma::mat& gradient,
                                             const size_t batchSize) const
{
  gradient.zeros(n);
  for (size_t j = i; j < i + batchSize; ++j)
  {
    const size_t p = visitationOrder[j];
    gradient[p] = 400 * (std::pow(coordinates[p], 3) - coordinates[p] *
        coordinates[p + 1]) + 2 * (coordinates[p] - 1);
    gradient[p + 1] = 200 * (coordinates[p + 1] - std::pow(coordinates[p], 2));
  }
}
void SA<FunctionType, CoolingScheduleType>::MoveControl(const size_t nMoves,
                                                        arma::mat& accept)
{
  arma::mat target;
  target.copy_size(accept);
  target.fill(0.44);

  moveSize = arma::log(moveSize);
  moveSize += gain * (accept / (double) nMoves - target);
  moveSize = arma::exp(moveSize);

  // To avoid the use of element-wise arma::min(), which is only available in
  // Armadillo after v3.930, we use a for loop here instead.
  for (size_t i = 0; i < accept.n_elem; ++i)
    moveSize(i) = (moveSize(i) > maxMove(i)) ? maxMove(i) : moveSize(i);

  accept.zeros();
}
double find_P(const arma::mat& X, const arma::mat& Y, double sigma2,
              float outliers, arma::vec& P1, arma::vec& Pt1, arma::mat& PX,
              bool use_fgt, const float epsilon) {
  P1.zeros();
  Pt1.zeros();
  PX.zeros();

  const arma::uword N = X.n_rows;
  const arma::uword M = Y.n_rows;
  const arma::uword D = Y.n_cols;

  const double h = std::sqrt(2 * sigma2);
  const double ndi = (outliers * M * std::pow(2 * M_PI * sigma2, 0.5 * D)) /
      ((1 - outliers) * N);

  arma::vec q = arma::ones<arma::vec>(M);

  fgt::GaussTransformUnqPtr transformY;
  if (use_fgt) {
    transformY = fgt::choose_gauss_transform(Y, h, epsilon);
  } else {
    transformY.reset(new fgt::Direct(Y, h));
  }
  arma::vec denomP = transformY->compute(X, q);

  denomP = denomP + ndi;
  Pt1 = 1 - ndi / denomP;
  q = 1 / denomP;

  fgt::GaussTransformUnqPtr transformX;
  if (use_fgt) {
    transformX = fgt::choose_gauss_transform(X, h, epsilon);
  } else {
    transformX.reset(new fgt::Direct(X, h));
  }
  P1 = transformX->compute(Y, q);

  for (arma::uword i = 0; i < D; ++i) {
    q = X.col(i) / denomP;
    arma::vec c = PX.unsafe_col(i);
    PX.col(i) = transformX->compute(Y, q);
  }

  return -arma::sum(arma::log(denomP)) + D * N * std::log(sigma2) / 2;
}
void HMM<Distribution>::Filter(const arma::mat& dataSeq,
                               arma::mat& filterSeq,
                               size_t ahead) const
{
  // First run the forward algorithm.
  arma::mat forwardProb;
  arma::vec scales;
  Forward(dataSeq, scales, forwardProb);

  // Propagate the state distributions ahead: multiply by transition^ahead,
  // using repeated multiplication since arma::pow() is element-wise.
  for (size_t step = 0; step < ahead; ++step)
    forwardProb = transition * forwardProb;

  // Compute expected emissions.
  // Will not work for distributions without a Mean() function.
  filterSeq.zeros(dimensionality, dataSeq.n_cols);
  for (size_t i = 0; i < emission.size(); i++)
    filterSeq += emission[i].Mean() * forwardProb.row(i);
}
double mlpack::cf::SVDWrapper<Factorizer>::Apply(const arma::mat& V,
                                                 arma::mat& W,
                                                 arma::mat& sigma,
                                                 arma::mat& H) const
{
  // Get the SVD factorization.
  arma::vec E;
  factorizer.Apply(W, E, H, V);

  // Construct the sigma matrix.
  sigma.zeros(V.n_rows, V.n_cols);
  for (size_t i = 0; i < sigma.n_rows && i < sigma.n_cols; i++)
    sigma(i, i) = E(i, 0);

  arma::mat V_rec = W * sigma * arma::trans(H);

  // Return the normalized Frobenius error.
  return arma::norm(V - V_rec, "fro") / arma::norm(V, "fro");
}
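// The wrapper above only requires that factorizer.Apply(W, E, H, V) fill W,
// the singular values E, and H with the SVD of V. A minimal sketch of such a
// factorizer built directly on arma::svd(); the class name ArmaSVDFactorizer
// is an illustrative assumption, not the library's.
class ArmaSVDFactorizer
{
 public:
  void Apply(arma::mat& W, arma::vec& E, arma::mat& H,
             const arma::mat& V) const
  {
    // arma::svd(U, s, X, M) computes M = U * diagmat(s) * X.t(), so after the
    // call V is approximately W * diagmat(E) * H.t().
    arma::svd(W, E, H, V);
  }
};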
void AugLagrangianFunction<LagrangianFunction>::Gradient(
    const arma::mat& coordinates,
    arma::mat& gradient) const
{
  // The augmented Lagrangian's gradient is evaluated as
  //   f'(x) + {(-lambda_i + sigma * c_i(x)) * c'_i(x)} for all constraints.
  gradient.zeros();
  function.Gradient(coordinates, gradient);

  arma::mat constraintGradient; // Temporary for constraint gradients.
  for (size_t i = 0; i < function.NumConstraints(); i++)
  {
    function.GradientConstraint(i, coordinates, constraintGradient);

    // Now calculate scaling factor and add to existing gradient.
    arma::mat tmpGradient;
    tmpGradient = (-lambda[i] + sigma *
        function.EvaluateConstraint(i, coordinates)) * constraintGradient;
    gradient += tmpGradient;
  }
}
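// The gradient above corresponds to the augmented Lagrangian
//   L(x) = f(x) - sum_i lambda_i c_i(x) + (sigma / 2) sum_i c_i(x)^2,
// whose derivative is f'(x) + sum_i (-lambda_i + sigma c_i(x)) c'_i(x).
// A minimal sketch of that objective, assuming the same Evaluate(),
// EvaluateConstraint(), lambda, and sigma interface used above; the free
// function form is illustrative only.
template<typename LagrangianFunction>
double AugLagrangianObjective(const LagrangianFunction& function,
                              const arma::mat& coordinates,
                              const arma::vec& lambda,
                              const double sigma)
{
  double objective = function.Evaluate(coordinates);
  for (size_t i = 0; i < function.NumConstraints(); i++)
  {
    // Linear multiplier term plus quadratic penalty for each constraint.
    const double c = function.EvaluateConstraint(i, coordinates);
    objective += -lambda[i] * c + (sigma / 2.0) * c * c;
  }
  return objective;
}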
void HMM<Distribution>::Forward(const arma::mat& dataSeq,
                                arma::vec& scales,
                                arma::mat& forwardProb) const
{
  // Our goal is to calculate the forward probabilities:
  // P(X_k | o_{1:k}) for all possible states X_k, for each time point k.
  forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
  scales.zeros(dataSeq.n_cols);

  // The first entry in the forward algorithm uses the initial state
  // probabilities.  Note that MATLAB assumes that the starting state (at
  // t = -1) is state 0; this is not our assumption here.  To force that
  // behavior, you could append a single starting state to every single data
  // sequence and that should produce results in line with MATLAB.
  for (size_t state = 0; state < transition.n_rows; state++)
    forwardProb(state, 0) = initial(state) *
        emission[state].Probability(dataSeq.unsafe_col(0));

  // Then normalize the column.
  scales[0] = accu(forwardProb.col(0));
  forwardProb.col(0) /= scales[0];

  // Now compute the probabilities for each successive observation.
  for (size_t t = 1; t < dataSeq.n_cols; t++)
  {
    for (size_t j = 0; j < transition.n_rows; j++)
    {
      // The forward probability of state j at time t is the sum over all
      // states of the probability of the previous state transitioning to the
      // current state and emitting the given observation.
      forwardProb(j, t) = accu(forwardProb.col(t - 1) %
          trans(transition.row(j))) *
          emission[j].Probability(dataSeq.unsafe_col(t));
    }

    // Normalize probability.
    scales[t] = accu(forwardProb.col(t));
    forwardProb.col(t) /= scales[t];
  }
}
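// Because each column of forwardProb is normalized by its sum, the scaling
// factors carry the data likelihood: P(o_{1:T}) is the product of the scales,
// so the log-likelihood of a sequence is the sum of their logarithms. A
// minimal sketch, assuming scales was filled by a Forward() routine like the
// one above; the helper name is illustrative.
double SequenceLogLikelihood(const arma::vec& scales)
{
  return arma::accu(arma::log(scales));
}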
void SGDTestFunction::Gradient(const arma::mat& coordinates,
                               const size_t i,
                               arma::mat& gradient) const
{
  gradient.zeros(3);

  switch (i)
  {
    case 0:
      if (coordinates[0] >= 0)
        gradient[0] = std::exp(-coordinates[0]);
      else
        gradient[0] = -std::exp(coordinates[0]);
      break;

    case 1:
      gradient[1] = 2 * coordinates[1];
      break;

    case 2:
      gradient[2] = 4 * std::pow(coordinates[2], 3) + 6 * coordinates[2];
      break;
  }
}
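// Both Gradient() overloads of SGDTestFunction (this one and the batch variant
// earlier) are consistent with a three-term separable test objective,
//   f_0(x) = -exp(-|x_0|),  f_1(x) = x_1^2,  f_2(x) = x_2^4 + 3 x_2^2.
// A minimal sketch of that objective, reconstructed from the gradients rather
// than copied from the library:
double SGDTestObjective(const arma::mat& coordinates)
{
  return -std::exp(-std::abs(coordinates[0])) +
      std::pow(coordinates[1], 2) +
      std::pow(coordinates[2], 4) + 3 * std::pow(coordinates[2], 2);
}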
void HMM<Distribution>::Forward(const arma::mat& dataSeq,
                                arma::vec& scales,
                                arma::mat& forwardProb) const
{
  // Our goal is to calculate the forward probabilities:
  // P(X_k | o_{1:k}) for all possible states X_k, for each time point k.
  forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
  scales.zeros(dataSeq.n_cols);

  // Starting state (at t = -1) is assumed to be state 0.  This is what MATLAB
  // does in their hmmdecode() function, so we will emulate that behavior.
  for (size_t state = 0; state < transition.n_rows; state++)
    forwardProb(state, 0) = transition(state, 0) *
        emission[state].Probability(dataSeq.unsafe_col(0));

  // Then normalize the column.
  scales[0] = accu(forwardProb.col(0));
  forwardProb.col(0) /= scales[0];

  // Now compute the probabilities for each successive observation.
  for (size_t t = 1; t < dataSeq.n_cols; t++)
  {
    for (size_t j = 0; j < transition.n_rows; j++)
    {
      // The forward probability of state j at time t is the sum over all
      // states of the probability of the previous state transitioning to the
      // current state and emitting the given observation.
      forwardProb(j, t) = accu(forwardProb.col(t - 1) %
          trans(transition.row(j))) *
          emission[j].Probability(dataSeq.unsafe_col(t));
    }

    // Normalize probability.
    scales[t] = accu(forwardProb.col(t));
    forwardProb.col(t) /= scales[t];
  }
}
double PellegMooreKMeans<MetricType, MatType>::Iterate(
    const arma::mat& centroids,
    arma::mat& newCentroids,
    arma::Col<size_t>& counts)
{
  newCentroids.zeros(centroids.n_rows, centroids.n_cols);
  counts.zeros(centroids.n_cols);

  // Create rules object.
  typedef PellegMooreKMeansRules<MetricType, TreeType> RulesType;
  RulesType rules(dataset, centroids, newCentroids, counts, metric);

  // Use single-tree traverser.
  typename TreeType::template SingleTreeTraverser<RulesType> traverser(rules);

  // Now, do a traversal with a fake query index (since the query index is
  // irrelevant; we are checking each node with all clusters).
  traverser.Traverse(0, *tree);

  distanceCalculations += rules.DistanceCalculations();

  // Now, calculate how far the clusters moved, after normalizing them.
  double residual = 0.0;
  for (size_t c = 0; c < centroids.n_cols; ++c)
  {
    if (counts[c] > 0)
    {
      newCentroids.col(c) /= counts(c);
      residual += std::pow(metric.Evaluate(centroids.col(c),
          newCentroids.col(c)), 2.0);
    }
  }
  distanceCalculations += centroids.n_cols;

  return std::sqrt(residual);
}
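// Both Iterate() implementations here (the single-tree one above and the
// dual-tree one below) return how far the centroids moved, so a caller
// typically re-invokes them until that residual drops below a tolerance. A
// minimal, simplified sketch of that driver loop follows; the template name,
// the tolerance and maxIterations defaults, and the assumption that centroids
// can simply be overwritten each round (ignoring empty-cluster and centroid
// reordering details) are all illustrative assumptions.
template<typename KMeansImplType>
size_t RunLloydIterations(KMeansImplType& kmeans,
                          arma::mat& centroids,
                          const double tolerance = 1e-6,
                          const size_t maxIterations = 1000)
{
  arma::mat newCentroids;
  arma::Col<size_t> counts;
  size_t iteration = 0;
  while (iteration < maxIterations)
  {
    // One Lloyd iteration; residual is the total centroid movement.
    const double residual = kmeans.Iterate(centroids, newCentroids, counts);
    centroids = newCentroids;
    ++iteration;
    if (residual < tolerance)
      break;
  }
  return iteration;
}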
double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
    const arma::mat& centroids,
    arma::mat& newCentroids,
    arma::Col<size_t>& counts)
{
  newCentroids.zeros(centroids.n_rows, centroids.n_cols);
  counts.zeros(centroids.n_cols);
  if (clusterDistances.n_elem != centroids.n_cols + 1)
  {
    clusterDistances.set_size(centroids.n_cols + 1);
    clusterDistances.fill(DBL_MAX / 2.0); // To prevent overflow.
  }

  // Build a tree on the centroids.
  std::vector<size_t> oldFromNewCentroids;
  TreeType* centroidTree = BuildTree<TreeType>(
      const_cast<typename TreeType::Mat&>(centroids), oldFromNewCentroids);

  // Now run the dual-tree algorithm.
  typedef DualTreeKMeansRules<MetricType, TreeType> RulesType;
  RulesType rules(dataset, centroids, newCentroids, counts,
      oldFromNewCentroids, iteration, clusterDistances, distances,
      assignments, distanceIteration, metric);

  // Use the dual-tree traverser.
  //typename TreeType::template DualTreeTraverser<RulesType> traverser(rules);
  typename TreeType::template BreadthFirstDualTreeTraverser<RulesType>
      traverser(rules);

  traverser.Traverse(*centroidTree, *tree);

  distanceCalculations += rules.DistanceCalculations();

  // Now, calculate how far the clusters moved, after normalizing them.
  double residual = 0.0;
  clusterDistances.zeros();
  for (size_t c = 0; c < centroids.n_cols; ++c)
  {
    if (counts[c] == 0)
    {
      newCentroids.col(c).fill(DBL_MAX); // Should have happened anyway I think.
    }
    else
    {
      const size_t oldCluster = oldFromNewCentroids[c];
      newCentroids.col(oldCluster) /= counts(oldCluster);
      const double dist = metric.Evaluate(centroids.col(c),
          newCentroids.col(oldCluster));
      if (dist > clusterDistances[centroids.n_cols])
        clusterDistances[centroids.n_cols] = dist;
      clusterDistances[oldCluster] = dist;
      residual += std::pow(dist, 2.0);
    }
  }
  Log::Info << clusterDistances.t();

  delete centroidTree;

  ++iteration;
  return std::sqrt(residual);
}
void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
                                                const size_t i,
                                                arma::mat& gradient)
{
  // We will need to calculate p_i before this evaluation is done, so these two
  // variables will hold the information necessary for that.
  double numerator = 0;
  double denominator = 0;

  // The gradient involves two matrix terms which are eventually combined into
  // one.
  arma::mat firstTerm;
  arma::mat secondTerm;
  firstTerm.zeros(coordinates.n_rows, coordinates.n_cols);
  secondTerm.zeros(coordinates.n_rows, coordinates.n_cols);

  // Compute the stretched dataset.
  stretchedDataset = coordinates * dataset;

  for (size_t k = 0; k < dataset.n_cols; ++k)
  {
    // Don't consider the case where the points are the same.
    if (i == k)
      continue;

    // Calculate the numerator of p_ik.
    double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
                                       stretchedDataset.unsafe_col(k)));

    // If the points are in the same class, we must add to the second term of
    // the gradient as well as the numerator of p_i.  We will divide by the
    // denominator of p_ik later.  For x_ik we are not using stretched points.
    arma::vec x_ik = dataset.col(i) - dataset.col(k);
    if (labels[i] == labels[k])
    {
      numerator += eval;
      secondTerm += eval * x_ik * trans(x_ik);
    }

    // We always have to add to the denominator of p_i and the first term of
    // the gradient computation.  We will divide by the denominator of p_ik
    // later.
    denominator += eval;
    firstTerm += eval * x_ik * trans(x_ik);
  }

  // Calculate p_i.
  double p = 0;
  if (denominator == 0)
  {
    Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
    // If the denominator is zero, then all p_ik should be zero and there is
    // no gradient contribution from this point.
    gradient.zeros(coordinates.n_rows, coordinates.n_cols);
    return;
  }
  else
  {
    p = numerator / denominator;
    firstTerm /= denominator;
    secondTerm /= denominator;
  }

  // Now multiply the first term by p_i, and add the two together and multiply
  // all by 2 * A.  We negate it though, because our optimizer is a minimizer.
  gradient = -2 * coordinates * (p * firstTerm - secondTerm);
}
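// Analytic gradients like the one above are easy to get subtly wrong, so a
// central finite-difference check against the matching objective is a useful
// sanity test. A minimal, generic sketch follows; it assumes the function type
// exposes an Evaluate(coordinates, i) overload consistent with
// Gradient(coordinates, i, gradient), and the helper name and default step are
// illustrative.
template<typename FunctionType>
double MaxGradientError(FunctionType& f,
                        const arma::mat& coordinates,
                        const size_t i,
                        const double eps = 1e-6)
{
  arma::mat analytic;
  f.Gradient(coordinates, i, analytic);

  double maxError = 0.0;
  arma::mat perturbed = coordinates;
  for (arma::uword e = 0; e < coordinates.n_elem; ++e)
  {
    // Central difference in the e-th coordinate.
    perturbed[e] = coordinates[e] + eps;
    const double fPlus = f.Evaluate(perturbed, i);
    perturbed[e] = coordinates[e] - eps;
    const double fMinus = f.Evaluate(perturbed, i);
    perturbed[e] = coordinates[e];

    const double numeric = (fPlus - fMinus) / (2 * eps);
    maxError = std::max(maxError, std::abs(numeric - analytic[e]));
  }
  return maxError;
}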