コード例 #1
0
ファイル: diis.cpp プロジェクト: Monkey---Brainz/erkale
void uDIIS::solve_P(arma::mat & Pa, arma::mat & Pb) {
  arma::vec sol=get_w();
 
  // Form weighted density matrix
  Pa.zeros();
  Pb.zeros();
  for(size_t i=0;i<stack.size();i++) {
    Pa+=sol(i)*stack[i].Pa;
    Pb+=sol(i)*stack[i].Pb;
  }
}
コード例 #2
0
ファイル: diis.cpp プロジェクト: Monkey---Brainz/erkale
void uDIIS::solve_F(arma::mat & Fa, arma::mat & Fb) {
  arma::vec sol=get_w();
 
  // Form weighted Fock matrix
  Fa.zeros();
  Fb.zeros();
  for(size_t i=0;i<stack.size();i++) {
    Fa+=sol(i)*stack[i].Fa;
    Fb+=sol(i)*stack[i].Fb;
  }
}
コード例 #3
0
ファイル: hmm_impl.hpp プロジェクト: dblalock/mlpack-ios
void HMM<Distribution>::Backward(const arma::mat& dataSeq,
                                 const arma::vec& scales,
                                 arma::mat& backwardProb) const
{
  // Our goal is to calculate the backward probabilities:
  //  P(X_k | o_{k + 1:T}) for all possible states X_k, for each time point k.
  backwardProb.zeros(transition.n_rows, dataSeq.n_cols);

  // The last element probability is 1.
  backwardProb.col(dataSeq.n_cols - 1).fill(1);

  // Now step backwards through all other observations.
  for (size_t t = dataSeq.n_cols - 2; t + 1 > 0; t--)
  {
    for (size_t j = 0; j < transition.n_rows; j++)
    {
      // The backward probability of state j at time t is the sum over all state
      // of the probability of the next state having been a transition from the
      // current state multiplied by the probability of each of those states
      // emitting the given observation.
      for (size_t state = 0; state < transition.n_rows; state++)
        backwardProb(j, t) += transition(state, j) * backwardProb(state, t + 1)
            * emission[state].Probability(dataSeq.unsafe_col(t + 1));

      // Normalize by the weights from the forward algorithm.
      backwardProb(j, t) /= scales[t + 1];
    }
  }
}
コード例 #4
0
ファイル: test_function.cpp プロジェクト: sbrodehl/mlpack
void SGDTestFunction::Gradient(const arma::mat& coordinates,
                               const size_t begin,
                               arma::mat& gradient,
                               const size_t batchSize) const
{
  gradient.zeros(3);

  for (size_t i = begin; i < begin + batchSize; ++i)
  {
    switch (visitationOrder(i))
    {
      case 0:
        if (coordinates[0] >= 0)
          gradient[0] += std::exp(-coordinates[0]);
        else
          gradient[0] += -std::exp(coordinates[0]);
        break;

      case 1:
        gradient[1] += 2 * coordinates[1];
        break;

      case 2:
        gradient[2] += 4 * std::pow(coordinates[2], 3) + 6 * coordinates[2];
        break;
    }
  }

  gradient /= batchSize;
}
コード例 #5
0
ファイル: random_acol_init.hpp プロジェクト: 0x0all/mlpack
  inline static void Initialize(const MatType& V,
                                const size_t r,
                                arma::mat& W,
                                arma::mat& H)
  {
    const size_t n = V.n_rows;
    const size_t m = V.n_cols;

    if (columnsToAverage > m)
    {
      Log::Warn << "Number of random columns (columnsToAverage) is more than "
          << "the number of columns available in the V matrix; weird results "
          << "may ensue!" << std::endl;
    }

    W.zeros(n, r);

    // Initialize W matrix with random columns.
    for (size_t col = 0; col < r; col++)
    {
      for (size_t randCol = 0; randCol < columnsToAverage; randCol++)
      {
        // .col() does not work in this case, as of Armadillo 3.920.
        W.unsafe_col(col) += V.col(math::RandInt(0, m));
      }
    }

    // Now divide by p.
    W /= columnsToAverage;

    // Initialize H to random values.
    H.randu(r, m);
  }
コード例 #6
0
void LovaszThetaSDP::GradientConstraint(const size_t index,
                                        const arma::mat& coordinates,
                                        arma::mat& gradient)
{
//  Log::Debug << "Gradient of constraint " << index << " is " << std::endl;
  if (index == 0) // This is the constraint Tr(X) = 1.
  {
    gradient = 2 * coordinates; // d/dR (Tr(R R^T)) = 2 R.
//    std::cout << gradient;
    return;
  }

//  Log::Debug << "Evaluating gradient of constraint " << index << " with ";
  size_t i = edges(0, index - 1);
  size_t j = edges(1, index - 1);
//  Log::Debug << "i = " << i << " and j = " << j << "." << std::endl;

  // Since the constraint is (R^T R)_ij, the gradient for (x, y) will be (I
  // derived this for one of the MVU constraints):
  //   0     , y != i, y != j
  //   2 R_xj, y  = i, y != j
  //   2 R_xi, y != i, y  = j
  //   4 R_xy, y  = i, y  = j
  // This results in the gradient matrix having two nonzero rows; for row
  // i, the elements are R_nj, where n is the row; for column j, the elements
  // are R_ni.
  gradient.zeros(coordinates.n_rows, coordinates.n_cols);

  gradient.col(i) = coordinates.col(j);
  gradient.col(j) += coordinates.col(i); // In case j = i (shouldn't happen).

//  std::cout << gradient;
}
コード例 #7
0
void RegularizedSVDFunction::Gradient(const arma::mat& parameters,
                                      arma::mat& gradient) const
{
  // For an example with rating corresponding to user 'i' and item 'j', the
  // gradients for the parameters is as follows:
  //           grad(u(i)) = lambda * u(i) - error * v(j)
  //           grad(v(j)) = lambda * v(j) - error * u(i)
  // 'error' is the prediction error for that example, which is:
  //           rating(i, j) - u(i).t() * v(j)
  // The full gradient is calculated by summing the contributions over all the
  // training examples.

  gradient.zeros(rank, numUsers + numItems);

  for (size_t i = 0; i < data.n_cols; i++)
  {
    // Indices for accessing the the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, i);
    double ratingError = rating - arma::dot(parameters.col(user),
                                            parameters.col(item));

    // Gradient is non-zero only for the parameter columns corresponding to the
    // example.
    gradient.col(user) += 2 * (lambda * parameters.col(user) -
                               ratingError * parameters.col(item));
    gradient.col(item) += 2 * (lambda * parameters.col(item) -
                               ratingError * parameters.col(user));
  }
}
コード例 #8
0
void GockenbachFunction::GradientConstraint(const size_t index,
                                            const arma::mat& coordinates,
                                            arma::mat& gradient)
{
  gradient.zeros(3, 1);

  switch (index)
  {
    case 0:
      // g'_x1(x) = -1
      // g'_x2(x) = -1
      // g'_x3(x) = 1
      gradient[0] = -1;
      gradient[1] = -1;
      gradient[2] = 1;
      break;

    case 1:
      // h'_x1(x) = -2 x_1
      // h'_x2(x) = 0
      // h'_x3(x) = 1
      gradient[0] = -2 * coordinates[0];
      gradient[2] = 1;
      break;
  }
}
コード例 #9
0
ファイル: diis.cpp プロジェクト: Monkey---Brainz/erkale
void rDIIS::solve_P(arma::mat & P) {
  arma::vec sol=get_w();
 
  // Form weighted density matrix
  P.zeros();
  for(size_t i=0;i<stack.size();i++)
    P+=sol(i)*stack[i].P;
}
コード例 #10
0
ファイル: diis.cpp プロジェクト: Monkey---Brainz/erkale
void rDIIS::solve_F(arma::mat & F) {
  arma::vec sol=get_w();
 
  // Form weighted Fock matrix
  F.zeros();
  for(size_t i=0;i<stack.size();i++)
    F+=sol(i)*stack[i].F;
}
コード例 #11
0
ファイル: test_functions.cpp プロジェクト: YaweiZhao/mlpack
//! Calculate the gradient of one of the individual functions.
void GeneralizedRosenbrockFunction::Gradient(const arma::mat& coordinates,
                                             const size_t i,
                                             arma::mat& gradient) const
{
  gradient.zeros(n);

  gradient[i] = 400 * (std::pow(coordinates[i], 3) - coordinates[i] *
      coordinates[i + 1]) + 2 * (coordinates[i] - 1);
  gradient[i + 1] = 200 * (coordinates[i + 1] - std::pow(coordinates[i], 2));
}
コード例 #12
0
ファイル: hmm_impl.hpp プロジェクト: shenzebang/mlpack
void HMM<Distribution>::Smooth(const arma::mat& dataSeq,
                               arma::mat& smoothSeq) const
{
  // First run the forward algorithm.
  arma::mat stateProb;
  Estimate(dataSeq, stateProb);

  // Compute expected emissions.
  // Will not work for distributions without a Mean() function.
  smoothSeq.zeros(dimensionality, dataSeq.n_cols);
  for (size_t i = 0; i < emission.size(); i++)
    smoothSeq += emission[i].Mean() * stateProb.row(i);
}
コード例 #13
0
ファイル: rnn_impl.hpp プロジェクト: AmesianX/mlpack
void RNN<
LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
>::Gradient(const arma::mat& /* unused */,
            const size_t i,
            arma::mat& gradient)
{
  if (gradient.is_empty())
  {
    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
  }
  else
  {
    gradient.zeros();
  }

  Evaluate(parameter, i, false);

  arma::mat currentGradient = arma::mat(gradient.n_rows, gradient.n_cols);
  NetworkGradients(currentGradient, network);

  const arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows,
      1, false, true);

  // Iterate through the input sequence and perform the feed backward pass.
  for (seqNum = seqLen - 1; seqNum >= 0; seqNum--)
  {
    // Load the network activation for the upcoming backward pass.
    LoadActivations(input.rows(seqNum * inputSize, (seqNum + 1) *
        inputSize - 1), network);

    // Perform the backward pass.
    if (seqOutput)
    {
      arma::mat seqError = error.unsafe_col(seqNum);
      Backward(seqError, network);
    }
    else
    {
      Backward(error, network);
    }

    // Link the parameters and update the gradients.
    LinkParameter(network);
    UpdateGradients<>(network);

    // Update the overall gradient.
    gradient += currentGradient;

    if (seqNum == 0) break;
  }
}
コード例 #14
0
void AugLagrangianTestFunction::GradientConstraint(const size_t index,
    const arma::mat& /* coordinates */,
    arma::mat& gradient)
{
  // If the user passed an invalid index (not 0), we will return a zero
  // gradient.
  gradient.zeros(2, 1);

  if (index == 0)
  {
    // c'_x1(x) = 1
    // c'_x2(x) = 1
    gradient.ones(2, 1); // Use a shortcut instead of assigning individually.
  }
}
コード例 #15
0
ファイル: test_functions.cpp プロジェクト: sbrodehl/mlpack
//! Calculate the gradient of one of the individual functions.
void GeneralizedRosenbrockFunction::Gradient(const arma::mat& coordinates,
                                             const size_t i,
                                             arma::mat& gradient,
                                             const size_t batchSize) const
{
  gradient.zeros(n);

  for (size_t j = i; j < i + batchSize; ++j)
  {
    const size_t p = visitationOrder[j];
    gradient[p] = 400 * (std::pow(coordinates[p], 3) - coordinates[p] *
        coordinates[p + 1]) + 2 * (coordinates[p] - 1);
    gradient[p + 1] = 200 * (coordinates[p + 1] - std::pow(coordinates[p], 2));
  }
}
コード例 #16
0
ファイル: sa_impl.hpp プロジェクト: 0x0all/mlpack
void SA<FunctionType, CoolingScheduleType>::MoveControl(const size_t nMoves,
                                                        arma::mat& accept)
{
  arma::mat target;
  target.copy_size(accept);
  target.fill(0.44);
  moveSize = arma::log(moveSize);
  moveSize += gain * (accept / (double) nMoves - target);
  moveSize = arma::exp(moveSize);

  // To avoid the use of element-wise arma::min(), which is only available in
  // Armadillo after v3.930, we use a for loop here instead.
  for (size_t i = 0; i < accept.n_elem; ++i)
    moveSize(i) = (moveSize(i) > maxMove(i)) ? maxMove(i) : moveSize(i);

  accept.zeros();
}
コード例 #17
0
ファイル: find_P.cpp プロジェクト: fedral/cpd
double find_P(const arma::mat& X, const arma::mat& Y, double sigma2,
              float outliers, arma::vec& P1, arma::vec& Pt1, arma::mat& PX,
              bool use_fgt, const float epsilon) {
    P1.zeros();
    Pt1.zeros();
    PX.zeros();

    const arma::uword N = X.n_rows;
    const arma::uword M = Y.n_rows;
    const arma::uword D = Y.n_cols;

    const double h = std::sqrt(2 * sigma2);
    const double ndi = (outliers * M * std::pow(2 * M_PI * sigma2, 0.5 * D)) /
                       ((1 - outliers) * N);
    arma::vec q = arma::ones<arma::vec>(M);

    fgt::GaussTransformUnqPtr transformY;
    if (use_fgt) {
        transformY = fgt::choose_gauss_transform(Y, h, epsilon);
    } else {
        transformY.reset(new fgt::Direct(Y, h));
    }
    arma::vec denomP = transformY->compute(X, q);

    denomP = denomP + ndi;
    Pt1 = 1 - ndi / denomP;
    q = 1 / denomP;

    fgt::GaussTransformUnqPtr transformX;
    if (use_fgt) {
        transformX = fgt::choose_gauss_transform(X, h, epsilon);
    } else {
        transformX.reset(new fgt::Direct(X, h));
    }
    P1 = transformX->compute(Y, q);

    for (arma::uword i = 0; i < D; ++i) {
        q = X.col(i) / denomP;
        arma::vec c = PX.unsafe_col(i);
        PX.col(i) = transformX->compute(Y, q);
    }

    return -arma::sum(arma::log(denomP)) + D * N * std::log(sigma2) / 2;
}
コード例 #18
0
ファイル: hmm_impl.hpp プロジェクト: shenzebang/mlpack
void HMM<Distribution>::Filter(const arma::mat& dataSeq,
                               arma::mat& filterSeq,
                               size_t ahead) const
{
  // First run the forward algorithm.
  arma::mat forwardProb;
  arma::vec scales;
  Forward(dataSeq, scales, forwardProb);

  // Propagate state ahead.
  if (ahead != 0)
    forwardProb = pow(transition, ahead) * forwardProb;

  // Compute expected emissions.
  // Will not work for distributions without a Mean() function.
  filterSeq.zeros(dimensionality, dataSeq.n_cols);
  for (size_t i = 0; i < emission.size(); i++)
    filterSeq += emission[i].Mean() * forwardProb.row(i);
}
コード例 #19
0
ファイル: svd_wrapper_impl.hpp プロジェクト: 0x0all/mlpack
double mlpack::cf::SVDWrapper<Factorizer>::Apply(const arma::mat& V,
                         arma::mat& W,
                         arma::mat& sigma,
                         arma::mat& H) const
{
  // get svd factorization
  arma::vec E;
  factorizer.Apply(W, E, H, V);

  // construct sigma matrix
  sigma.zeros(V.n_rows, V.n_cols);

  for(size_t i = 0;i < sigma.n_rows && i < sigma.n_cols;i++)
    sigma(i, i) = E(i, 0);

  arma::mat V_rec = W * sigma * arma::trans(H);

  // return normalized frobenius error
  return arma::norm(V - V_rec, "fro") / arma::norm(V, "fro");
}
コード例 #20
0
void AugLagrangianFunction<LagrangianFunction>::Gradient(
    const arma::mat& coordinates,
    arma::mat& gradient) const
{
  // The augmented Lagrangian's gradient is evaluted as
  // f'(x) + {(-lambda_i + sigma * c_i(x)) * c'_i(x)} for all constraints
  gradient.zeros();
  function.Gradient(coordinates, gradient);

  arma::mat constraintGradient; // Temporary for constraint gradients.
  for (size_t i = 0; i < function.NumConstraints(); i++)
  {
    function.GradientConstraint(i, coordinates, constraintGradient);

    // Now calculate scaling factor and add to existing gradient.
    arma::mat tmpGradient;
    tmpGradient = (-lambda[i] + sigma *
        function.EvaluateConstraint(i, coordinates)) * constraintGradient;
    gradient += tmpGradient;
  }
}
コード例 #21
0
ファイル: hmm_impl.hpp プロジェクト: shenzebang/mlpack
void HMM<Distribution>::Forward(const arma::mat& dataSeq,
                                arma::vec& scales,
                                arma::mat& forwardProb) const
{
  // Our goal is to calculate the forward probabilities:
  //  P(X_k | o_{1:k}) for all possible states X_k, for each time point k.
  forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
  scales.zeros(dataSeq.n_cols);

  // The first entry in the forward algorithm uses the initial state
  // probabilities.  Note that MATLAB assumes that the starting state (at
  // t = -1) is state 0; this is not our assumption here.  To force that
  // behavior, you could append a single starting state to every single data
  // sequence and that should produce results in line with MATLAB.
  for (size_t state = 0; state < transition.n_rows; state++)
    forwardProb(state, 0) = initial(state) *
        emission[state].Probability(dataSeq.unsafe_col(0));

  // Then normalize the column.
  scales[0] = accu(forwardProb.col(0));
  forwardProb.col(0) /= scales[0];

  // Now compute the probabilities for each successive observation.
  for (size_t t = 1; t < dataSeq.n_cols; t++)
  {
    for (size_t j = 0; j < transition.n_rows; j++)
    {
      // The forward probability of state j at time t is the sum over all states
      // of the probability of the previous state transitioning to the current
      // state and emitting the given observation.
      forwardProb(j, t) = accu(forwardProb.col(t - 1) %
          trans(transition.row(j))) *
          emission[j].Probability(dataSeq.unsafe_col(t));
    }

    // Normalize probability.
    scales[t] = accu(forwardProb.col(t));
    forwardProb.col(t) /= scales[t];
  }
}
コード例 #22
0
void SGDTestFunction::Gradient(const arma::mat& coordinates,
                               const size_t i,
                               arma::mat& gradient) const
{
  gradient.zeros(3);
  switch (i)
  {
    case 0:
      if (coordinates[0] >= 0)
        gradient[0] = std::exp(-coordinates[0]);
      else
        gradient[0] = -std::exp(coordinates[1]);
      break;

    case 1:
      gradient[1] = 2 * coordinates[1];
      break;

    case 2:
      gradient[2] = 4 * std::pow(coordinates[2], 3) + 6 * coordinates[2];
      break;
  }
}
コード例 #23
0
ファイル: hmm_impl.hpp プロジェクト: dblalock/mlpack-ios
void HMM<Distribution>::Forward(const arma::mat& dataSeq,
                                arma::vec& scales,
                                arma::mat& forwardProb) const
{
  // Our goal is to calculate the forward probabilities:
  //  P(X_k | o_{1:k}) for all possible states X_k, for each time point k.
  forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
  scales.zeros(dataSeq.n_cols);

  // Starting state (at t = -1) is assumed to be state 0.  This is what MATLAB
  // does in their hmmdecode() function, so we will emulate that behavior.
  for (size_t state = 0; state < transition.n_rows; state++)
    forwardProb(state, 0) = transition(state, 0) *
        emission[state].Probability(dataSeq.unsafe_col(0));

  // Then normalize the column.
  scales[0] = accu(forwardProb.col(0));
  forwardProb.col(0) /= scales[0];

  // Now compute the probabilities for each successive observation.
  for (size_t t = 1; t < dataSeq.n_cols; t++)
  {
    for (size_t j = 0; j < transition.n_rows; j++)
    {
      // The forward probability of state j at time t is the sum over all states
      // of the probability of the previous state transitioning to the current
      // state and emitting the given observation.
      forwardProb(j, t) = accu(forwardProb.col(t - 1) %
          trans(transition.row(j))) *
          emission[j].Probability(dataSeq.unsafe_col(t));
    }

    // Normalize probability.
    scales[t] = accu(forwardProb.col(t));
    forwardProb.col(t) /= scales[t];
  }
}
コード例 #24
0
double PellegMooreKMeans<MetricType, MatType>::Iterate(
    const arma::mat& centroids,
    arma::mat& newCentroids,
    arma::Col<size_t>& counts)
{
  newCentroids.zeros(centroids.n_rows, centroids.n_cols);
  counts.zeros(centroids.n_cols);

  // Create rules object.
  typedef PellegMooreKMeansRules<MetricType, TreeType> RulesType;
  RulesType rules(dataset, centroids, newCentroids, counts, metric);

  // Use single-tree traverser.
  typename TreeType::template SingleTreeTraverser<RulesType> traverser(rules);

  // Now, do a traversal with a fake query index (since the query index is
  // irrelevant; we are checking each node with all clusters.
  traverser.Traverse(0, *tree);

  distanceCalculations += rules.DistanceCalculations();

  // Now, calculate how far the clusters moved, after normalizing them.
  double residual = 0.0;
  for (size_t c = 0; c < centroids.n_cols; ++c)
  {
    if (counts[c] > 0)
    {
      newCentroids.col(c) /= counts(c);
      residual += std::pow(metric.Evaluate(centroids.col(c),
                                           newCentroids.col(c)), 2.0);
    }
  }
  distanceCalculations += centroids.n_cols;

  return std::sqrt(residual);
}
コード例 #25
0
double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
    const arma::mat& centroids,
    arma::mat& newCentroids,
    arma::Col<size_t>& counts)
{
  newCentroids.zeros(centroids.n_rows, centroids.n_cols);
  counts.zeros(centroids.n_cols);
  if (clusterDistances.n_elem != centroids.n_cols + 1)
  {
    clusterDistances.set_size(centroids.n_cols + 1);
    clusterDistances.fill(DBL_MAX / 2.0); // To prevent overflow.
  }

  // Build a tree on the centroids.
  std::vector<size_t> oldFromNewCentroids;
  TreeType* centroidTree = BuildTree<TreeType>(
      const_cast<typename TreeType::Mat&>(centroids), oldFromNewCentroids);

  // Now run the dual-tree algorithm.
  typedef DualTreeKMeansRules<MetricType, TreeType> RulesType;
  RulesType rules(dataset, centroids, newCentroids, counts, oldFromNewCentroids,
      iteration, clusterDistances, distances, assignments, distanceIteration,
      metric);

  // Use the dual-tree traverser.
//typename TreeType::template DualTreeTraverser<RulesType> traverser(rules);
  typename TreeType::template BreadthFirstDualTreeTraverser<RulesType>
      traverser(rules);

  traverser.Traverse(*centroidTree, *tree);

  distanceCalculations += rules.DistanceCalculations();

  // Now, calculate how far the clusters moved, after normalizing them.
  double residual = 0.0;
  clusterDistances.zeros();
  for (size_t c = 0; c < centroids.n_cols; ++c)
  {
    if (counts[c] == 0)
    {
      newCentroids.col(c).fill(DBL_MAX); // Should have happened anyway I think.
    }
    else
    {
      const size_t oldCluster = oldFromNewCentroids[c];
      newCentroids.col(oldCluster) /= counts(oldCluster);
      const double dist = metric.Evaluate(centroids.col(c),
                                          newCentroids.col(oldCluster));
      if (dist > clusterDistances[centroids.n_cols])
        clusterDistances[centroids.n_cols] = dist;
      clusterDistances[oldCluster] = dist;
      residual += std::pow(dist, 2.0);
    }
  }
  Log::Info << clusterDistances.t();

  delete centroidTree;

  ++iteration;
  return std::sqrt(residual);
}
コード例 #26
0
void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
                                                const size_t i,
                                                arma::mat& gradient)
{
  // We will need to calculate p_i before this evaluation is done, so these two
  // variables will hold the information necessary for that.
  double numerator = 0;
  double denominator = 0;

  // The gradient involves two matrix terms which are eventually combined into
  // one.
  arma::mat firstTerm;
  arma::mat secondTerm;

  firstTerm.zeros(coordinates.n_rows, coordinates.n_cols);
  secondTerm.zeros(coordinates.n_rows, coordinates.n_cols);

  // Compute the stretched dataset.
  stretchedDataset = coordinates * dataset;

  for (size_t k = 0; k < dataset.n_cols; ++k)
  {
    // Don't consider the case where the points are the same.
    if (i == k)
      continue;

    // Calculate the numerator of p_ik.
    double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
                                       stretchedDataset.unsafe_col(k)));

    // If the points are in the same class, we must add to the second term of
    // the gradient as well as the numerator of p_i.  We will divide by the
    // denominator of p_ik later.  For x_ik we are not using stretched points.
    arma::vec x_ik = dataset.col(i) - dataset.col(k);
    if (labels[i] == labels[k])
    {
      numerator += eval;
      secondTerm += eval * x_ik * trans(x_ik);
    }

    // We always have to add to the denominator of p_i and the first term of the
    // gradient computation.  We will divide by the denominator of p_ik later.
    denominator += eval;
    firstTerm += eval * x_ik * trans(x_ik);
  }

  // Calculate p_i.
  double p = 0;
  if (denominator == 0)
  {
    Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
    // If the denominator is zero, then all p_ik should be zero and there is
    // no gradient contribution from this point.
    gradient.zeros(coordinates.n_rows, coordinates.n_rows);
    return;
  }
  else
  {
    p = numerator / denominator;
    firstTerm /= denominator;
    secondTerm /= denominator;
  }

  // Now multiply the first term by p_i, and add the two together and multiply
  // all by 2 * A.  We negate it though, because our optimizer is a minimizer.
  gradient = -2 * coordinates * (p * firstTerm - secondTerm);
}