/**
 * Evaluate the objective at the given coordinates.
 *
 * Precalculate() is invoked first so that the cached denominators and
 * numerators are brought up to date if necessary; the result is then the
 * negated sum of all elements of p.
 *
 * @param coordinates Coordinates at which to evaluate the objective.
 * @return The negated sum of p_i over all i.
 */
double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates)
{
  // Refresh the cached denominators and numerators, if necessary.
  Precalculate(coordinates);

  // Sum of p_i for all i.
  const double totalProbability = accu(p);

  // Negated because the solver minimizes rather than maximizes.
  return -totalProbability;
}
size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data, const size_t emptyCluster, const arma::mat& oldCentroids, arma::mat& newCentroids, arma::Col<size_t>& clusterCounts, MetricType& metric, const size_t iteration) { // If necessary, calculate the variances and assignments. if (iteration != this->iteration || assignments.n_elem != data.n_cols) Precalculate(data, oldCentroids, clusterCounts, metric); this->iteration = iteration; // Now find the cluster with maximum variance. arma::uword maxVarCluster; variances.max(maxVarCluster); // Now, inside this cluster, find the point which is furthest away. size_t furthestPoint = data.n_cols; double maxDistance = -DBL_MAX; for (size_t i = 0; i < data.n_cols; ++i) { if (assignments[i] == maxVarCluster) { const double distance = std::pow(metric.Evaluate(data.col(i), newCentroids.col(maxVarCluster)), 2.0); if (distance > maxDistance) { maxDistance = distance; furthestPoint = i; } } } // Take that point and add it to the empty cluster. newCentroids.col(maxVarCluster) *= (double(clusterCounts[maxVarCluster]) / double(clusterCounts[maxVarCluster] - 1)); newCentroids.col(maxVarCluster) -= (1.0 / (clusterCounts[maxVarCluster] - 1.0)) * arma::vec(data.col(furthestPoint)); clusterCounts[maxVarCluster]--; clusterCounts[emptyCluster]++; newCentroids.col(emptyCluster) = arma::vec(data.col(furthestPoint)); assignments[furthestPoint] = emptyCluster; // Modify the variances, as necessary. variances[emptyCluster] = 0; // One has already been subtracted from clusterCounts[maxVarCluster]. variances[maxVarCluster] = (1.0 / (clusterCounts[maxVarCluster])) * ((clusterCounts[maxVarCluster] + 1) * variances[maxVarCluster] - maxDistance); // Output some debugging information. Log::Debug << "Point " << furthestPoint << " assigned to empty cluster " << emptyCluster << ".\n"; return 1; // We only changed one point. }
/**
 * Compute the gradient of the objective at the given coordinates.
 *
 * Precalculate() is invoked first so that the cached denominators and
 * numerators are brought up to date if necessary.  A weighted sum of outer
 * products is then accumulated over every pair (i, k) with i < k, and the
 * gradient is set to -2 * coordinates * (that sum).
 *
 * For each pair, with x_ik = x_i - x_k taken from the unstretched dataset, the
 * term added is
 *
 *   ((p_i - 1) p_ik + (p_k - 1) p_ki) x_ik x_ik^T   if labels match,
 *   (p_i p_ik + p_k p_ki) x_ik x_ik^T               otherwise.
 *
 * @param coordinates Coordinates at which to evaluate the gradient.
 * @param gradient Matrix that is overwritten with the gradient.
 */
void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
                                                arma::mat& gradient)
{
  // Refresh the cached denominators and numerators, if necessary.
  Precalculate(coordinates);

  const size_t dimension = stretchedDataset.n_rows;
  const size_t numPoints = stretchedDataset.n_cols;

  // Accumulator for the pairwise weighted outer products.
  arma::mat sum(dimension, dimension, arma::fill::zeros);

  for (size_t i = 0; i < numPoints; ++i)
  {
    for (size_t k = i + 1; k < numPoints; ++k)
    {
      // The unnormalized kernel value is shared by p_ik and p_ki; only the
      // normalizing denominator differs.
      const double kernel = exp(-metric.Evaluate(
          stretchedDataset.unsafe_col(i), stretchedDataset.unsafe_col(k)));
      const double p_ik = kernel / denominators(i);
      const double p_ki = kernel / denominators(k);

      // Difference of the original (not stretched) points, and its outer
      // product with itself.
      const arma::vec x_ik = dataset.col(i) - dataset.col(k);
      const arma::mat outerProduct = (x_ik * trans(x_ik));

      // The scalar weight depends on whether the two points share a label.
      const double weight = (labels[i] == labels[k])
          ? ((p[i] - 1) * p_ik + (p[k] - 1) * p_ki)
          : (p[i] * p_ik + p[k] * p_ki);

      sum += weight * outerProduct;
    }
  }

  // Assemble the final gradient.
  gradient = -2 * coordinates * sum;
}