const BallBound<VecType>&
BallBound<VecType>::operator|=(const MatType& data)
{
  if (radius < 0)
  {
    center = data.col(0);
    radius = 0;
  }

  // Now iteratively add points.  There is probably a closed-form solution to
  // find the minimum bounding circle, and it is probably faster.  Start at
  // column 0: for a freshly-initialized bound the first iteration is a no-op,
  // but an already-initialized bound must absorb the first column too.
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    const double dist = metric::EuclideanDistance::Evaluate(center, (VecType)
        data.col(i)) - radius;

    if (dist > 0)
    {
      // Move (dist / 2) towards the new point and increase the radius by
      // (dist / 2).  diff has length (dist + radius), so scale it down;
      // moving by 0.5 * diff would overshoot and leave part of the old ball
      // uncovered.
      const arma::vec diff = data.col(i) - center;
      center += (0.5 * dist / (dist + radius)) * diff;
      radius += 0.5 * dist;
    }
  }
  }

  return *this;
}
const BallBound<MetricType, VecType>&
BallBound<MetricType, VecType>::operator|=(const MatType& data)
{
  if (radius < 0)
  {
    center = data.col(0);
    radius = 0;
  }

  // Now iteratively add points.
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    const ElemType dist = metric->Evaluate(center, (VecType) data.col(i));

    // See if the new point lies outside the bound.
    if (dist > radius)
    {
      // Move towards the new point and increase the radius just enough to
      // accommodate the new point.
      const VecType diff = data.col(i) - center;
      center += ((dist - radius) / (2 * dist)) * diff;
      radius = 0.5 * (dist + radius);
    }
  }

  return *this;
}
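For intuition, a minimal standalone check of the update rule above (plain Armadillo, no mlpack types; the concrete numbers are illustrative): after absorbing a point at distance d > r, the new radius (d + r) / 2 reaches both the new point and the far side of the old ball.

#include <armadillo>
#include <iostream>

int main()
{
  arma::vec center = {0.0, 0.0};
  double radius = 1.0;

  const arma::vec point = {5.0, 0.0};
  const double dist = arma::norm(point - center); // d = 5, r = 1.

  if (dist > radius)
  {
    const arma::vec diff = point - center;
    center += ((dist - radius) / (2 * dist)) * diff; // New center: (2, 0).
    radius = 0.5 * (dist + radius);                  // (d + r) / 2 = 3.
  }

  // Both the new point and the far side of the old ball (at x = -1) now lie
  // exactly on the boundary, at distance 3 from the new center.
  std::cout << "radius: " << radius << ", distance to point: "
      << arma::norm(point - center) << std::endl;
  return 0;
}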
size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data,
                                           const size_t emptyCluster,
                                           const arma::mat& oldCentroids,
                                           arma::mat& newCentroids,
                                           arma::Col<size_t>& clusterCounts,
                                           MetricType& metric,
                                           const size_t iteration)
{
  // If necessary, calculate the variances and assignments.
  if (iteration != this->iteration || assignments.n_elem != data.n_cols)
    Precalculate(data, oldCentroids, clusterCounts, metric);
  this->iteration = iteration;

  // Now find the cluster with maximum variance.
  arma::uword maxVarCluster;
  variances.max(maxVarCluster);

  // Now, inside this cluster, find the point which is furthest away.
  size_t furthestPoint = data.n_cols;
  double maxDistance = -DBL_MAX;
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    if (assignments[i] == maxVarCluster)
    {
      const double distance = std::pow(metric.Evaluate(data.col(i),
          newCentroids.col(maxVarCluster)), 2.0);

      if (distance > maxDistance)
      {
        maxDistance = distance;
        furthestPoint = i;
      }
    }
  }

  // Take that point and add it to the empty cluster.
  newCentroids.col(maxVarCluster) *= (double(clusterCounts[maxVarCluster]) /
      double(clusterCounts[maxVarCluster] - 1));
  newCentroids.col(maxVarCluster) -= (1.0 / (clusterCounts[maxVarCluster] - 1.0)) *
      arma::vec(data.col(furthestPoint));
  clusterCounts[maxVarCluster]--;
  clusterCounts[emptyCluster]++;
  newCentroids.col(emptyCluster) = arma::vec(data.col(furthestPoint));
  assignments[furthestPoint] = emptyCluster;

  // Modify the variances, as necessary.
  variances[emptyCluster] = 0;
  // One has already been subtracted from clusterCounts[maxVarCluster].
  variances[maxVarCluster] = (1.0 / (clusterCounts[maxVarCluster])) *
      ((clusterCounts[maxVarCluster] + 1) * variances[maxVarCluster] - maxDistance);

  // Output some debugging information.
  Log::Debug << "Point " << furthestPoint << " assigned to empty cluster " <<
      emptyCluster << ".\n";

  return 1; // We only changed one point.
}
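The centroid adjustment above removes furthestPoint from its old cluster by inverting the running mean: with n points and mean mu, removing x leaves mean (n * mu - x) / (n - 1). A small standalone check (plain Armadillo; the numbers are illustrative):

#include <armadillo>
#include <iostream>

int main()
{
  arma::mat cluster = {{1.0, 2.0, 9.0},
                       {1.0, 2.0, 9.0}}; // Three 2-D points as columns.
  arma::vec mu = arma::mean(cluster, 1); // Cluster mean: (4, 4).
  const size_t n = cluster.n_cols;

  // Remove the third point, exactly as the centroid update above does.
  const arma::vec x = cluster.col(2);
  mu *= double(n) / double(n - 1);
  mu -= (1.0 / double(n - 1)) * x;

  std::cout << mu.t(); // (1.5, 1.5): the mean of the two remaining points.
  return 0;
}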
size_t PerformSplit(MatType& data,
                    const size_t begin,
                    const size_t count,
                    const typename SplitType::SplitInfo& splitInfo,
                    std::vector<size_t>& oldFromNew)
{
  // This method modifies the input dataset.  We loop both from the left and
  // right sides of the points contained in this node.
  size_t left = begin;
  size_t right = begin + count - 1;

  // First half-iteration of the loop is out here because the termination
  // condition is in the middle.
  while ((left <= right) &&
         (SplitType::AssignToLeftNode(data.col(left), splitInfo)))
    left++;
  while ((left <= right) && (right > 0) &&
         (!SplitType::AssignToLeftNode(data.col(right), splitInfo)))
    right--;

  // Shortcut for when all points are on the right.
  if (left == right && right == 0)
    return left;

  while (left <= right)
  {
    // Swap columns.
    data.swap_cols(left, right);

    // Update the indices for what we changed.
    size_t t = oldFromNew[left];
    oldFromNew[left] = oldFromNew[right];
    oldFromNew[right] = t;

    // See how many points on the left are correct.  When they are correct,
    // increase the left counter accordingly.  When we encounter one that isn't
    // correct, stop.  We will switch it later.
    while ((left <= right) &&
        SplitType::AssignToLeftNode(data.col(left), splitInfo))
      left++;

    // Now see how many points on the right are correct.  When they are correct,
    // decrease the right counter accordingly.  When we encounter one that isn't
    // correct, stop.  We will switch it with the wrong point we found in the
    // previous loop.
    while ((left <= right) &&
        (!SplitType::AssignToLeftNode(data.col(right), splitInfo)))
      right--;
  }

  Log::Assert(left == right + 1);
  return left;
}
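The SplitType template parameter only needs a nested SplitInfo type and a static AssignToLeftNode(); a hypothetical minimal split type satisfying that contract (illustrative names, not an mlpack class) looks like this.

#include <armadillo>

// Send a point left if its value in one dimension falls below a threshold.
struct ThresholdSplit
{
  struct SplitInfo
  {
    size_t dimension;
    double threshold;
  };

  static bool AssignToLeftNode(const arma::vec& point, const SplitInfo& info)
  {
    return point[info.dimension] < info.threshold;
  }
};

// With columns {3, 1, 4, 1, 5} in dimension 0 and a threshold of 3, calling
// PerformSplit() with this split type would reorder the columns so the two
// values below 3 come first, and would return 2.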
size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data,
                                           const size_t emptyCluster,
                                           const MatType& centroids,
                                           arma::Col<size_t>& clusterCounts,
                                           arma::Col<size_t>& assignments)
{
  // First, we need to find the cluster with maximum variance (by which I mean
  // the sum of the covariance matrices).
  arma::vec variances;
  variances.zeros(clusterCounts.n_elem); // Start with 0.

  // Add the variance of each point's distance away from the cluster.  I think
  // this is the sensible thing to do.
  for (size_t i = 0; i < data.n_cols; i++)
  {
    variances[assignments[i]] += arma::as_scalar(
        arma::var(data.col(i) - centroids.col(assignments[i])));
  }

  // Now find the cluster with maximum variance.
  arma::uword maxVarCluster;
  variances.max(maxVarCluster);

  // Now, inside this cluster, find the point which is furthest away.
  size_t furthestPoint = data.n_cols;
  double maxDistance = -DBL_MAX;
  for (size_t i = 0; i < data.n_cols; i++)
  {
    if (assignments[i] == maxVarCluster)
    {
      double distance = arma::as_scalar(
          arma::var(data.col(i) - centroids.col(maxVarCluster)));

      if (distance > maxDistance)
      {
        maxDistance = distance;
        furthestPoint = i;
      }
    }
  }

  // Take that point and add it to the empty cluster.
  clusterCounts[maxVarCluster]--;
  clusterCounts[emptyCluster]++;
  assignments[furthestPoint] = emptyCluster;

  // Output some debugging information.
  Log::Debug << "Point " << furthestPoint << " assigned to empty cluster " <<
      emptyCluster << ".\n";

  return 1; // We only changed one point.
}
void Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Train(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const arma::rowvec& instanceWeights)
{
  size_t j, i = 0;
  bool converged = false;
  size_t tempLabel;
  arma::uword maxIndexRow, maxIndexCol;
  arma::mat tempLabelMat;

  LearnPolicy LP;

  const bool hasWeights = (instanceWeights.n_elem > 0);

  while ((i < maxIterations) && (!converged))
  {
    // This outer loop is for each iteration, and we use the 'converged'
    // variable for noting whether or not convergence has been reached.
    i++;
    converged = true;

    // Now this inner loop is for going through the dataset in each iteration.
    for (j = 0; j < data.n_cols; j++)
    {
      // Multiply for each variable and check whether the current weight vector
      // correctly classifies this.
      tempLabelMat = weights.t() * data.col(j) + biases;

      tempLabelMat.max(maxIndexRow, maxIndexCol);

      // Check whether prediction is correct.
      if (maxIndexRow != labels(0, j))
      {
        // The prediction was incorrect, so we have not converged.
        converged = false;
        tempLabel = labels(0, j);

        // Send maxIndexRow for knowing which weight to update, send j to know
        // the value of the vector to update it with.  Send tempLabel to know
        // the correct class.
        if (hasWeights)
          LP.UpdateWeights(data.col(j), weights, biases, maxIndexRow, tempLabel,
              instanceWeights(j));
        else
          LP.UpdateWeights(data.col(j), weights, biases, maxIndexRow,
              tempLabel);
      }
    }
  }
}
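The LearnPolicy's UpdateWeights() is what actually moves the decision hyperplanes. A sketch of a typical multiclass perceptron update, modeled on mlpack's SimpleWeightUpdate (the free-function form here is an assumption): reinforce the correct class and penalize the mispredicted one.

#include <armadillo>

void UpdateWeightsSketch(const arma::vec& point,
                         arma::mat& weights, // One column per class.
                         arma::vec& biases,
                         const size_t incorrectClass,
                         const size_t correctClass,
                         const double instanceWeight = 1.0)
{
  // Push the mispredicted class's hyperplane away from this point...
  weights.col(incorrectClass) -= instanceWeight * point;
  biases(incorrectClass) -= instanceWeight;

  // ...and pull the correct class's hyperplane towards it.
  weights.col(correctClass) += instanceWeight * point;
  biases(correctClass) += instanceWeight;
}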
  inline static void Initialize(const MatType& V,
                                const size_t r,
                                arma::mat& W,
                                arma::mat& H)
  {
    const size_t n = V.n_rows;
    const size_t m = V.n_cols;

    if (columnsToAverage > m)
    {
      Log::Warn << "Number of random columns (columnsToAverage) is more than "
          << "the number of columns available in the V matrix; weird results "
          << "may ensue!" << std::endl;
    }

    W.zeros(n, r);

    // Initialize W matrix with random columns.
    for (size_t col = 0; col < r; col++)
    {
      for (size_t randCol = 0; randCol < columnsToAverage; randCol++)
      {
        // .col() does not work in this case, as of Armadillo 3.920.
        W.unsafe_col(col) += V.col(math::RandInt(0, m));
      }
    }

    // Now divide by the number of columns we averaged.
    W /= columnsToAverage;

    // Initialize H to random values.
    H.randu(r, m);
  }
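The same initialization can be written compactly in plain Armadillo. A sketch, where randi() stands in for math::RandInt() and the hypothetical parameter p plays the role of columnsToAverage:

#include <armadillo>

inline void RandomAcolSketch(const arma::mat& V,
                             const size_t r,
                             const size_t p,
                             arma::mat& W,
                             arma::mat& H)
{
  W.zeros(V.n_rows, r);
  for (size_t col = 0; col < r; ++col)
  {
    // Draw p column indices in [0, V.n_cols) with replacement and average.
    const arma::uvec idx = arma::randi<arma::uvec>(p,
        arma::distr_param(0, int(V.n_cols) - 1));
    W.col(col) = arma::mean(V.cols(idx), 1);
  }

  H.randu(r, V.n_cols);
}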
void MaxVarianceNewCluster::Precalculate(const MatType& data,
                                         const arma::mat& oldCentroids,
                                         arma::Col<size_t>& clusterCounts,
                                         MetricType& metric)
{
  // We have to calculate the variances of each cluster and the assignments of
  // each point.  This is most easily done by iterating through the entire
  // dataset.
  variances.zeros(oldCentroids.n_cols);
  assignments.set_size(data.n_cols);

  // Add the variance of each point's distance away from the cluster.  I think
  // this is the sensible thing to do.
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = oldCentroids.n_cols; // Invalid value.

    for (size_t j = 0; j < oldCentroids.n_cols; j++)
    {
      const double distance = metric.Evaluate(data.col(i), oldCentroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    assignments[i] = closestCluster;
    variances[closestCluster] += std::pow(metric.Evaluate(data.col(i),
        oldCentroids.col(closestCluster)), 2.0);
  }

  // Divide by the number of points in the cluster to produce the variance,
  // unless the cluster is empty or contains only one point, in which case we
  // set the variance to 0.
  for (size_t i = 0; i < clusterCounts.n_elem; ++i)
    if (clusterCounts[i] <= 1)
      variances[i] = 0;
    else
      variances[i] /= clusterCounts[i];
}
void UBTreeSplit<BoundType, MatType>::InitializeAddresses(const MatType& data)
{
  addresses.resize(data.n_cols);

  // Calculate all addresses.
  for (size_t i = 0; i < data.n_cols; i++)
  {
    addresses[i].first.zeros(data.n_rows);
    bound::addr::PointToAddress(addresses[i].first, data.col(i));
    addresses[i].second = i;
  }
}
 inline static void Cluster(const MatType& data,
                            const size_t clusters,
                            arma::mat& centroids)
 {
   centroids.set_size(data.n_rows, clusters);
   for (size_t i = 0; i < clusters; ++i)
   {
     // Randomly sample a point.
     const size_t index = math::RandInt(0, data.n_cols);
     centroids.col(i) = data.col(index);
   }
 }
void Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Classify(
    const MatType& test,
    arma::Row<size_t>& predictedLabels)
{
  arma::vec tempLabelMat;
  arma::uword maxIndex = 0;

  // Make sure the output row is allocated before we write into it.
  predictedLabels.set_size(test.n_cols);

  // Could probably be faster if done in batch.
  for (size_t i = 0; i < test.n_cols; i++)
  {
    tempLabelMat = weights.t() * test.col(i) + biases;
    tempLabelMat.max(maxIndex);
    predictedLabels(0, i) = maxIndex;
  }
}
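The comment above notes that batch evaluation would likely be faster. A sketch of that variant in plain Armadillo (the standalone function form is an assumption, and index_max() needs a reasonably recent Armadillo):

#include <armadillo>

inline arma::Row<size_t> ClassifyBatch(const arma::mat& weights,
                                       const arma::vec& biases,
                                       const arma::mat& test)
{
  arma::mat scores = weights.t() * test; // One column of class scores per point.
  scores.each_col() += biases;           // Add the per-class biases.
  return arma::conv_to<arma::Row<size_t>>::from(
      arma::index_max(scores, 0));       // Argmax down each column.
}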
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::mat& centroids) const
{
  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    KMeans<> kmeans;
    kmeans.Cluster(sampledData, clusters, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  KMeans<> kmeans;
  kmeans.Cluster(sampledCentroids, clusters, centroids);
}
bool RPTreeMaxSplit<BoundType, MatType>::GetSplitVal(
    const MatType& data,
    const size_t begin,
    const size_t count,
    const arma::Col<ElemType>& direction,
    ElemType& splitVal)
{
  const size_t maxNumSamples = 100;
  const size_t numSamples = std::min(maxNumSamples, count);
  arma::uvec samples;

  // Get no more than numSamples distinct samples.
  math::ObtainDistinctSamples(begin, begin + count, numSamples, samples);

  arma::Col<ElemType> values(samples.n_elem);

  // Find the median of scalar products of the samples and the normal vector.
  for (size_t k = 0; k < samples.n_elem; k++)
    values[k] = arma::dot(data.col(samples[k]), direction);

  const ElemType maximum = arma::max(values);
  const ElemType minimum = arma::min(values);
  if (minimum == maximum)
    return false;

  splitVal = arma::median(values);

  // Add a random deviation to the median.
  // This algorithm differs from the method suggested in the random projection
  // tree paper, for two reasons:
  //   1. Evaluating the method proposed in the paper is time-consuming, since
  //      we must solve the furthest-pair problem.
  //   2. The proposed method does not appear to guarantee that a valid split
  //      value will be generated (i.e. it can produce a split value where there
  //      may be no points on the left or the right).
  splitVal += math::Random((minimum - splitVal) * 0.75,
      (maximum - splitVal) * 0.75);

  if (splitVal == maximum)
    splitVal = minimum;

  return true;
}
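Stripped of the tree machinery, the split-value rule above reduces to "median plus bounded jitter". A standalone sketch, with std::uniform_real_distribution standing in for math::Random():

#include <armadillo>
#include <random>

inline bool MedianJitterSplit(const arma::vec& projections, double& splitVal)
{
  const double minimum = projections.min();
  const double maximum = projections.max();
  if (minimum == maximum)
    return false; // All points project identically; no valid split exists.

  splitVal = arma::median(projections);

  // Jitter by up to 75% of the gap toward either extreme.
  static std::mt19937 gen(std::random_device{}());
  std::uniform_real_distribution<double> dist((minimum - splitVal) * 0.75,
                                              (maximum - splitVal) * 0.75);
  splitVal += dist(gen);

  if (splitVal == maximum)
    splitVal = minimum; // Guarantee at least one point on each side.
  return true;
}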
bool MeanSpaceSplit<MetricType, MatType>::SplitSpace(
    const typename HyperplaneType::BoundType& bound,
    const MatType& data,
    const arma::Col<size_t>& points,
    HyperplaneType& hyp)
{
  typename HyperplaneType::ProjVectorType projVector;
  double midValue;

  if (!SpaceSplit<MetricType, MatType>::GetProjVector(bound, data, points,
      projVector, midValue))
    return false;

  double splitVal = 0.0;
  for (size_t i = 0; i < points.n_elem; i++)
    splitVal += projVector.Project(data.col(points[i]));
  splitVal /= points.n_elem;

  hyp = HyperplaneType(projVector, splitVal);

  return true;
}
void MeanShift<UseKernel, KernelType, MatType>::GenSeeds(
    const MatType& data,
    const double binSize,
    const int minFreq,
    MatType& seeds)
{
  typedef arma::colvec VecType;
  std::map<VecType, int, less<VecType> > allSeeds;
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    VecType binnedPoint = arma::floor(data.unsafe_col(i) / binSize);
    if (allSeeds.find(binnedPoint) == allSeeds.end())
      allSeeds[binnedPoint] = 1;
    else
      allSeeds[binnedPoint]++;
  }

  // Remove seeds with too few points.  First we count the number of seeds we
  // end up with, then we add them.
  std::map<VecType, int, less<VecType> >::iterator it;
  size_t count = 0;
  for (it = allSeeds.begin(); it != allSeeds.end(); ++it)
    if (it->second >= minFreq)
      ++count;

  seeds.set_size(data.n_rows, count);
  count = 0;
  for (it = allSeeds.begin(); it != allSeeds.end(); ++it)
  {
    if (it->second >= minFreq)
    {
      seeds.col(count) = it->first;
      ++count;
    }
  }

  seeds *= binSize;
}
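The std::map above relies on a strict weak ordering over vectors; the unqualified less<VecType> is presumably a custom comparator defined elsewhere in mlpack, since std::less cannot order Armadillo vectors elementwise. A minimal lexicographic comparator serving the same purpose could look like this (a sketch, not the mlpack definition):

#include <armadillo>
#include <map>

// Assumes both vectors have the same length, as binned points do.
struct LexicographicLess
{
  bool operator()(const arma::vec& a, const arma::vec& b) const
  {
    for (size_t i = 0; i < a.n_elem; ++i)
    {
      if (a[i] < b[i])
        return true;
      if (a[i] > b[i])
        return false;
    }
    return false; // Equal vectors: neither is less than the other.
  }
};

// Usage mirroring GenSeeds(): std::map<arma::vec, int, LexicographicLess> bins;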
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::Row<size_t>& assignments) const
{
  // Perform the Bradley-Fayyad refined start algorithm, and get initial
  // centroids back.
  arma::mat centroids;
  Cluster(data, clusters, centroids);

  // Turn the final centroids into assignments.
  assignments.set_size(data.n_cols);
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = clusters;

    for (size_t j = 0; j < clusters; ++j)
    {
      // This is restricted to the L2 distance, and unfortunately it would take
      // a lot of refactoring and redesign to make this more general... we would
      // probably need to have KMeans take a template template parameter for the
      // initial partition policy.  It's not clear how to best do this.
      const double distance = metric::EuclideanDistance::Evaluate(data.col(i),
          centroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    // Assign the point to its closest cluster.
    assignments[i] = closestCluster;
  }
}
NaiveBayesClassifier<MatType>::NaiveBayesClassifier(
    const MatType& data,
    const arma::Col<size_t>& labels,
    const size_t classes,
    const bool incrementalVariance)
{
  const size_t dimensionality = data.n_rows;

  // Update the variables according to the number of features and classes
  // present in the data.
  probabilities.zeros(classes);
  means.zeros(dimensionality, classes);
  variances.zeros(dimensionality, classes);

  Log::Info << "Training Naive Bayes classifier on " << data.n_cols
      << " examples with " << dimensionality << " features each." << std::endl;

  // Calculate the class probabilities as well as the sample mean and variance
  // for each of the features with respect to each of the labels.
  if (incrementalVariance)
  {
    // Use incremental algorithm.
    for (size_t j = 0; j < data.n_cols; ++j)
    {
      const size_t label = labels[j];
      ++probabilities[label];

      arma::vec delta = data.col(j) - means.col(label);
      means.col(label) += delta / probabilities[label];
      variances.col(label) += delta % (data.col(j) - means.col(label));
    }

    for (size_t i = 0; i < classes; ++i)
    {
      if (probabilities[i] > 2)
        variances.col(i) /= (probabilities[i] - 1);
    }
  }
  else
  {
    // Don't use incremental algorithm.  This is a two-pass algorithm.  It is
    // possible to calculate the means and variances using a faster one-pass
    // algorithm but there are some precision and stability issues.  If this is
    // too slow, it's an option to use the faster algorithm by default and then
    // have this (and the incremental algorithm) be other options.

    // Calculate the means.
    for (size_t j = 0; j < data.n_cols; ++j)
    {
      const size_t label = labels[j];
      ++probabilities[label];
      means.col(label) += data.col(j);
    }

    // Normalize means.
    for (size_t i = 0; i < classes; ++i)
      if (probabilities[i] != 0.0)
        means.col(i) /= probabilities[i];

    // Calculate variances.
    for (size_t j = 0; j < data.n_cols; ++j)
    {
      const size_t label = labels[j];
      variances.col(label) += square(data.col(j) - means.col(label));
    }

    // Normalize variances.
    for (size_t i = 0; i < classes; ++i)
      if (probabilities[i] > 1)
        variances.col(i) /= (probabilities[i] - 1);
  }

  // Ensure that the variances are invertible.
  for (size_t i = 0; i < variances.n_elem; ++i)
    if (variances[i] == 0.0)
      variances[i] = 1e-50;

  probabilities /= data.n_cols;
}
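The incremental branch above is Welford's online algorithm, applied independently per class and per dimension. A scalar sketch of the same recurrence (the struct and its names are illustrative):

#include <cstddef>

struct RunningMoments
{
  size_t n = 0;
  double mean = 0.0;
  double m2 = 0.0; // Running sum of squared deviations from the mean.

  void Add(const double x)
  {
    ++n;
    const double delta = x - mean;
    mean += delta / n;
    m2 += delta * (x - mean); // Uses the *updated* mean, as the code above does.
  }

  double Variance() const { return (n > 1) ? m2 / (n - 1) : 0.0; }
};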
void DrusillaSelect<MatType>::Train(
    const MatType& referenceSet,
    const size_t lIn,
    const size_t mIn)
{
  // Did the user specify a new size?  If so, use it.
  if (lIn > 0)
    l = lIn;
  if (mIn > 0)
    m = mIn;

  if ((l * m) > referenceSet.n_cols)
    throw std::invalid_argument("DrusillaSelect::Train(): l and m are too "
        "large!  Choose smaller values.  l*m must be smaller than the number "
        "of points in the dataset.");

  candidateSet.set_size(referenceSet.n_rows, l * m);
  candidateIndices.set_size(l * m);

  arma::vec dataMean(arma::mean(referenceSet, 1));
  arma::vec norms(referenceSet.n_cols);

  MatType refCopy(referenceSet.n_rows, referenceSet.n_cols);
  for (size_t i = 0; i < refCopy.n_cols; ++i)
  {
    refCopy.col(i) = referenceSet.col(i) - dataMean;
    norms[i] = arma::norm(refCopy.col(i));
  }

  // Find the top m points for each of the l projections...
  for (size_t i = 0; i < l; ++i)
  {
    // Pick best index.
    arma::uword maxIndex;
    norms.max(maxIndex);

    arma::vec line(refCopy.col(maxIndex) / arma::norm(refCopy.col(maxIndex)));

    // Calculate distortion and offset and make scores.
    std::vector<bool> closeAngle(referenceSet.n_cols, false);
    arma::vec sums(referenceSet.n_cols);
    for (size_t j = 0; j < referenceSet.n_cols; ++j)
    {
      if (norms[j] > 0.0)
      {
        const double offset = arma::dot(refCopy.col(j), line);
        const double distortion = arma::norm(refCopy.col(j) - offset * line);
        sums[j] = std::abs(offset) - std::abs(distortion);
        closeAngle[j] =
            (std::atan(distortion / std::abs(offset)) < (M_PI / 8.0));
      }
      else
      {
        sums[j] = norms[j];
      }
    }

    // Find the top m elements using a priority queue.
    typedef std::pair<double, size_t> Candidate;
    struct CandidateCmp
    {
      bool operator()(const Candidate& c1, const Candidate& c2)
      {
        return c2.first < c1.first;
      }
    };

    std::vector<Candidate> clist(m, std::make_pair(double(-DBL_MAX), size_t(-1)));
    std::priority_queue<Candidate, std::vector<Candidate>, CandidateCmp>
        pq(CandidateCmp(), std::move(clist));

    for (size_t j = 0; j < sums.n_elem; ++j)
    {
      Candidate c = std::make_pair(sums[j], j);
      if (CandidateCmp()(c, pq.top()))
      {
        pq.pop();
        pq.push(c);
      }
    }

    // Take the top m elements for this table.
    for (size_t j = 0; j < m; ++j)
    {
      const size_t index = pq.top().second;
      pq.pop();
      candidateSet.col(i * m + j) = referenceSet.col(index);
      candidateIndices[i * m + j] = index;

      // Mark the norm as -1 so we don't see this point again.
      norms[index] = -1.0;
    }

    // Calculate angles from the current projection.  Anything close enough,
    // mark the norm as 0.
    for (size_t j = 0; j < norms.n_elem; ++j)
      if (norms[j] > 0.0 && closeAngle[j])
        norms[j] = 0.0;
  }
}
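The priority queue in Train() is a fixed-size min-heap whose top element is always the worst of the current best m scores, so each new candidate costs only one comparison. The same pattern in isolation (a sketch; names are illustrative):

#include <cfloat>
#include <cstddef>
#include <queue>
#include <utility>
#include <vector>

inline std::vector<size_t> TopM(const std::vector<double>& scores,
                                const size_t m)
{
  typedef std::pair<double, size_t> Candidate;
  struct CandidateCmp
  {
    bool operator()(const Candidate& a, const Candidate& b) const
    {
      return b.first < a.first; // Smallest score ends up on top.
    }
  };

  // Seed the heap with m sentinels, exactly as Train() does.
  std::vector<Candidate> init(m, std::make_pair(-DBL_MAX, size_t(-1)));
  std::priority_queue<Candidate, std::vector<Candidate>, CandidateCmp>
      pq(CandidateCmp(), std::move(init));

  for (size_t j = 0; j < scores.size(); ++j)
  {
    if (scores[j] > pq.top().first) // Better than the current worst: replace.
    {
      pq.pop();
      pq.push(std::make_pair(scores[j], j));
    }
  }

  // Drain the heap; the result comes out worst-to-best.
  std::vector<size_t> indices;
  while (!pq.empty())
  {
    indices.push_back(pq.top().second);
    pq.pop();
  }
  return indices;
}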
void BuildVanillaNetwork(MatType& trainData,
                         MatType& trainLabels,
                         MatType& testData,
                         MatType& testLabels,
                         const size_t hiddenLayerSize,
                         const size_t maxEpochs,
                         const double classificationErrorThreshold)
{
  /*
   * Construct a feed forward network with trainData.n_rows input nodes,
   * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The
   * network structure looks like:
   *
   *  Input         Hidden        Output
   *  Layer         Layer         Layer
   * +-----+       +-----+       +-----+
   * |     |       |     |       |     |
   * |     +------>|     +------>|     |
   * |     |     +>|     |     +>|     |
   * +-----+     | +--+--+     | +-----+
   *             |             |
   *  Bias       |  Bias       |
   *  Layer      |  Layer      |
   * +-----+     | +-----+     |
   * |     |     | |     |     |
   * |     +-----+ |     +-----+
   * |     |       |     |
   * +-----+       +-----+
   */

  LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize);
  BiasLayer<> inputBiasLayer(hiddenLayerSize);
  BaseLayer<PerformanceFunction> inputBaseLayer;

  LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows);
  BiasLayer<> hiddenBiasLayer1(trainLabels.n_rows);
  BaseLayer<PerformanceFunction> outputLayer;

  OutputLayerType classOutputLayer;

  auto modules = std::tie(inputLayer, inputBiasLayer, inputBaseLayer,
                          hiddenLayer1, hiddenBiasLayer1, outputLayer);

  FFN<decltype(modules), decltype(classOutputLayer), RandomInitialization,
      PerformanceFunctionType> net(modules, classOutputLayer);

  RMSprop<decltype(net)> opt(net, 0.01, 0.88, 1e-8,
      maxEpochs * trainData.n_cols, 1e-18);

  net.Train(trainData, trainLabels, opt);

  MatType prediction;
  net.Predict(testData, prediction);

  size_t correct = 0;
  for (size_t i = 0; i < testData.n_cols; i++)
  {
    if (arma::sum(arma::sum(
        arma::abs(prediction.col(i) - testLabels.col(i)))) == 0)
    {
      correct++;
    }
  }

  double classificationError = 1 - double(correct) / testData.n_cols;
  BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold);
}
size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data,
                                           const size_t emptyCluster,
                                           arma::mat& centroids,
                                           arma::Col<size_t>& clusterCounts,
                                           MetricType& metric)
{
  // First, we need to find the cluster with maximum variance (by which I mean
  // the sum of the covariance matrices).
  arma::vec variances;
  variances.zeros(clusterCounts.n_elem); // Start with 0.
  arma::Col<size_t> assignments(data.n_cols);

  // Add the variance of each point's distance away from the cluster.  I think
  // this is the sensible thing to do.
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = centroids.n_cols; // Invalid value.

    for (size_t j = 0; j < centroids.n_cols; j++)
    {
      const double distance = metric.Evaluate(data.col(i), centroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    assignments[i] = closestCluster;
    variances[closestCluster] += metric.Evaluate(data.col(i),
        centroids.col(closestCluster));
  }

  // Divide by the number of points in the cluster to produce the variance.
  // Although a -nan will occur here for the empty cluster(s), this doesn't
  // matter because variances.max() won't pick it up.  If the number of points
  // in the cluster is 1, we keep that cluster from being selected by dividing
  // its variance by DBL_MAX, making it effectively zero.
  for (size_t i = 0; i < clusterCounts.n_elem; ++i)
    variances[i] /= (clusterCounts[i] == 1) ? DBL_MAX : clusterCounts[i];

  // Now find the cluster with maximum variance.
  arma::uword maxVarCluster;
  variances.max(maxVarCluster);

  // Now, inside this cluster, find the point which is furthest away.
  size_t furthestPoint = data.n_cols;
  double maxDistance = -DBL_MAX;
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    if (assignments[i] == maxVarCluster)
    {
      const double distance = metric.Evaluate(data.col(i),
          centroids.col(maxVarCluster));

      if (distance > maxDistance)
      {
        maxDistance = distance;
        furthestPoint = i;
      }
    }
  }

  // Take that point and add it to the empty cluster.
  centroids.col(maxVarCluster) *= (double(clusterCounts[maxVarCluster]) /
      double(clusterCounts[maxVarCluster] - 1));
  centroids.col(maxVarCluster) -= (1.0 / (clusterCounts[maxVarCluster] - 1.0)) *
      arma::vec(data.col(furthestPoint));
  clusterCounts[maxVarCluster]--;
  clusterCounts[emptyCluster]++;
  centroids.col(emptyCluster) = arma::vec(data.col(furthestPoint));
  assignments[furthestPoint] = emptyCluster;

  // Output some debugging information.
  Log::Debug << "Point " << furthestPoint << " assigned to empty cluster " <<
      emptyCluster << ".\n";

  return 1; // We only changed one point.
}
void NaiveBayesClassifier<MatType>::Train(const MatType& data,
                                          const arma::Row<size_t>& labels,
                                          const bool incremental)
{
  // Calculate the class probabilities as well as the sample mean and variance
  // for each of the features with respect to each of the labels.
  if (incremental)
  {
    // Use incremental algorithm.
    // First, de-normalize the probabilities.
    probabilities *= trainingPoints;

    for (size_t j = 0; j < data.n_cols; ++j)
    {
      const size_t label = labels[j];
      ++probabilities[label];

      arma::vec delta = data.col(j) - means.col(label);
      means.col(label) += delta / probabilities[label];
      variances.col(label) += delta % (data.col(j) - means.col(label));
    }

    for (size_t i = 0; i < probabilities.n_elem; ++i)
    {
      if (probabilities[i] > 2)
        variances.col(i) /= (probabilities[i] - 1);
    }
  }
  else
  {
    // Set all parameters to zero.
    probabilities.zeros();
    means.zeros();
    variances.zeros();

    // Don't use incremental algorithm.  This is a two-pass algorithm.  It is
    // possible to calculate the means and variances using a faster one-pass
    // algorithm but there are some precision and stability issues.  If this is
    // too slow, it's an option to use the faster algorithm by default and then
    // have this (and the incremental algorithm) be other options.

    // Calculate the means.
    for (size_t j = 0; j < data.n_cols; ++j)
    {
      const size_t label = labels[j];
      ++probabilities[label];
      means.col(label) += data.col(j);
    }

    // Normalize means.
    for (size_t i = 0; i < probabilities.n_elem; ++i)
      if (probabilities[i] != 0.0)
        means.col(i) /= probabilities[i];

    // Calculate variances.
    for (size_t j = 0; j < data.n_cols; ++j)
    {
      const size_t label = labels[j];
      variances.col(label) += square(data.col(j) - means.col(label));
    }

    // Normalize variances.
    for (size_t i = 0; i < probabilities.n_elem; ++i)
      if (probabilities[i] > 1)
        variances.col(i) /= (probabilities[i] - 1);
  }

  // Ensure that the variances are invertible.
  for (size_t i = 0; i < variances.n_elem; ++i)
    if (variances[i] == 0.0)
      variances[i] = 1e-50;

  probabilities /= data.n_cols;
  trainingPoints += data.n_cols;
}
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::Col<size_t>& assignments) const
{
  math::RandomSeed(std::time(NULL));

  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  // We will use these objects repeatedly for clustering.
  arma::Col<size_t> sampledAssignments;
  arma::mat centroids;
  KMeans<> kmeans;

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    kmeans.Cluster(sampledData, clusters, sampledAssignments, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  kmeans.Cluster(sampledCentroids, clusters, sampledAssignments, centroids);

  // Turn the final centroids into assignments.
  assignments.set_size(data.n_cols);
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = clusters;

    for (size_t j = 0; j < clusters; ++j)
    {
      const double distance = kmeans.Metric().Evaluate(data.col(i),
          centroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    // Assign the point to its closest cluster.
    assignments[i] = closestCluster;
  }
}