BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    dataset(data)
{
  // Hopefully the vector is initialized correctly!  We can't check that
  // entirely but we can do a minor sanity check.


  // Perform the actual splitting.
  SplitNode(data, oldFromNew);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);

  // Map the newFromOld indices correctly.
  newFromOld.resize(data.n_cols);
  for (size_t i = 0; i < data.n_cols; i++)
    newFromOld[oldFromNew[i]] = i;
}
Ejemplo n.º 2
0
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    MatType&& data,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(NULL),
    right(NULL),
    parent(NULL),
    count(0),
    pointsIndex(NULL),
    overlappingNode(false),
    hyperplane(),
    bound(data.n_rows),
    parentDistance(0), // Parent distance for the root is 0: it has no parent.
    dataset(new MatType(std::move(data))),
    localDataset(true)
{
  arma::Col<size_t> points;
  if (dataset->n_cols > 0)
    // Fill points with all possible indexes: 0 .. (dataset->n_cols - 1).
    points = arma::linspace<arma::Col<size_t>>(0, dataset->n_cols - 1,
        dataset->n_cols);

  // Do the actual splitting of this node.
  SplitNode(points, maxLeafSize, tau, rho);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);
}
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(NULL),
    begin(0),
    count(data.n_cols),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    parentDistance(0), // Parent distance for the root is 0: it has no parent.
    dataset(data)
{
  // Initialize the oldFromNew vector correctly.
  oldFromNew.resize(data.n_cols);
  for (size_t i = 0; i < data.n_cols; i++)
    oldFromNew[i] = i; // Fill with unharmed indices.

  // Now do the actual splitting.
  SplitNode(data, oldFromNew);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);

  // Map the newFromOld indices correctly.
  newFromOld.resize(data.n_cols);
  for (size_t i = 0; i < data.n_cols; i++)
    newFromOld[oldFromNew[i]] = i;
}
Ejemplo n.º 4
0
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::CoverTree(
    const MatType& dataset,
    const ElemType base,
    const size_t pointIndex,
    const int scale,
    CoverTree* parent,
    const ElemType parentDistance,
    const ElemType furthestDescendantDistance,
    MetricType* metric) :
    dataset(&dataset),
    point(pointIndex),
    scale(scale),
    base(base),
    numDescendants(0),
    parent(parent),
    parentDistance(parentDistance),
    furthestDescendantDistance(furthestDescendantDistance),
    localMetric(metric == NULL),
    localDataset(false),
    metric(metric),
    distanceComps(0)
{
  // If necessary, create a local metric.
  if (localMetric)
    this->metric = new MetricType();

  // Initialize the statistic.
  stat = StatisticType(*this);
}
Ejemplo n.º 5
0
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    SplitType& splitter,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // Hopefully the vector is initialized correctly!  We can't check that
  // entirely but we can do a minor sanity check.
  assert(oldFromNew.size() == data.n_cols);

  // Perform the actual splitting.
  SplitNode(data, oldFromNew, maxLeafSize, splitter);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);
}
Ejemplo n.º 6
0
inline void CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::
    RemoveNewImplicitNodes()
{
  // If we created an implicit node, take its self-child instead (this could
  // happen multiple times).
  while (children[children.size() - 1]->NumChildren() == 1)
  {
    CoverTree* old = children[children.size() - 1];
    children.erase(children.begin() + children.size() - 1);

    // Now take its child.
    children.push_back(&(old->Child(0)));

    // Set its parent and parameters correctly, and rebuild the statistic.
    old->Child(0).Parent() = this;
    old->Child(0).ParentDistance() = old->ParentDistance();
    old->Child(0).DistanceComps() = old->DistanceComps();
    old->Child(0).Stat() = StatisticType(old->Child(0));

    // Remove its child (so it doesn't delete it).
    old->Children().erase(old->Children().begin() + old->Children().size() - 1);

    // Now delete it.
    delete old;
  }
}
Ejemplo n.º 7
0
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::CoverTree(
    const MatType& dataset,
    const ElemType base,
    const size_t pointIndex,
    const int scale,
    CoverTree* parent,
    const ElemType parentDistance,
    arma::Col<size_t>& indices,
    arma::vec& distances,
    size_t nearSetSize,
    size_t& farSetSize,
    size_t& usedSetSize,
    MetricType& metric) :
    dataset(&dataset),
    point(pointIndex),
    scale(scale),
    base(base),
    numDescendants(0),
    parent(parent),
    parentDistance(parentDistance),
    furthestDescendantDistance(0),
    localMetric(false),
    localDataset(false),
    metric(&metric),
    distanceComps(0)
{
  // If the size of the near set is 0, this is a leaf.
  if (nearSetSize == 0)
  {
    this->scale = INT_MIN;
    numDescendants = 1;
    stat = StatisticType(*this);
    return;
  }

  // Otherwise, create the children.
  CreateChildren(indices, distances, nearSetSize, farSetSize, usedSetSize);

  // Initialize statistic.
  stat = StatisticType(*this);
}
Ejemplo n.º 8
0
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(NULL),
    begin(0), /* This root node starts at index 0, */
    count(data.n_cols), /* and spans all of the dataset. */
    bound(data.n_rows),
    parentDistance(0), // Parent distance for the root is 0: it has no parent.
    dataset(data)
{
  // Do the actual splitting of this node.
  SplitNode(data, maxLeafSize);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);
}
Ejemplo n.º 9
0
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // Perform the actual splitting.
  SplitNode(data, maxLeafSize);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);
}
Ejemplo n.º 10
0
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    SpillTree* parent,
    arma::Col<size_t>& points,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(NULL),
    right(NULL),
    parent(parent),
    count(0),
    pointsIndex(NULL),
    overlappingNode(false),
    hyperplane(),
    bound(parent->Dataset().n_rows),
    dataset(&parent->Dataset()), // Point to the parent's dataset.
    localDataset(false)
{
  // Perform the actual splitting.
  SplitNode(points, maxLeafSize, tau, rho);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);
}
Ejemplo n.º 11
0
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::CoverTree(
    MatType&& data,
    MetricType& metric,
    const ElemType base) :
    dataset(new MatType(std::move(data))),
    point(RootPointPolicy::ChooseRoot(dataset)),
    scale(INT_MAX),
    base(base),
    numDescendants(0),
    parent(NULL),
    parentDistance(0),
    furthestDescendantDistance(0),
    localMetric(false),
    localDataset(true),
    metric(&metric),
    distanceComps(0)
{
  // If there is only one point or zero points in the dataset... uh, we're done.
  // Technically, if the dataset has zero points, our node is not correct...
  if (dataset->n_cols <= 1)
  {
    scale = INT_MIN;
    return;
  }

  // Kick off the building.  Create the indices array and the distances array.
  arma::Col<size_t> indices = arma::linspace<arma::Col<size_t> >(1,
      dataset->n_cols - 1, dataset->n_cols - 1);
  // This is now [1 2 3 4 ... n].  We must be sure that our point does not
  // occur.
  if (point != 0)
    indices[point - 1] = 0; // Put 0 back into the set; remove what was there.

  arma::vec distances(dataset->n_cols - 1);

  // Build the initial distances.
  ComputeDistances(point, indices, distances, dataset->n_cols - 1);

  // Create the children.
  size_t farSetSize = 0;
  size_t usedSetSize = 0;
  CreateChildren(indices, distances, dataset->n_cols - 1, farSetSize,
      usedSetSize);

  // If we ended up creating only one child, remove the implicit node.
  while (children.size() == 1)
  {
    // Prepare to delete the implicit child node.
    CoverTree* old = children[0];

    // Now take its children and set their parent correctly.
    children.erase(children.begin());
    for (size_t i = 0; i < old->NumChildren(); ++i)
    {
      children.push_back(&(old->Child(i)));

      // Set its parent correctly, and rebuild the statistic.
      old->Child(i).Parent() = this;
      old->Child(i).Stat() = StatisticType(old->Child(i));
    }

    // Remove all the children so they don't get erased.
    old->Children().clear();

    // Reduce our own scale.
    scale = old->Scale();

    // Now delete it.
    delete old;
  }

  // Use the furthest descendant distance to determine the scale of the root
  // node.
  if (furthestDescendantDistance == 0.0)
    scale = INT_MIN;
  else
    scale = (int) ceil(log(furthestDescendantDistance) / log(base));

  // Initialize statistic.
  stat = StatisticType(*this);

  Log::Info << distanceComps << " distance computations during tree "
      << "construction." << std::endl;
}