Exemple #1
0
typename CoverTree<MetricType, StatisticType, MatType,
    RootPointPolicy>::ElemType
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::
    MaxDistance(const CoverTree& other) const
{
  return metric->Evaluate(dataset->col(point),
      other.Dataset().col(other.Point())) +
      furthestDescendantDistance + other.FurthestDescendantDistance();
}
Exemple #2
0
typename CoverTree<MetricType, StatisticType, MatType,
    RootPointPolicy>::ElemType
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::
    MinDistance(const CoverTree& other) const
{
  // Every cover tree node will contain points up to base^(scale + 1) away.
  return std::max(metric->Evaluate(dataset->col(point),
      other.Dataset().col(other.Point())) -
      furthestDescendantDistance - other.FurthestDescendantDistance(), 0.0);
}
void CoverTree<MetricType, RootPointPolicy, StatisticType>::
DualTreeTraverser<RuleType>::Traverse(
    CoverTree<MetricType, RootPointPolicy, StatisticType>& queryNode,
    CoverTree<MetricType, RootPointPolicy, StatisticType>& referenceNode)
{
  // Start by creating a map and adding the reference root node to it.
  std::map<int, std::vector<DualCoverTreeMapEntry> > refMap;

  DualCoverTreeMapEntry rootRefEntry;

  rootRefEntry.referenceNode = &referenceNode;

  // Perform the evaluation between the roots of either tree.
  rootRefEntry.score = rule.Score(queryNode, referenceNode);
  rootRefEntry.baseCase = rule.BaseCase(queryNode.Point(),
      referenceNode.Point());
  rootRefEntry.traversalInfo = rule.TraversalInfo();

  refMap[referenceNode.Scale()].push_back(rootRefEntry);

  Traverse(queryNode, refMap);
}
Exemple #4
0
math::RangeType<typename
    CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::ElemType>
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::
    RangeDistance(const CoverTree& other) const
{
  const ElemType distance = metric->Evaluate(dataset->col(point),
      other.Dataset().col(other.Point()));

  math::RangeType<ElemType> result;
  result.Lo() = distance - furthestDescendantDistance -
      other.FurthestDescendantDistance();
  result.Hi() = distance + furthestDescendantDistance +
      other.FurthestDescendantDistance();

  return result;
}
void CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::
SingleTreeTraverser<RuleType>::Traverse(
    const size_t queryIndex,
    CoverTree& referenceNode)
{
  // This is a non-recursive implementation (which should be faster than a
  // recursive implementation).
  typedef CoverTreeMapEntry<MetricType, StatisticType, MatType, RootPointPolicy>
      MapEntryType;

  // We will use this map as a priority queue.  Each key represents the scale,
  // and then the vector is all the nodes in that scale which need to be
  // investigated.  Because no point in a scale can add a point in its own
  // scale, we know that the vector for each scale is final when we get to it.
  // In addition, map is organized in such a way that rbegin() will return the
  // largest scale.
  std::map<int, std::vector<MapEntryType> > mapQueue;

  // Create the score for the children.
  double rootChildScore = rule.Score(queryIndex, referenceNode);

  if (rootChildScore == DBL_MAX)
  {
    numPrunes += referenceNode.NumChildren();
  }
  else
  {
    // Manually add the children of the first node.
    // Often, a ruleset will return without doing any computation on cover trees
    // using TreeTraits::FirstPointIsCentroid; this is an optimization that
    // (theoretically) the compiler should get right.
    double rootBaseCase = rule.BaseCase(queryIndex, referenceNode.Point());

    // Don't add the self-leaf.
    size_t i = 0;
    if (referenceNode.Child(0).NumChildren() == 0)
    {
      ++numPrunes;
      i = 1;
    }

    for (/* i was set above. */; i < referenceNode.NumChildren(); ++i)
    {
      MapEntryType newFrame;
      newFrame.node = &referenceNode.Child(i);
      newFrame.score = rootChildScore;
      newFrame.baseCase = rootBaseCase;
      newFrame.parent = referenceNode.Point();

      // Put it into the map.
      mapQueue[newFrame.node->Scale()].push_back(newFrame);
    }
  }

  // Now begin the iteration through the map, but only if it has anything in it.
  if (mapQueue.empty())
    return;
  typename std::map<int, std::vector<MapEntryType> >::reverse_iterator rit =
      mapQueue.rbegin();

  // We will treat the leaves differently (below).
  while ((*rit).first != INT_MIN)
  {
    // Get a reference to the current scale.
    std::vector<MapEntryType>& scaleVector = (*rit).second;

    // Before traversing all the points in this scale, sort by score.
    std::sort(scaleVector.begin(), scaleVector.end());

    // Now loop over each element.
    for (size_t i = 0; i < scaleVector.size(); ++i)
    {
      // Get a reference to the current element.
      const MapEntryType& frame = scaleVector.at(i);

      CoverTree* node = frame.node;
      const double score = frame.score;
      const size_t parent = frame.parent;
      const size_t point = node->Point();
      double baseCase = frame.baseCase;

      // First we recalculate the score of this node to find if we can prune it.
      if (rule.Rescore(queryIndex, *node, score) == DBL_MAX)
      {
        ++numPrunes;
        continue;
      }

      // Create the score for the children.
      const double childScore = rule.Score(queryIndex, *node);

      // Now if this childScore is DBL_MAX we can prune all children.  In this
      // recursion setup pruning is all or nothing for children.
      if (childScore == DBL_MAX)
      {
        numPrunes += node->NumChildren();
        continue;
      }

      // If we are a self-child, the base case has already been evaluated.
      // Often, a ruleset will return without doing any computation on cover
      // trees using TreeTraits::FirstPointIsCentroid; this is an optimization
      // that (theoretically) the compiler should get right.
      if (point != parent)
        baseCase = rule.BaseCase(queryIndex, point);

      // Don't add the self-leaf.
      size_t j = 0;
      if (node->Child(0).NumChildren() == 0)
      {
        ++numPrunes;
        j = 1;
      }

      for (/* j is already set. */; j < node->NumChildren(); ++j)
      {
        MapEntryType newFrame;
        newFrame.node = &node->Child(j);
        newFrame.score = childScore;
        newFrame.baseCase = baseCase;
        newFrame.parent = point;

        mapQueue[newFrame.node->Scale()].push_back(newFrame);
      }
    }

    // Now clear the memory for this scale; it isn't needed anymore.
    mapQueue.erase((*rit).first);
  }

  // Now deal with the leaves.
  for (size_t i = 0; i < mapQueue[INT_MIN].size(); ++i)
  {
    const MapEntryType& frame = mapQueue[INT_MIN].at(i);

    CoverTree* node = frame.node;
    const double score = frame.score;
    const size_t point = node->Point();

    // First, recalculate the score of this node to find if we can prune it.
    double rescore = rule.Rescore(queryIndex, *node, score);

    if (rescore == DBL_MAX)
    {
      ++numPrunes;
      continue;
    }

    // For this to be a valid dual-tree algorithm, we *must* evaluate the
    // combination, even if pruning it will make no difference.  It's the
    // definition.
    const double actualScore = rule.Score(queryIndex, *node);

    if (actualScore == DBL_MAX)
    {
      ++numPrunes;
      continue;
    }
    else
    {
      // Evaluate the base case, since the combination was not pruned.
      // Often, a ruleset will return without doing any computation on cover
      // trees using TreeTraits::FirstPointIsCentroid; this is an optimization
      // that (theoretically) the compiler should get right.
      rule.BaseCase(queryIndex, point);
    }
  }
}
void CoverTree<MetricType, RootPointPolicy, StatisticType>::
DualTreeTraverser<RuleType>::Traverse(
    CoverTree<MetricType, RootPointPolicy, StatisticType>& queryNode,
    std::map<int, std::vector<DualCoverTreeMapEntry> >& referenceMap)
{
  if (referenceMap.size() == 0)
    return; // Nothing to do!

  // First recurse down the reference nodes as necessary.
  ReferenceRecursion(queryNode, referenceMap);

  // Did the map get emptied?
  if (referenceMap.size() == 0)
    return; // Nothing to do!

  // Now, reduce the scale of the query node by recursing.  But we can't recurse
  // if the query node is a leaf node.
  if ((queryNode.Scale() != INT_MIN) &&
      (queryNode.Scale() >= (*referenceMap.rbegin()).first))
  {
    // Recurse into the non-self-children first.  The recursion order cannot
    // affect the runtime of the algorithm, because each query child recursion's
    // results are separate and independent.  I don't think this is true in
    // every case, and we may have to modify this section to consider scores in
    // the future.
    for (size_t i = 1; i < queryNode.NumChildren(); ++i)
    {
      // We need a copy of the map for this child.
      std::map<int, std::vector<DualCoverTreeMapEntry> > childMap;
      PruneMap(queryNode.Child(i), referenceMap, childMap);
      Traverse(queryNode.Child(i), childMap);
    }
    std::map<int, std::vector<DualCoverTreeMapEntry> > selfChildMap;
    PruneMap(queryNode.Child(0), referenceMap, selfChildMap);
    Traverse(queryNode.Child(0), selfChildMap);
  }

  if (queryNode.Scale() != INT_MIN)
    return; // No need to evaluate base cases at this level.  It's all done.

  // If we have made it this far, all we have is a bunch of base case
  // evaluations to do.
  Log::Assert((*referenceMap.begin()).first == INT_MIN);
  Log::Assert(queryNode.Scale() == INT_MIN);
  std::vector<DualCoverTreeMapEntry>& pointVector =
      (*referenceMap.begin()).second;

  for (size_t i = 0; i < pointVector.size(); ++i)
  {
    // Get a reference to the frame.
    const DualCoverTreeMapEntry& frame = pointVector[i];

    CoverTree<MetricType, RootPointPolicy, StatisticType>* refNode =
        frame.referenceNode;

    // If the point is the same as both parents, then we have already done this
    // base case.
    if ((refNode->Point() == refNode->Parent()->Point()) &&
        (queryNode.Point() == queryNode.Parent()->Point()))
    {
      ++numPrunes;
      continue;
    }

    // Score the node, to see if we can prune it, after restoring the traversal
    // info.
    rule.TraversalInfo() = frame.traversalInfo;
    double score = rule.Score(queryNode, *refNode);

    if (score == DBL_MAX)
    {
      ++numPrunes;
      continue;
    }

    // If not, compute the base case.
    rule.BaseCase(queryNode.Point(), pointVector[i].referenceNode->Point());
  }
}
void CoverTree<MetricType, RootPointPolicy, StatisticType>::
DualTreeTraverser<RuleType>::ReferenceRecursion(
    CoverTree& queryNode,
    std::map<int, std::vector<DualCoverTreeMapEntry> >& referenceMap)
{
  // First, reduce the maximum scale in the reference map down to the scale of
  // the query node.
  while (!referenceMap.empty())
  {
    // Hacky bullshit to imitate jl cover tree.
    if (queryNode.Parent() == NULL && (*referenceMap.rbegin()).first <
        queryNode.Scale())
      break;
    if (queryNode.Parent() != NULL && (*referenceMap.rbegin()).first <=
        queryNode.Scale())
      break;
    // If the query node's scale is INT_MIN and the reference map's maximum
    // scale is INT_MIN, don't try to recurse...
    if ((queryNode.Scale() == INT_MIN) &&
       ((*referenceMap.rbegin()).first == INT_MIN))
      break;

    // Get a reference to the current largest scale.
    std::vector<DualCoverTreeMapEntry>& scaleVector = (*referenceMap.rbegin()).second;

    // Before traversing all the points in this scale, sort by score.
    std::sort(scaleVector.begin(), scaleVector.end());

    // Now loop over each element.
    for (size_t i = 0; i < scaleVector.size(); ++i)
    {
      // Get a reference to the current element.
      const DualCoverTreeMapEntry& frame = scaleVector.at(i);

      CoverTree<MetricType, RootPointPolicy, StatisticType>* refNode =
          frame.referenceNode;

      // Create the score for the children.
      double score = rule.Rescore(queryNode, *refNode, frame.score);

      // Now if this childScore is DBL_MAX we can prune all children.  In this
      // recursion setup pruning is all or nothing for children.
      if (score == DBL_MAX)
      {
        ++numPrunes;
        continue;
      }

      // If it is not pruned, we must evaluate the base case.

      // Add the children.
      for (size_t j = 0; j < refNode->NumChildren(); ++j)
      {
        rule.TraversalInfo() = frame.traversalInfo;
        double childScore = rule.Score(queryNode, refNode->Child(j));
        if (childScore == DBL_MAX)
        {
          ++numPrunes;
          continue;
        }

        // It wasn't pruned; evaluate the base case.
        const double baseCase = rule.BaseCase(queryNode.Point(),
            refNode->Child(j).Point());

        DualCoverTreeMapEntry newFrame;
        newFrame.referenceNode = &refNode->Child(j);
        newFrame.score = childScore; // Use the score of the parent.
        newFrame.baseCase = baseCase;
        newFrame.traversalInfo = rule.TraversalInfo();

        referenceMap[newFrame.referenceNode->Scale()].push_back(newFrame);
      }
    }

    // Now clear the memory for this scale; it isn't needed anymore.
    referenceMap.erase((*referenceMap.rbegin()).first);
  }
}
void CoverTree<MetricType, RootPointPolicy, StatisticType>::
DualTreeTraverser<RuleType>::PruneMap(
    CoverTree& queryNode,
    std::map<int, std::vector<DualCoverTreeMapEntry> >& referenceMap,
    std::map<int, std::vector<DualCoverTreeMapEntry> >& childMap)
{
  if (referenceMap.empty())
    return; // Nothing to do.

  // Copy the zero set first.
  if ((*referenceMap.begin()).first == INT_MIN)
  {
    // Get a reference to the vector representing the entries at this scale.
    std::vector<DualCoverTreeMapEntry>& scaleVector =
        (*referenceMap.begin()).second;

    // Before traversing all the points in this scale, sort by score.
    std::sort(scaleVector.begin(), scaleVector.end());

    const int thisScale = (*referenceMap.begin()).first;
    childMap[thisScale].reserve(scaleVector.size());
    std::vector<DualCoverTreeMapEntry>& newScaleVector = childMap[thisScale];

    // Loop over each entry in the vector.
    for (size_t j = 0; j < scaleVector.size(); ++j)
    {
      const DualCoverTreeMapEntry& frame = scaleVector[j];

      // First evaluate if we can prune without performing the base case.
      CoverTree<MetricType, RootPointPolicy, StatisticType>* refNode =
          frame.referenceNode;

      // Perform the actual scoring, after restoring the traversal info.
      rule.TraversalInfo() = frame.traversalInfo;
      double score = rule.Score(queryNode, *refNode);

      if (score == DBL_MAX)
      {
        // Pruned.  Move on.
        ++numPrunes;
        continue;
      }

      // If it isn't pruned, we must evaluate the base case.
      const double baseCase = rule.BaseCase(queryNode.Point(),
          refNode->Point());

      // Add to child map.
      newScaleVector.push_back(frame);
      newScaleVector.back().score = score;
      newScaleVector.back().baseCase = baseCase;
      newScaleVector.back().traversalInfo = rule.TraversalInfo();
    }

    // If we didn't add anything, then strike this vector from the map.
    if (newScaleVector.size() == 0)
      childMap.erase((*referenceMap.begin()).first);
  }

  typename std::map<int, std::vector<DualCoverTreeMapEntry> >::reverse_iterator
      it = referenceMap.rbegin();

  while ((it != referenceMap.rend()))
  {
    const int thisScale = (*it).first;
    if (thisScale == INT_MIN) // We already did it.
      break;

    // Get a reference to the vector representing the entries at this scale.
    std::vector<DualCoverTreeMapEntry>& scaleVector = (*it).second;

    // Before traversing all the points in this scale, sort by score.
    std::sort(scaleVector.begin(), scaleVector.end());

    childMap[thisScale].reserve(scaleVector.size());
    std::vector<DualCoverTreeMapEntry>& newScaleVector = childMap[thisScale];

    // Loop over each entry in the vector.
    for (size_t j = 0; j < scaleVector.size(); ++j)
    {
      const DualCoverTreeMapEntry& frame = scaleVector[j];

      // First evaluate if we can prune without performing the base case.
      CoverTree<MetricType, RootPointPolicy, StatisticType>* refNode =
          frame.referenceNode;

      // Perform the actual scoring, after restoring the traversal info.
      rule.TraversalInfo() = frame.traversalInfo;
      double score = rule.Score(queryNode, *refNode);

      if (score == DBL_MAX)
      {
        // Pruned.  Move on.
        ++numPrunes;
        continue;
      }

      // If it isn't pruned, we must evaluate the base case.
      const double baseCase = rule.BaseCase(queryNode.Point(),
          refNode->Point());

      // Add to child map.
      newScaleVector.push_back(frame);
      newScaleVector.back().score = score;
      newScaleVector.back().baseCase = baseCase;
      newScaleVector.back().traversalInfo = rule.TraversalInfo();
    }

    // If we didn't add anything, then strike this vector from the map.
    if (newScaleVector.size() == 0)
      childMap.erase((*it).first);

    ++it; // Advance to next scale.
  }
}