void RangeSearchRules<MetricType, TreeType>::AddResult(const size_t queryIndex,
                                                       TreeType& referenceNode)
{
  // Some types of trees calculate the base case evaluation before Score() is
  // called, so if the base case has already been calculated, then we must avoid
  // adding that point to the results again.
  size_t baseCaseMod = 0;
  if (tree::TreeTraits<TreeType>::FirstPointIsCentroid &&
      (queryIndex == lastQueryIndex) &&
      (referenceNode.Point(0) == lastReferenceIndex))
  {
    baseCaseMod = 1;
  }

  // Resize distances and neighbors vectors appropriately.  We have to use
  // reserve() and not resize(), because we don't know if we will encounter the
  // case where the datasets and points are the same (and we skip in that case).
  const size_t oldSize = neighbors[queryIndex].size();
  neighbors[queryIndex].reserve(oldSize + referenceNode.NumDescendants() -
      baseCaseMod);
  distances[queryIndex].reserve(oldSize + referenceNode.NumDescendants() -
      baseCaseMod);

  for (size_t i = baseCaseMod; i < referenceNode.NumDescendants(); ++i)
  {
    if ((&referenceSet == &querySet) &&
        (queryIndex == referenceNode.Descendant(i)))
      continue;

    const double distance = metric.Evaluate(querySet.unsafe_col(queryIndex),
        referenceNode.Dataset().unsafe_col(referenceNode.Descendant(i)));

    neighbors[queryIndex].push_back(referenceNode.Descendant(i));
    distances[queryIndex].push_back(distance);
  }
}
Esempio n. 2
0
void CheckBound(const TreeType& tree)
{
  typedef typename TreeType::ElemType ElemType;
  for (size_t i = 0; i < tree.NumDescendants(); i++)
  {
    arma::Col<ElemType> point = tree.Dataset().col(tree.Descendant(i));

    // Check that the point is contained in the bound.
    BOOST_REQUIRE_EQUAL(true, tree.Bound().Contains(point));

    const arma::Mat<ElemType>& loBound = tree.Bound().LoBound();
    const arma::Mat<ElemType>& hiBound = tree.Bound().HiBound();

    // Ensure that there is a hyperrectangle that contains the point.
    bool success = false;
    for (size_t j = 0; j < tree.Bound().NumBounds(); j++)
    {
      success = true;
      for (size_t k = 0; k < loBound.n_rows; k++)
      {
        if (point[k] < loBound(k, j) - 1e-14 * std::fabs(loBound(k, j)) ||
            point[k] > hiBound(k, j) + 1e-14 * std::fabs(hiBound(k, j)))
        {
          success = false;
          break;
        }
      }
      if (success)
        break;
    }

    BOOST_REQUIRE_EQUAL(success, true);
  }

  if (!tree.IsLeaf())
  {
    CheckBound(*tree.Left());
    CheckBound(*tree.Right());
  }
}
void GreedySingleTreeTraverser<TreeType, RuleType>::Traverse(
    const size_t queryIndex,
    TreeType& referenceNode)
{
  // Run the base case as necessary for all the points in the reference node.
  for (size_t i = 0; i < referenceNode.NumPoints(); ++i)
    rule.BaseCase(queryIndex, referenceNode.Point(i));

  size_t bestChild = rule.GetBestChild(queryIndex, referenceNode);
  size_t numDescendants;

  // Check that referencenode is not a leaf node while calculating number of
  // descendants of it's best child.
  if (!referenceNode.IsLeaf())
    numDescendants = referenceNode.Child(bestChild).NumDescendants();
  else
    numDescendants = referenceNode.NumPoints();

  // If number of descendants are more than minBaseCases than we can go along
  // with best child otherwise we need to traverse for each descendant to
  // ensure that we calculate at least minBaseCases number of base cases.
  if (!referenceNode.IsLeaf())
  {
    if (numDescendants > minBaseCases)
    {
      // We are prunning all but one child.
      numPrunes += referenceNode.NumChildren() - 1;
      // Recurse the best child.
      Traverse(queryIndex, referenceNode.Child(bestChild));
    }
    else
    {
      // Run the base case over first minBaseCases number of descendants.
      for (size_t i = 0; i <= minBaseCases; ++i)
        rule.BaseCase(queryIndex, referenceNode.Descendant(i));
    }
  }
}
Esempio n. 4
0
inline double KDERules<MetricType, KernelType, TreeType>::
Score(TreeType& queryNode, TreeType& referenceNode)
{
  double score, maxKernel, minKernel, bound;
  const double minDistance = queryNode.MinDistance(referenceNode);
  // Calculations are not duplicated.
  bool newCalculations = true;

  if (tree::TreeTraits<TreeType>::FirstPointIsCentroid &&
      (traversalInfo.LastQueryNode() != NULL) &&
      (traversalInfo.LastReferenceNode() != NULL) &&
      (traversalInfo.LastQueryNode()->Point(0) == queryNode.Point(0)) &&
      (traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0)))
  {
    // Don't duplicate calculations.
    newCalculations = false;
    lastQueryIndex = queryNode.Point(0);
    lastReferenceIndex = referenceNode.Point(0);
  }
  else
  {
    // Calculations are new.
    maxKernel = kernel.Evaluate(minDistance);
    minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode));
    bound = maxKernel - minKernel;
  }

  // If possible, avoid some calculations because of the error tolerance.
  if (newCalculations &&
      bound <= (absError + relError * minKernel) / referenceSet.n_cols)
  {
    // Auxiliary variables.
    double kernelValue;
    kde::KDEStat& referenceStat = referenceNode.Stat();
    kde::KDEStat& queryStat = queryNode.Stat();

    // If calculating a center is not required.
    if (tree::TreeTraits<TreeType>::FirstPointIsCentroid)
    {
      kernelValue = EvaluateKernel(queryNode.Point(0), referenceNode.Point(0));
    }
    // Sadly, we have no choice but to calculate the center.
    else
    {
      kernelValue = EvaluateKernel(queryStat.Centroid(),
                                   referenceStat.Centroid());
    }

    // Sum up estimations.
    for (size_t i = 0; i < queryNode.NumDescendants(); ++i)
    {
      densities(queryNode.Descendant(i)) +=
          referenceNode.NumDescendants() * kernelValue;
    }
    score = DBL_MAX;
  }
  else
  {
    score = minDistance;
  }

  ++scores;
  traversalInfo.LastQueryNode() = &queryNode;
  traversalInfo.LastReferenceNode() = &referenceNode;
  traversalInfo.LastScore() = score;
  return score;
}
Esempio n. 5
0
void CheckDistance(TreeType& tree, TreeType* node = NULL)
{
  typedef typename TreeType::ElemType ElemType;
  if (node == NULL)
  {
    node = &tree;

    while (node->Parent() != NULL)
      node = node->Parent();

    CheckDistance<TreeType, MetricType>(tree, node);

    for (size_t j = 0; j < tree.Dataset().n_cols; j++)
    {
      const arma::Col<ElemType>& point = tree.  Dataset().col(j);
      ElemType maxDist = 0;
      ElemType minDist = std::numeric_limits<ElemType>::max();
      for (size_t i = 0; i < tree.NumDescendants(); i++)
      {
        ElemType dist = MetricType::Evaluate(
            tree.Dataset().col(tree.Descendant(i)),
            tree.Dataset().col(j));

        if (dist > maxDist)
          maxDist = dist;
        if (dist < minDist)
          minDist = dist;
      }

      BOOST_REQUIRE_LE(tree.Bound().MinDistance(point), minDist *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));
      BOOST_REQUIRE_LE(maxDist, tree.Bound().MaxDistance(point) *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));

      math::RangeType<ElemType> r = tree.Bound().RangeDistance(point);

      BOOST_REQUIRE_LE(r.Lo(), minDist *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));
      BOOST_REQUIRE_LE(maxDist, r.Hi() *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));
    }
      
    if (!tree.IsLeaf())
    {
      CheckDistance<TreeType, MetricType>(*tree.Left());
      CheckDistance<TreeType, MetricType>(*tree.Right());
    }
  }
  else
  {
    if (&tree != node)
    {
      ElemType maxDist = 0;
      ElemType minDist = std::numeric_limits<ElemType>::max();
      for (size_t i = 0; i < tree.NumDescendants(); i++)
        for (size_t j = 0; j < node->NumDescendants(); j++)
        {
          ElemType dist = MetricType::Evaluate(
              tree.Dataset().col(tree.Descendant(i)),
              node->Dataset().col(node->Descendant(j)));

          if (dist > maxDist)
            maxDist = dist;
          if (dist < minDist)
            minDist = dist;
        }

      BOOST_REQUIRE_LE(tree.Bound().MinDistance(node->Bound()), minDist *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));
      BOOST_REQUIRE_LE(maxDist, tree.Bound().MaxDistance(node->Bound()) *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));

      math::RangeType<ElemType> r = tree.Bound().RangeDistance(node->Bound());

      BOOST_REQUIRE_LE(r.Lo(), minDist *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));
      BOOST_REQUIRE_LE(maxDist, r.Hi() *
          (1.0 + 10 * std::numeric_limits<ElemType>::epsilon()));
    }
    if (!node->IsLeaf())
    {
      CheckDistance<TreeType, MetricType>(tree, node->Left());
      CheckDistance<TreeType, MetricType>(tree, node->Right());
    }
  }
}
double RangeSearchRules<MetricType, TreeType>::Score(TreeType& queryNode,
                                                     TreeType& referenceNode)
{
  math::Range distances;
  if (tree::TreeTraits<TreeType>::FirstPointIsCentroid)
  {
    // It is possible that the base case has already been calculated.
    double baseCase = 0.0;
    bool alreadyDone = false;
    if (tree::TreeTraits<TreeType>::HasSelfChildren)
    {
      TreeType* lastQuery = (TreeType*) referenceNode.Stat().LastDistanceNode();
      TreeType* lastRef = (TreeType*) queryNode.Stat().LastDistanceNode();

      // Did the query node's last combination do the base case?
      if ((lastRef != NULL) && (referenceNode.Point(0) == lastRef->Point(0)))
      {
        baseCase = queryNode.Stat().LastDistance();
        alreadyDone = true;
      }

      // Did the reference node's last combination do the base case?
      if ((lastQuery != NULL) && (queryNode.Point(0) == lastQuery->Point(0)))
      {
        baseCase = referenceNode.Stat().LastDistance();
        alreadyDone = true;
      }

      // If the query node is a self-child, did the query parent's last
      // combination do the base case?
      if ((queryNode.Parent() != NULL) &&
          (queryNode.Point(0) == queryNode.Parent()->Point(0)))
      {
        TreeType* lastParentRef = (TreeType*)
            queryNode.Parent()->Stat().LastDistanceNode();
        if ((lastParentRef != NULL) &&
            (referenceNode.Point(0) == lastParentRef->Point(0)))
        {
          baseCase = queryNode.Parent()->Stat().LastDistance();
          alreadyDone = true;
        }
      }

      // If the reference node is a self-child, did the reference parent's last
      // combination do the base case?
      if ((referenceNode.Parent() != NULL) &&
          (referenceNode.Point(0) == referenceNode.Parent()->Point(0)))
      {
        TreeType* lastQueryRef = (TreeType*)
            referenceNode.Parent()->Stat().LastDistanceNode();
        if ((lastQueryRef != NULL) &&
            (queryNode.Point(0) == lastQueryRef->Point(0)))
        {
          baseCase = referenceNode.Parent()->Stat().LastDistance();
          alreadyDone = true;
        }
      }
    }

    if (!alreadyDone)
    {
      // We must calculate the base case.
      baseCase = BaseCase(queryNode.Point(0), referenceNode.Point(0));
    }
    else
    {
      // Make sure that if BaseCase() is called, we don't duplicate results.
      lastQueryIndex = queryNode.Point(0);
      lastReferenceIndex = referenceNode.Point(0);
    }

    distances.Lo() = baseCase - queryNode.FurthestDescendantDistance()
        - referenceNode.FurthestDescendantDistance();
    distances.Hi() = baseCase + queryNode.FurthestDescendantDistance()
        + referenceNode.FurthestDescendantDistance();

    // Update the last distances performed for the query and reference node.
    queryNode.Stat().LastDistanceNode() = (void*) &referenceNode;
    queryNode.Stat().LastDistance() = baseCase;
    referenceNode.Stat().LastDistanceNode() = (void*) &queryNode;
    referenceNode.Stat().LastDistance() = baseCase;
  }
  else
  {
    // Just perform the calculation.
    distances = referenceNode.RangeDistance(&queryNode);
  }

  // If the ranges do not overlap, prune this node.
  if (!distances.Contains(range))
    return DBL_MAX;

  // In this case, all of the points in the reference node will be part of all
  // the results for each point in the query node.
  if ((distances.Lo() >= range.Lo()) && (distances.Hi() <= range.Hi()))
  {
    for (size_t i = 0; i < queryNode.NumDescendants(); ++i)
      AddResult(queryNode.Descendant(i), referenceNode);
    return DBL_MAX; // We don't need to go any deeper.
  }

  // Otherwise the score doesn't matter.  Recursion order is irrelevant in range
  // search.
  return 0.0;
}