Example #1
0
/**
 * Build the KD-trees of a forest over a data set.
 *
 * Stores @a data and @a numData in @a self, allocates self->numTrees
 * trees and builds each of them with vl_kdtree_build_recursively(),
 * starting from a freshly created root node. Every tree gets its own
 * data index (initialised to the identity permutation) and its own
 * node pool.
 *
 * @param self    forest to build; self->numTrees is read to size the
 *                tree array.
 * @param numData number of data points.
 * @param data    data to index; the pointer is stored, not copied.
 *
 * NOTE(review): trees from a previous build are not released here (see
 * the TODO in the body); allocation results are not checked.
 */
VL_EXPORT void
vl_kdforest_build (VlKDForest * self, vl_size numData, void const * data)
{
  vl_uindex di, ti ;

  /* TODO: if the forest was already built, release the old trees first */
  self->data = data ;
  self->numData = numData ;
  self->trees = vl_malloc (sizeof(VlKDTree*) * self->numTrees) ;

  for (ti = 0 ; ti < self->numTrees ; ++ ti) {
    VlKDTree * tree = vl_malloc (sizeof(VlKDTree)) ;
    self->trees[ti] = tree ;

    /* start from the identity permutation of the data points */
    tree->dataIndex = vl_malloc (sizeof(VlKDTreeDataIndexEntry) * self->numData) ;
    for (di = 0 ; di < self->numData ; ++ di) {
      tree->dataIndex[di].index = di ;
    }

    tree->numUsedNodes = 0 ;
    /* Num. nodes of a complete binary tree with numData leaves. The
     * root node is always created below, so at least one node is
     * reserved; this also keeps 2 * numData - 1 from going below zero
     * (wrapping around for an unsigned vl_size) when numData is 0. */
    tree->numAllocatedNodes = (self->numData > 0) ? 2 * self->numData - 1 : 1 ;
    tree->nodes = vl_malloc (sizeof(VlKDTreeNode) * tree->numAllocatedNodes) ;
    tree->depth = 0 ;

    vl_kdtree_build_recursively (self, tree,
                                 vl_kdtree_node_new (tree, 0), 0,
                                 self->numData, 0) ;
  }
}
Example #2
0
/**
 * Build the KD-trees of a forest over a data set.
 *
 * Stores @a data and @a numData in @a self, allocates self->numTrees
 * trees and builds each of them with vl_kdtree_build_recursively(),
 * starting from a freshly created root node. Afterwards
 * vl_kdtree_calc_bounds_recursively() is run from node 0 of every tree
 * with a scratch buffer of 2 * self->dimension doubles, and the total
 * number of nodes used by all trees is stored in self->maxNumNodes.
 *
 * @param self    forest to build; self->numTrees and self->dimension
 *                are read.
 * @param numData number of data points.
 * @param data    data to index; the pointer is stored, not copied.
 *
 * NOTE(review): trees from a previous build are not released here (see
 * the TODO in the body); allocation results are not checked.
 */
void
vl_kdforest_build (VlKDForest * self, vl_size numData, void const * data)
{
  vl_uindex di, ti ;
  vl_size maxNumNodes ;
  double * searchBounds;

  /* TODO: if the forest was already built, release the old trees first */
  self->data = data ;
  self->numData = numData ;
  self->trees = vl_malloc (sizeof(VlKDTree*) * self->numTrees) ;
  maxNumNodes = 0 ;

  for (ti = 0 ; ti < self->numTrees ; ++ ti) {
    VlKDTree * tree = vl_malloc (sizeof(VlKDTree)) ;
    self->trees[ti] = tree ;

    /* start from the identity permutation of the data points */
    tree->dataIndex = vl_malloc (sizeof(VlKDTreeDataIndexEntry) * self->numData) ;
    for (di = 0 ; di < self->numData ; ++ di) {
      tree->dataIndex[di].index = di ;
    }

    tree->numUsedNodes = 0 ;
    /* Num. nodes of a complete binary tree with numData leaves. The
     * root node is always created below, so at least one node is
     * reserved; this also keeps 2 * numData - 1 from going below zero
     * (wrapping around for an unsigned vl_size) when numData is 0. */
    tree->numAllocatedNodes = (self->numData > 0) ? 2 * self->numData - 1 : 1 ;
    tree->nodes = vl_malloc (sizeof(VlKDTreeNode) * tree->numAllocatedNodes) ;
    tree->depth = 0 ;

    vl_kdtree_build_recursively (self, tree,
                                 vl_kdtree_node_new (tree, 0), 0,
                                 self->numData, 0) ;

    /* accumulate the nodes actually used across all trees */
    maxNumNodes += tree->numUsedNodes ;
  }

  /* scratch buffer: one (lower, upper) pair per dimension */
  searchBounds = vl_malloc(sizeof(double) * 2 * self->dimension);

  for (ti = 0 ; ti < self->numTrees ; ++ ti) {
    double * iter = searchBounds  ;
    double * end = iter + 2 * self->dimension ;

    /* reset every pair to (-inf, +inf) before visiting this tree */
    while (iter < end) {
      *iter++ = - VL_INFINITY_F ;
      *iter++ = + VL_INFINITY_F ;
    }

    vl_kdtree_calc_bounds_recursively (self->trees[ti], 0, searchBounds) ;
  }

  vl_free(searchBounds);
  self -> maxNumNodes = maxNumNodes;
}
Example #3
0
/**
 * Recursively build the subtree rooted at node @a nodeIndex over the
 * data index entries in the range [@a dataBegin, @a dataEnd).
 *
 * For every dimension the mean and variance of the points in the range
 * are computed, and up to forest->splitHeapSize of the most varying
 * dimensions are kept in forest->splitHeapArray. One of them is picked
 * at random as the splitting dimension. The range of tree->dataIndex is
 * then sorted along that dimension, a threshold is chosen according to
 * forest->thresholdingMethod, and the two halves are built recursively
 * as the lower and upper children.
 *
 * A node becomes a leaf when the range holds at most one point or when
 * the chosen dimension has zero variance. A leaf stores its data range
 * as the negative values -dataBegin - 1 and -dataEnd - 1 in
 * lowerChild / upperChild, and tree->depth is raised to the leaf's
 * depth if needed.
 *
 * @param forest    forest owning the data, the split heap and the
 *                  random number generator.
 * @param tree      tree being built; its dataIndex is reordered.
 * @param nodeIndex index in tree->nodes of the node to fill in.
 * @param dataBegin first entry of the range in tree->dataIndex.
 * @param dataEnd   one past the last entry of the range.
 * @param depth     depth of this node.
 *
 * Calls abort() on an unsupported data type or thresholding method.
 */
static void
vl_kdtree_build_recursively
(VlKDForest * forest,
 VlKDTree * tree, vl_uindex nodeIndex,
 vl_uindex dataBegin, vl_uindex dataEnd,
 unsigned int depth)
{
  vl_uindex d, i, medianIndex, splitIndex ;
  VlKDTreeNode * node = tree->nodes + nodeIndex ;
  VlKDTreeSplitDimension * splitDimension ;

  /* base case: there is only one data point */
  if (dataEnd - dataBegin <= 1) {
    if (tree->depth < depth) tree->depth = depth ;
    node->lowerChild = - dataBegin - 1;
    node->upperChild = - dataEnd - 1 ;
    return ;
  }

  /* compute the dimension with largest variance */
  forest->splitHeapNumNodes = 0 ;
  for (d = 0 ; d < forest->dimension ; ++ d) {
    double mean = 0 ; /* unnormalized */
    double secondMoment = 0 ;
    double variance = 0 ;
    for (i = dataBegin ; i < dataEnd ; ++ i) {
      /* keep the full index width: an int would truncate large indices */
      vl_uindex di = tree->dataIndex[i].index ;
      double datum ;
      switch (forest->dataType) {
        case VL_TYPE_FLOAT:
          datum = ((float const*)forest->data) [di * forest->dimension + d] ;
          break ;
        case VL_TYPE_DOUBLE:
          datum = ((double const*)forest->data) [di * forest->dimension + d] ;
          break ;
        default:
          abort() ;
      }
      mean += datum ;
      secondMoment += datum * datum ;
    }
    mean /= (dataEnd - dataBegin) ;
    secondMoment /= (dataEnd - dataBegin) ;
    variance = secondMoment - mean * mean ;

    /* keep splitHeapSize most varying dimensions */
    if (forest->splitHeapNumNodes < forest->splitHeapSize) {
      /* heap not full yet: append this dimension and push it */
      VlKDTreeSplitDimension * slot
        = forest->splitHeapArray + forest->splitHeapNumNodes ;
      slot->dimension = d ;
      slot->mean = mean ;
      slot->variance = variance ;
      vl_kdtree_split_heap_push (forest->splitHeapArray, &forest->splitHeapNumNodes) ;
    } else {
      /* heap full: replace the root entry if this dimension varies more */
      VlKDTreeSplitDimension * root = forest->splitHeapArray + 0 ;
      if (root->variance < variance) {
        root->dimension = d ;
        root->mean = mean ;
        root->variance = variance ;
        vl_kdtree_split_heap_update (forest->splitHeapArray, forest->splitHeapNumNodes, 0) ;
      }
    }
  }

  /* toss a dice to decide the splitting dimension */
  splitDimension = forest->splitHeapArray
  + (vl_rand_uint32(forest->rand) % VL_MIN(forest->splitHeapSize, forest->splitHeapNumNodes)) ;

  /* additional base case: variance is equal to 0 (overlapping points) */
  if (splitDimension->variance == 0) {
    /* this node is a leaf too, so account for its depth as above */
    if (tree->depth < depth) tree->depth = depth ;
    node->lowerChild = - dataBegin - 1 ;
    node->upperChild = - dataEnd - 1 ;
    return ;
  }
  node->splitDimension = splitDimension->dimension ;

  /* sort data along the chosen dimension */
  for (i = dataBegin ; i < dataEnd ; ++ i) {
    vl_uindex di = tree->dataIndex[i].index ;
    double datum ;
    switch (forest->dataType) {
      case VL_TYPE_FLOAT:
        datum = ((float const*)forest->data)
          [di * forest->dimension + splitDimension->dimension] ;
        break ;
      case VL_TYPE_DOUBLE:
        datum = ((double const*)forest->data)
          [di * forest->dimension + splitDimension->dimension] ;
        break ;
      default:
        abort() ;
    }
    tree->dataIndex[i].value = datum ;
  }
  qsort (tree->dataIndex + dataBegin,
         dataEnd - dataBegin,
         sizeof (VlKDTreeDataIndexEntry),
         vl_kdtree_compare_index_entries) ;

  /* determine split threshold */
  switch (forest->thresholdingMethod) {
    case VL_KDTREE_MEAN :
      node->splitThreshold = splitDimension->mean ;
      /* advance to one past the last entry whose value is <= threshold */
      for (splitIndex = dataBegin ;
           splitIndex < dataEnd && tree->dataIndex[splitIndex].value <= node->splitThreshold ;
           ++ splitIndex) ;
      /* If the mean does not provide a proper partition, fall back to
       * median. This usually happens if all points have the same
       * value and the zero variance test fails for numerical accuracy
       * reasons. In this case, also due to numerical accuracy, the
       * mean value can be smaller, equal, or larger than all
       * points.
       *
       * The test is done before decrementing splitIndex: decrementing
       * first would wrap around when splitIndex == dataBegin == 0 and
       * let an empty lower part pass as a valid partition. */
      if (dataBegin < splitIndex && splitIndex < dataEnd) {
        splitIndex -= 1 ; /* index of the last entry of the lower part */
        break ;
      }
      /* fall through */

    case VL_KDTREE_MEDIAN :
      medianIndex = (dataBegin + dataEnd - 1) / 2 ;
      splitIndex = medianIndex ;
      node -> splitThreshold = tree->dataIndex[medianIndex].value ;
      break ;

    default:
      abort() ;
  }

  /* divide subparts: [dataBegin, splitIndex] and (splitIndex, dataEnd) */
  node->lowerChild = vl_kdtree_node_new (tree, nodeIndex) ;
  vl_kdtree_build_recursively (forest, tree, node->lowerChild, dataBegin, splitIndex + 1, depth + 1) ;

  node->upperChild = vl_kdtree_node_new (tree, nodeIndex) ;
  vl_kdtree_build_recursively (forest, tree, node->upperChild, splitIndex + 1, dataEnd, depth + 1) ;
}