/** @brief Build the trees of a KD-forest from a set of data points.
 **
 ** @param self    forest object; @c self->numTrees trees are allocated and built.
 ** @param numData number of data points.
 ** @param data    pointer to the data; only the pointer is stored in
 **                @c self->data, the data itself is not copied here.
 **
 ** For every tree this allocates the tree structure, an index table with
 ** one entry per data point (initialised to the identity permutation) and
 ** the node array, then builds the tree recursively starting from a newly
 ** created root node.
 **
 ** TODO: if the forest was already built, @c self->trees is overwritten
 ** here without the previous trees being released first.
 **/
VL_EXPORT void
vl_kdforest_build (VlKDForest * self, vl_size numData, void const * data)
{
  vl_uindex di, ti ;

  self->data = data ;
  self->numData = numData ;
  self->trees = vl_malloc (sizeof(VlKDTree*) * self->numTrees) ;

  for (ti = 0 ; ti < self->numTrees ; ++ ti) {
    VlKDTree * tree = vl_malloc (sizeof(VlKDTree)) ;
    self->trees[ti] = tree ;

    /* every tree starts from the identity ordering of the data points */
    tree->dataIndex = vl_malloc (sizeof(VlKDTreeDataIndexEntry) * self->numData) ;
    for (di = 0 ; di < self->numData ; ++ di) {
      tree->dataIndex[di].index = di ;
    }

    tree->numUsedNodes = 0 ;
    /* num. nodes of a complete binary tree with numData leaves. vl_size is
     * unsigned, so 2 * 0 - 1 would wrap around to a huge count: always keep
     * room for at least the root node. */
    tree->numAllocatedNodes = (self->numData > 0) ? 2 * self->numData - 1 : 1 ;
    tree->nodes = vl_malloc (sizeof(VlKDTreeNode) * tree->numAllocatedNodes) ;
    tree->depth = 0 ;

    vl_kdtree_build_recursively (self, tree,
                                 vl_kdtree_node_new (tree, 0),
                                 0, self->numData, 0) ;
  }
}
/** @brief Build the trees of a KD-forest and compute their node bounds.
 **
 ** @param self    forest object; @c self->numTrees trees are allocated and built.
 ** @param numData number of data points.
 ** @param data    pointer to the data; only the pointer is stored in
 **                @c self->data, the data itself is not copied here.
 **
 ** For every tree this allocates the tree structure, an index table with
 ** one entry per data point (initialised to the identity permutation) and
 ** the node array, then builds the tree recursively. Afterwards the bounds
 ** of each tree are computed with vl_kdtree_calc_bounds_recursively() and
 ** @c self->maxNumNodes is set to the sum of the nodes used by all trees.
 **
 ** TODO: if the forest was already built, @c self->trees is overwritten
 ** here without the previous trees being released first.
 **/
void
vl_kdforest_build (VlKDForest * self, vl_size numData, void const * data)
{
  vl_uindex di, ti ;
  vl_size maxNumNodes = 0 ;
  double * searchBounds ;

  self->data = data ;
  self->numData = numData ;
  self->trees = vl_malloc (sizeof(VlKDTree*) * self->numTrees) ;

  for (ti = 0 ; ti < self->numTrees ; ++ ti) {
    VlKDTree * tree = vl_malloc (sizeof(VlKDTree)) ;
    self->trees[ti] = tree ;

    /* every tree starts from the identity ordering of the data points */
    tree->dataIndex = vl_malloc (sizeof(VlKDTreeDataIndexEntry) * self->numData) ;
    for (di = 0 ; di < self->numData ; ++ di) {
      tree->dataIndex[di].index = di ;
    }

    tree->numUsedNodes = 0 ;
    /* num. nodes of a complete binary tree with numData leaves. vl_size is
     * unsigned, so 2 * 0 - 1 would wrap around to a huge count: always keep
     * room for at least the root node. */
    tree->numAllocatedNodes = (self->numData > 0) ? 2 * self->numData - 1 : 1 ;
    tree->nodes = vl_malloc (sizeof(VlKDTreeNode) * tree->numAllocatedNodes) ;
    tree->depth = 0 ;

    vl_kdtree_build_recursively (self, tree,
                                 vl_kdtree_node_new (tree, 0),
                                 0, self->numData, 0) ;

    /* accumulate the number of nodes actually used across all trees */
    maxNumNodes += tree->numUsedNodes ;
  }

  /* scratch buffer holding one (lower, upper) pair per dimension */
  searchBounds = vl_malloc (sizeof(double) * 2 * self->dimension) ;

  for (ti = 0 ; ti < self->numTrees ; ++ ti) {
    double * iter = searchBounds ;
    double * end = iter + 2 * self->dimension ;

    /* reset every pair to (-VL_INFINITY_F, +VL_INFINITY_F) before each tree */
    while (iter < end) {
      *iter++ = - VL_INFINITY_F ;
      *iter++ = + VL_INFINITY_F ;
    }

    vl_kdtree_calc_bounds_recursively (self->trees[ti], 0, searchBounds) ;
  }

  vl_free (searchBounds) ;
  self->maxNumNodes = maxNumNodes ;
}
/** @brief Recursively build the subtree rooted at a given node.
 **
 ** @param forest    forest the tree belongs to; supplies the data, its type
 **                  and dimension, the split-heap scratch space, the random
 **                  number generator and the thresholding method.
 ** @param tree      tree being built.
 ** @param nodeIndex index in @c tree->nodes of the node to fill in.
 ** @param dataBegin first entry of @c tree->dataIndex covered by this node.
 ** @param dataEnd   one past the last entry covered by this node.
 ** @param depth     depth of this node; @c tree->depth is raised to it when
 **                  a leaf with at most one data point is reached.
 **
 ** A leaf stores the data range as the negative values
 ** <code>-dataBegin-1</code> and <code>-dataEnd-1</code> in @c lowerChild
 ** and @c upperChild. An internal node picks one of the retained
 ** high-variance dimensions at random, sorts its slice of
 ** @c tree->dataIndex along that dimension, selects a threshold (mean or
 ** median) and recurses on the two halves.
 **
 ** NOTE(review): the random pick computes a modulo by
 ** VL_MIN(splitHeapSize, splitHeapNumNodes); this assumes at least one
 ** candidate dimension (forest->dimension >= 1 and splitHeapSize >= 1) --
 ** confirm the callers guarantee this.
 **/
static void
vl_kdtree_build_recursively
(VlKDForest * forest, VlKDTree * tree, vl_uindex nodeIndex,
 vl_uindex dataBegin, vl_uindex dataEnd, unsigned int depth)
{
  vl_uindex d, i, medianIndex, splitIndex ;
  VlKDTreeNode * node = tree->nodes + nodeIndex ;
  VlKDTreeSplitDimension * splitDimension ;

  /* base case: there is only one data point */
  if (dataEnd - dataBegin <= 1) {
    if (tree->depth < depth) tree->depth = depth ;
    node->lowerChild = - dataBegin - 1;
    node->upperChild = - dataEnd - 1 ;
    return ;
  }

  /* compute the dimension with largest variance */
  forest->splitHeapNumNodes = 0 ;
  for (d = 0 ; d < forest->dimension ; ++ d) {
    double mean = 0 ; /* unnormalized */
    double secondMoment = 0 ;
    double variance = 0 ;

    for (i = dataBegin ; i < dataEnd ; ++ i) {
      /* full-width index: an int would truncate large data indices */
      vl_uindex di = tree->dataIndex [i] .index ;
      double datum ;
      switch (forest->dataType) {
        case VL_TYPE_FLOAT:
          datum = ((float const*)forest->data) [di * forest->dimension + d] ;
          break ;
        case VL_TYPE_DOUBLE:
          datum = ((double const*)forest->data) [di * forest->dimension + d] ;
          break ;
        default:
          abort() ;
      }
      mean += datum ;
      secondMoment += datum * datum ;
    }

    mean /= (dataEnd - dataBegin) ;
    secondMoment /= (dataEnd - dataBegin) ;
    variance = secondMoment - mean * mean ;

    /* keep splitHeapSize most varying dimensions */
    if (forest->splitHeapNumNodes < forest->splitHeapSize) {
      /* heap not full yet: append the candidate and push it into place */
      VlKDTreeSplitDimension * slot
        = forest->splitHeapArray + forest->splitHeapNumNodes ;
      slot->dimension = d ;
      slot->mean = mean ;
      slot->variance = variance ;
      vl_kdtree_split_heap_push (forest->splitHeapArray, &forest->splitHeapNumNodes) ;
    } else {
      /* heap full: replace the entry at index 0 (presumably the smallest
       * retained variance) if this dimension varies more */
      VlKDTreeSplitDimension * top = forest->splitHeapArray + 0 ;
      if (top->variance < variance) {
        top->dimension = d ;
        top->mean = mean ;
        top->variance = variance ;
        vl_kdtree_split_heap_update (forest->splitHeapArray, forest->splitHeapNumNodes, 0) ;
      }
    }
  }

  /* toss a dice to decide the splitting dimension */
  splitDimension = forest->splitHeapArray
    + (vl_rand_uint32(forest->rand) % VL_MIN(forest->splitHeapSize, forest->splitHeapNumNodes)) ;

  /* additional base case: variance is equal to 0 (overlapping points) */
  if (splitDimension->variance == 0) {
    node->lowerChild = - dataBegin - 1 ;
    node->upperChild = - dataEnd - 1 ;
    return ;
  }
  node->splitDimension = splitDimension->dimension ;

  /* sort data along largest variance dimension */
  for (i = dataBegin ; i < dataEnd ; ++ i) {
    vl_uindex di = tree->dataIndex [i] .index ;
    double datum ;
    switch (forest->dataType) {
      case VL_TYPE_FLOAT:
        datum = ((float const*)forest->data)
          [di * forest->dimension + splitDimension->dimension] ;
        break ;
      case VL_TYPE_DOUBLE:
        datum = ((double const*)forest->data)
          [di * forest->dimension + splitDimension->dimension] ;
        break ;
      default:
        abort() ;
    }
    tree->dataIndex [i] .value = datum ;
  }
  qsort (tree->dataIndex + dataBegin,
         dataEnd - dataBegin,
         sizeof (VlKDTreeDataIndexEntry),
         vl_kdtree_compare_index_entries) ;

  /* determine split threshold */
  switch (forest->thresholdingMethod) {
    case VL_KDTREE_MEAN :
      node->splitThreshold = splitDimension->mean ;
      /* advance to the first entry strictly above the threshold */
      for (splitIndex = dataBegin ;
           splitIndex < dataEnd && tree->dataIndex[splitIndex].value <= node->splitThreshold ;
           ++ splitIndex) ;
      /* The mean gives a proper partition only if both sides are
       * non-empty. Test this before stepping back to the last entry of the
       * lower part: splitIndex is unsigned, so decrementing it first would
       * wrap around when splitIndex == dataBegin == 0 and the test would
       * wrongly succeed. */
      if (dataBegin < splitIndex && splitIndex < dataEnd) {
        splitIndex -= 1 ;
        break ;
      }
      /* If the mean does not provide a proper partition, fall back to
       * median. This usually happens if all points have the same
       * value and the zero variance test fails for numerical accuracy
       * reasons. In this case, also due to numerical accuracy, the
       * mean value can be smaller, equal, or larger than all
       * points. */
      /* fall through */

    case VL_KDTREE_MEDIAN :
      medianIndex = (dataBegin + dataEnd - 1) / 2 ;
      splitIndex = medianIndex ;
      node->splitThreshold = tree->dataIndex[medianIndex].value ;
      break ;

    default:
      abort() ;
  }

  /* divide subparts: [dataBegin, splitIndex] below, the rest above */
  node->lowerChild = vl_kdtree_node_new (tree, nodeIndex) ;
  vl_kdtree_build_recursively (forest, tree, node->lowerChild,
                               dataBegin, splitIndex + 1, depth + 1) ;

  node->upperChild = vl_kdtree_node_new (tree, nodeIndex) ;
  vl_kdtree_build_recursively (forest, tree, node->upperChild,
                               splitIndex + 1, dataEnd, depth + 1) ;
}