// Updates the cluster bookkeeping of the subtree rooted at node after the center with index
// new_cluster has been added, and returns the cost of the subtree as reported by GetNodeCost.
//   node        - root of the subtree to update; its kmpp_cluster_index is refreshed when the
//                 function has to descend into the children
//   new_cluster - index (into centers) of the newly added center
//   centers     - center coordinates, d_ scalars per center
//   dist_sq     - squared distances, indexed by position in point_indices_; rewritten for every
//                 point of a node that is handed over to new_cluster as a whole
Scalar KmTree::SeedKmppUpdateAssignment(const Node *node, int new_cluster, Scalar *centers,
                                        Scalar *dist_sq) const {
  const int old_cluster = node->kmpp_cluster_index;

  // A non-negative index means the node is currently assigned to one cluster as a whole, so it
  // may be possible to settle it without looking at the children.
  if (old_cluster >= 0) {
    Scalar *old_center = centers + old_cluster*d_;

    // The new center is pruned in favour of the old one: the node keeps its cluster.
    if (ShouldBePruned(node->median, node->radius, centers, old_cluster, new_cluster))
      return GetNodeCost(node, old_center);

    // The old center is pruned in favour of the new one: move the whole node over and refresh
    // the stored squared distance of each of its points.
    if (ShouldBePruned(node->median, node->radius, centers, new_cluster, old_cluster)) {
      Scalar *new_center = centers + new_cluster*d_;
      SeedKmppSetClusterIndex(node, new_cluster);
      const int end_pos = node->first_point_index + node->num_points;
      for (int pos = node->first_point_index; pos < end_pos; pos++)
        dist_sq[pos] = KMeans_PointDistSq(points_ + point_indices_[pos]*d_, new_center, d_);
      return GetNodeCost(node, new_center);
    }

    // Neither center could be pruned. For a leaf this can happen when its point is equidistant
    // from both centers; there is nothing to descend into, so keep the old assignment.
    if (node->lower_node == 0)
      return GetNodeCost(node, old_center);
  }

  // Mixed or undecided node: update both children and add up their costs.
  const Scalar cost =
      SeedKmppUpdateAssignment(node->lower_node, new_cluster, centers, dist_sq) +
      SeedKmppUpdateAssignment(node->upper_node, new_cluster, centers, dist_sq);

  // The node only carries a cluster index when both children agree on a valid one.
  const int lower_index = node->lower_node->kmpp_cluster_index;
  const int upper_index = node->upper_node->kmpp_cluster_index;
  node->kmpp_cluster_index = (lower_index == upper_index && lower_index != -1) ? lower_index : -1;
  return cost;
}
// A recursive version of DoKMeansStep. This determines which clusters all points that are rooted // node will be assigned to, and updates sums, counts and assignment (if not null) accordingly. // candidates maintains the set of cluster indices which could possibly be the closest clusters // for points in this subtree. Scalar KmTree::DoKMeansStepAtNode(const Node *node, int k, int *candidates, Scalar *centers, Scalar *sums, int *counts, int *assignment) const { // Determine which center the node center is closest to Scalar min_dist_sq = KMeans_PointDistSq(node->median, centers + candidates[0]*d_, d_); int closest_i = candidates[0]; for (int i = 1; i < k; i++) { Scalar dist_sq = KMeans_PointDistSq(node->median, centers + candidates[i]*d_, d_); if (dist_sq < min_dist_sq) { min_dist_sq = dist_sq; closest_i = candidates[i]; } } // If this is a non-leaf node, recurse if necessary if (node->lower_node != 0) { // Build the new list of candidates int new_k = 0; int *new_candidates = (int*)malloc(k * sizeof(int)); KM_ASSERT(new_candidates != 0); for (int i = 0; i < k; i++) if (!ShouldBePruned(node->median, node->radius, centers, closest_i, candidates[i])) new_candidates[new_k++] = candidates[i]; // Recurse if there's at least two if (new_k > 1) { Scalar result = DoKMeansStepAtNode(node->lower_node, new_k, new_candidates, centers, sums, counts, assignment) + DoKMeansStepAtNode(node->upper_node, new_k, new_candidates, centers, sums, counts, assignment); free(new_candidates); return result; } else { free(new_candidates); } } // Assigns all points within this node to a single center KMeans_PointAdd(sums + closest_i*d_, node->sum, d_); counts[closest_i] += node->num_points; if (assignment != 0) { for (int i = node->first_point_index; i < node->first_point_index + node->num_points; i++) assignment[point_indices_[i]] = closest_i; } return GetNodeCost(node, centers + closest_i*d_); }
// Build a kd tree from the given set of points KmTree::Node *KmTree::BuildNodes(Scalar *points, int first_index, int last_index, char **next_node_data) { // Allocate the node Node *node = (Node*)(*next_node_data); (*next_node_data) += sizeof(Node); node->sum = (Scalar*)(*next_node_data); (*next_node_data) += sizeof(Scalar) * d_; node->median = (Scalar*)(*next_node_data); (*next_node_data) += sizeof(Scalar) * d_; node->radius = (Scalar*)(*next_node_data); (*next_node_data) += sizeof(Scalar) * d_; // Fill in basic info node->num_points = (last_index - first_index + 1); node->first_point_index = first_index; // Calculate the bounding box Scalar *first_point = points + point_indices_[first_index] * d_; Scalar *bound_p1 = KMeans_PointAllocate(d_); Scalar *bound_p2 = KMeans_PointAllocate(d_); KM_ASSERT(bound_p1 != 0 && bound_p2 != 0); KMeans_PointCopy(bound_p1, first_point, d_); KMeans_PointCopy(bound_p2, first_point, d_); for (int i = first_index+1; i <= last_index; i++) for (int j = 0; j < d_; j++) { Scalar c = points[point_indices_[i]*d_ + j]; if (bound_p1[j] > c) bound_p1[j] = c; if (bound_p2[j] < c) bound_p2[j] = c; } // Calculate bounding box stats and delete the bounding box memory Scalar max_radius = -1; int split_d = -1; for (int j = 0; j < d_; j++) { node->median[j] = (bound_p1[j] + bound_p2[j]) / 2; node->radius[j] = (bound_p2[j] - bound_p1[j]) / 2; if (node->radius[j] > max_radius) { max_radius = node->radius[j]; split_d = j; } } KMeans_PointFree(bound_p2); KMeans_PointFree(bound_p1); // If the max spread is 0, make this a leaf node if (max_radius == 0) { node->lower_node = node->upper_node = 0; KMeans_PointCopy(node->sum, first_point, d_); if (last_index != first_index) KMeans_PointScale(node->sum, Scalar(last_index - first_index + 1), d_); node->opt_cost = 0; return node; } // Partition the points around the midpoint in this dimension. 
The partitioning is done in-place // by iterating from left-to-right and right-to-left in the same way that partioning is done for // quicksort. Scalar split_pos = node->median[split_d]; int i1 = first_index, i2 = last_index, size1 = 0; while (i1 <= i2) { bool is_i1_good = (points[point_indices_[i1]*d_ + split_d] < split_pos); bool is_i2_good = (points[point_indices_[i2]*d_ + split_d] >= split_pos); if (!is_i1_good && !is_i2_good) { int temp = point_indices_[i1]; point_indices_[i1] = point_indices_[i2]; point_indices_[i2] = temp; is_i1_good = is_i2_good = true; } if (is_i1_good) { i1++; size1++; } if (is_i2_good) { i2--; } } // Create the child nodes KM_ASSERT(size1 >= 1 && size1 <= last_index - first_index); node->lower_node = BuildNodes(points, first_index, first_index + size1 - 1, next_node_data); node->upper_node = BuildNodes(points, first_index + size1, last_index, next_node_data); // Calculate the new sum and opt cost KMeans_PointCopy(node->sum, node->lower_node->sum, d_); KMeans_PointAdd(node->sum, node->upper_node->sum, d_); Scalar *center = KMeans_PointAllocate(d_); KM_ASSERT(center != 0); KMeans_PointCopy(center, node->sum, d_); KMeans_PointScale(center, Scalar(1) / node->num_points, d_); node->opt_cost = GetNodeCost(node->lower_node, center) + GetNodeCost(node->upper_node, center); KMeans_PointFree(center); return node; }
ASPath ASPathCreate(const ASPathNodeSource *source, void *context, void *startNodeKey, void *goalNodeKey) { VisitedNodes visitedNodes; ASNeighborList neighborList; Node current; Node goalNode; ASPath path = NULL; if (!startNodeKey || !source || !source->nodeNeighbors || source->nodeSize == 0) { return NULL; } visitedNodes = VisitedNodesCreate(source, context); neighborList = NeighborListCreate(source); current = GetNode(visitedNodes, startNodeKey); goalNode = GetNode(visitedNodes, goalNodeKey); // mark the goal node as the goal SetNodeIsGoal(goalNode); // set the starting node's estimate cost to the goal and add it to the open set SetNodeEstimatedCost(current, GetPathCostHeuristic(current, goalNode)); AddNodeToOpenSet(current, 0, NodeNull); // perform the A* algorithm while (HasOpenNode(visitedNodes) && !NodeIsGoal((current = GetOpenNode(visitedNodes)))) { size_t n; if (source->earlyExit) { const int shouldExit = source->earlyExit(visitedNodes->nodeRecordsCount, GetNodeKey(current), goalNodeKey, context); if (shouldExit > 0) { SetNodeIsGoal(current); break; } else if (shouldExit < 0) { break; } } RemoveNodeFromOpenSet(current); AddNodeToClosedSet(current); neighborList->count = 0; source->nodeNeighbors(neighborList, GetNodeKey(current), context); for (n=0; n<neighborList->count; n++) { const float cost = GetNodeCost(current) + NeighborListGetEdgeCost(neighborList, n); Node neighbor = GetNode(visitedNodes, NeighborListGetNodeKey(neighborList, n)); if (!NodeHasEstimatedCost(neighbor)) { SetNodeEstimatedCost(neighbor, GetPathCostHeuristic(neighbor, goalNode)); } if (NodeIsInOpenSet(neighbor) && cost < GetNodeCost(neighbor)) { RemoveNodeFromOpenSet(neighbor); } if (NodeIsInClosedSet(neighbor) && cost < GetNodeCost(neighbor)) { RemoveNodeFromClosedSet(neighbor); } if (!NodeIsInOpenSet(neighbor) && !NodeIsInClosedSet(neighbor)) { AddNodeToOpenSet(neighbor, cost, current); } } } if (NodeIsNull(goalNode)) { SetNodeIsGoal(current); } if (NodeIsGoal(current)) { size_t count 
= 0; Node n = current; size_t i; while (!NodeIsNull(n)) { count++; n = GetParentNode(n); } CMALLOC(path, sizeof(struct __ASPath) + (count * source->nodeSize)); path->nodeSize = source->nodeSize; path->count = count; path->cost = GetNodeCost(current); n = current; for (i=count; i>0; i--) { memcpy(path->nodeKeys + ((i - 1) * source->nodeSize), GetNodeKey(n), source->nodeSize); n = GetParentNode(n); } } NeighborListDestroy(neighborList); VisitedNodesDestroy(visitedNodes); return path; }