/* traverse the quadtree to find the best renking cluster */ static void findBestCluster(layerObj* layer, msClusterLayerInfo* layerinfo, clusterTreeNode *node) { int i; double rank; clusterInfo* s = node->shapes; while (s) { if (s->filter < 0 && layer->cluster.filter.string != NULL) { InitShapeAttributes(layer, s); s->filter = msClusterEvaluateFilter(&layer->cluster.filter, &s->shape); } if (s->numsiblings == 0 || s->filter == 0) { /* individual or filtered shapes must be removed for sure */ layerinfo->current = s; return; } /* calculating the rank */ rank = (s->x - s->avgx) * (s->x - s->avgx) + (s->y - s->avgy) * (s->y - s->avgy) /*+ s->varx + s->vary*/ + (double)1/ (1 + s->numsiblings); if (rank < layerinfo->rank) { layerinfo->current = s; layerinfo->rank = rank; } s = s->next; } /* Recurse to subnodes if they exist */ for (i = 0; i < 4; i++) { if (node->subnode[i]) findBestCluster(layer, layerinfo, node->subnode[i]); } }
void clusterize(int k, entryList &l) { printf("Clusterizing in %d clusters\n", k); cluster *clusters = new cluster[k]; selectSeeds(clusters, l, k); double RSS = printRSS(clusters, k); double prevRSS = 1e+20; int iterations = 0; while(iterations < MAX_ITERATIONS && (prevRSS - RSS) > THRESHOLD) { iterations++; printf("Iteration %d\n", iterations); // Clear cluster members. for(int i=0; i<k; i++) clusters[i].members.clear(); // Find the best cluster for each entry // and add as a member for(int i=0; i<l.size(); i++) { article_entry* entry = l.get(i); cluster& cl = findBestCluster(clusters, k, entry->v); cl.members.add(entry); } // Recompute centroids printf("Computing centroids "); for(int i=0; i<k; i++) { printf("%d ", i); recomputeCentroid(clusters[i]); } printf("\n"); prevRSS = RSS; RSS = printRSS(clusters, k); } delete[] clusters; }
/* rebuild the clusters according to the current extent */ int RebuildClusters(layerObj *layer, int isQuery) { mapObj* map; layerObj* srcLayer; double distance, maxDistanceX, maxDistanceY, cellSizeX, cellSizeY; rectObj searchrect; int status; clusterInfo* current; int depth; #ifdef USE_CLUSTER_EXTERNAL int layerIndex; #endif msClusterLayerInfo* layerinfo = layer->layerinfo; if (!layerinfo) { msSetError(MS_MISCERR, "Layer is not open: %s", "RebuildClusters()", layer->name); return MS_FAILURE; } if (!layer->map) { msSetError(MS_MISCERR, "No map associated with this layer: %s", "RebuildClusters()", layer->name); return MS_FAILURE; } if (layer->debug >= MS_DEBUGLEVEL_VVV) msDebug("Clustering started.\n"); map = layer->map; layerinfo->current = layerinfo->finalized; /* restart */ /* check whether all shapes should be returned from a query */ if(msLayerGetProcessingKey(layer, "CLUSTER_GET_ALL_SHAPES") != NULL) layerinfo->get_all_shapes = MS_TRUE; else layerinfo->get_all_shapes = MS_FALSE; /* identify the current extent */ if(layer->transform == MS_TRUE) searchrect = map->extent; else { searchrect.minx = searchrect.miny = 0; searchrect.maxx = map->width-1; searchrect.maxy = map->height-1; } if (searchrect.minx == layerinfo->searchRect.minx && searchrect.miny == layerinfo->searchRect.miny && searchrect.maxx == layerinfo->searchRect.maxx && searchrect.maxy == layerinfo->searchRect.maxy) { /* already built */ return MS_SUCCESS; } /* destroy previous data*/ clusterDestroyData(layerinfo); layerinfo->searchRect = searchrect; /* reproject the rectangle to layer coordinates */ #ifdef USE_PROJ if((map->projection.numargs > 0) && (layer->projection.numargs > 0)) msProjectRect(&map->projection, &layer->projection, &searchrect); /* project the searchrect to source coords */ #endif /* determine the compare method */ layerinfo->fnCompare = CompareRectangleRegion; if (layer->cluster.region) { if (EQUAL(layer->cluster.region, "ellipse")) layerinfo->fnCompare = CompareEllipseRegion; } /* trying to find a reasonable quadtree depth */ depth = 0; distance = layer->cluster.maxdistance; while ((distance < map->width || distance < map->height) && depth <= TREE_MAX_DEPTH) { distance *= 2; ++depth; } layerinfo->depth = depth; cellSizeX = MS_CELLSIZE(searchrect.minx, searchrect.maxx, map->width); cellSizeY = MS_CELLSIZE(searchrect.miny, searchrect.maxy, map->height); maxDistanceX = layer->cluster.maxdistance * cellSizeX; maxDistanceY = layer->cluster.maxdistance * cellSizeY; /* increase the search rectangle so that the neighbouring shapes are also retrieved */ searchrect.minx -= layer->cluster.buffer * cellSizeX; searchrect.maxx += layer->cluster.buffer * cellSizeX; searchrect.miny -= layer->cluster.buffer * cellSizeY; searchrect.maxy += layer->cluster.buffer * cellSizeY; /* create the root node */ if (layerinfo->root) clusterTreeNodeDestroy(layerinfo, layerinfo->root); layerinfo->root = clusterTreeNodeCreate(layerinfo, searchrect); srcLayer = &layerinfo->srcLayer; /* start retrieving the shapes */ status = msLayerWhichShapes(srcLayer, searchrect, isQuery); if(status == MS_DONE) { /* no overlap */ return MS_SUCCESS; } else if(status != MS_SUCCESS) { return MS_FAILURE; } /* step through the source shapes and populate the quadtree with the tentative clusters */ if ((current = clusterInfoCreate(layerinfo)) == NULL) return MS_FAILURE; while((status = msLayerNextShape(srcLayer, ¤t->shape)) == MS_SUCCESS) { #if defined(USE_PROJ) && defined(USE_CLUSTER_EXTERNAL) /* transform the shape to the projection of this layer */ if(srcLayer->transform == MS_TRUE && srcLayer->project && layer->transform == MS_TRUE && layer->project &&msProjectionsDiffer(&(srcLayer->projection), &(layer->projection))) msProjectShape(&srcLayer->projection, &layer->projection, ¤t->shape); #endif /* set up positions and variance */ current->avgx = current->x = current->shape.bounds.minx; current->avgy = current->y = current->shape.bounds.miny; current->varx = current->vary = 0; /* set up the area of interest when searching for the neighboring shapes */ current->bounds.minx = current->x - maxDistanceX; current->bounds.miny = current->y - maxDistanceY; current->bounds.maxx = current->x + maxDistanceX; current->bounds.maxy = current->y + maxDistanceY; /* if the shape doesn't overlap we must skip it to avoid further issues */ if(!msRectOverlap(&searchrect, ¤t->bounds)) { msFreeShape(¤t->shape); msInitShape(¤t->shape); msDebug("Skipping an invalid shape falling outside of the given extent\n"); continue; } /* construct the item array */ if (layer->iteminfo) BuildFeatureAttributes(layer, layerinfo, ¤t->shape); /* evaluate the group expression */ if (layer->cluster.group.string) current->group = msClusterGetGroupText(&layer->cluster.group, ¤t->shape); /*start a query for the related shapes */ findRelatedShapes(layerinfo, layerinfo->root, current); /* add this shape to the tree */ if (treeNodeAddShape(layerinfo, layerinfo->root, current, depth) != MS_SUCCESS) { clusterInfoDestroyList(layerinfo, current); return MS_FAILURE; } if ((current = clusterInfoCreate(layerinfo)) == NULL) { clusterInfoDestroyList(layerinfo, current); return MS_FAILURE; } } clusterInfoDestroyList(layerinfo, current); while (layerinfo->root) { #ifdef TESTCOUNT int n; double avgx, avgy; #endif /* pick up the best cluster from the tree and do the finalization */ /* the initial rank must be big enough */ layerinfo->rank = (searchrect.maxx - searchrect.minx) * (searchrect.maxx - searchrect.minx) + (searchrect.maxy - searchrect.miny) * (searchrect.maxy - searchrect.miny) + 1; layerinfo->current = NULL; findBestCluster(layer, layerinfo, layerinfo->root); if (layerinfo->current == NULL) { if (layer->debug >= MS_DEBUGLEVEL_VVV) msDebug("Clustering terminated.\n"); break; /* completed */ } /* Update the feature count of the shape */ InitShapeAttributes(layer, layerinfo->current); /* collecting the shapes of the cluster */ collectClusterShapes(layerinfo, layerinfo->root, layerinfo->current); if (layer->debug >= MS_DEBUGLEVEL_VVV) { msDebug("processing cluster %p: rank=%lf fcount=%d ncoll=%d nfin=%d nfins=%d nflt=%d bounds={%lf %lf %lf %lf}\n", layerinfo->current, layerinfo->rank, layerinfo->current->numsiblings + 1, layerinfo->current->numcollected, layerinfo->numFinalized, layerinfo->numFinalizedSiblings, layerinfo->numFiltered, layerinfo->current->bounds.minx, layerinfo->current->bounds.miny, layerinfo->current->bounds.maxx, layerinfo->current->bounds.maxy); if (layerinfo->current->node) { char pszBuffer[TREE_MAX_DEPTH + 1]; clusterTreeNode* node = layerinfo->current->node; int position = node->position; int i = 1; while (position > 0 && i <= TREE_MAX_DEPTH) { pszBuffer[TREE_MAX_DEPTH - i] = '0' + (position % 4); position = position >> 2; ++i; } pszBuffer[TREE_MAX_DEPTH] = 0; msDebug(" ->node %p: count=%d index=%d pos=%s subn={%p %p %p %p} rect={%lf %lf %lf %lf}\n", node, node->numshapes, node->index, pszBuffer + TREE_MAX_DEPTH - i + 1, node->subnode[0], node->subnode[1], node->subnode[2], node->subnode[3], node->rect.minx, node->rect.miny, node->rect.maxx, node->rect.maxy); } }