void KCleaf::sampleCtr( // sample from leaf node KMpoint c, // the sampled point (returned) KMorthRect &bnd_box) // bounding box for current node { int ri = kmRanInt(n_data); // generate random index kmCopyPt(kcDim, kcPoints[bkt[ri]], c); // copy to destination }
//---------------------------------------------------------------------- // swapOneCenter // Swaps one center point with a sample point. Optionally we make // sure that the new point is not a duplicate of any of the centers // (including the point being replaced). //---------------------------------------------------------------------- void KMfilterCenters::swapOneCenter( // swap one center bool allowDuplicate) // allow duplicate centers { int rj = kmRanInt(kCtrs); // index of center to replace int dim = getDim(); KMpoint p = kmAllocPt(dim); // alloc replacement point pts->sampleCtr(p); // sample a replacement if (!allowDuplicate) { // duplicates not allowed? bool dupFound; // was a duplicate found? do { // repeat until successful dupFound = false; for (int j = 0; j < kCtrs; j++) { // search for duplicates if (kmEqualPts(dim, p, ctrs[j])) { dupFound = true; pts->sampleCtr(p); // try again break; } } } while (dupFound); } kmCopyPt(dim, p, ctrs[rj]); // copy sampled point if (kmStatLev >= STEP) { // output swap info *kmOut << "\tswapping: "; kmPrintPt(p, getDim(), true); *kmOut << "<-->Center[" << rj << "]\n"; } kmDeallocPt(p); // deallocate point storage invalidate(); // distortions now invalid }
void kmClusEllipsoids( // clustered around ellipsoids KMpointArray pa, // point array (modified) int n, // number of points int dim, // dimension int n_col, // number of colors bool new_clust, // generate new clusters. double std_dev_small, // small standard deviation double std_dev_lo, // low standard deviation for ellipses double std_dev_hi, // high standard deviation for ellipses int max_dim) // maximum dimension of the flats { static KMpointArray clusters = NULL; // cluster centers static KMpointArray stdDev = NULL; // standard deviations if (clusters == NULL || new_clust) { // need new cluster centers if (clusters != NULL) // clusters already exist kmDeallocPts(clusters); // get rid of them if (stdDev != NULL) // std deviations already exist kmDeallocPts(stdDev); // get rid of them clusters = kmAllocPts(n_col, dim); // alloc new clusters and devs stdDev = kmAllocPts(n_col, dim); for (int i = 0; i < n_col; i++) { // gen cluster center coords for (int d = 0; d < dim; d++) { clusters[i][d] = (KMcoord) kmRanUnif(-1,1); } } for (int c = 0; c < n_col; c++) { // generate cluster std dev int n_dim = 1 + kmRanInt(max_dim); // number of dimensions in flat for (int d = 0; d < dim; d++) { // generate std dev's // prob. of picking next dim double Prob = ((double) n_dim)/((double) (dim-d)); if (kmRan0() < Prob) { // add this one to ellipse // generate random std dev stdDev[c][d] = kmRanUnif(std_dev_lo, std_dev_hi); n_dim--; // one fewer dim to fill } else { // don't take this one stdDev[c][d] = std_dev_small;// use small std dev } } } } int next = 0; // next slot to fill for (int c = 0; c < n_col; c++) { // generate clusters int pick = (n+c)/n_col; // number of points to pick for (int i = 0; i < pick; i++) { for (int d = 0; d < dim; d++) { pa[next][d] = (KMcoord) (stdDev[c][d]*kmRanGauss() + clusters[c][d]); } next++; } } }
void kmClusOrthFlats( // clustered along orthogonal flats KMpointArray pa, // point array (modified) int n, // number of points int dim, // dimension int n_col, // number of colors bool new_clust, // generate new clusters. double std_dev, // standard deviation within clusters int max_dim) // maximum dimension of the flats { const double CO_FLAG = 999; // special flag value static KMpointArray control = NULL; // control vectors if (control == NULL || new_clust) { // need new cluster centers if (control != NULL) { // clusters already exist kmDeallocPts(control); // get rid of them } control = kmAllocPts(n_col, dim); for (int c = 0; c < n_col; c++) { // generate clusters int n_dim = 1 + kmRanInt(max_dim); // number of dimensions in flat for (int d = 0; d < dim; d++) { // generate side locations // prob. of picking next dim double Prob = ((double) n_dim)/((double) (dim-d)); if (kmRan0() < Prob) { // add this one to flat control[c][d] = CO_FLAG; // flag this entry n_dim--; // one fewer dim to fill } else { // don't take this one control[c][d] = kmRanUnif(-1,1);// random value in [-1,1] } } } } int next = 0; // next slot to fill for (int c = 0; c < n_col; c++) { // generate clusters int pick = (n+c)/n_col; // number of points to pick for (int i = 0; i < pick; i++) { for (int d = 0; d < dim; d++) { if (control[c][d] == CO_FLAG) // dimension on flat pa[next][d] = (KMcoord) kmRanUnif(-1,1); else // dimension off flat pa[next][d] = (KMcoord) (std_dev*kmRanGauss() + control[c][d]); } next++; } } }
void kmClusGaussPts( // clustered-Gaussian distribution KMpointArray pa, // point array (modified) int n, // number of points int dim, // dimension int n_col, // number of colors bool new_clust, // generate new clusters. double std_dev, // standard deviation within clusters double* clus_sep) // cluster separation (returned) { if (cgClusters == NULL || new_clust) {// need new cluster centers if (cgClusters != NULL) // clusters already exist kmDeallocPts(cgClusters); // get rid of them cgClusters = kmAllocPts(n_col, dim); // generate cluster center coords for (int i = 0; i < n_col; i++) { for (int d = 0; d < dim; d++) { cgClusters[i][d] = (KMcoord) kmRanUnif(-1,1); } } } double minDist = double(dim); // minimum inter-center sq'd distance for (int i = 0; i < n_col; i++) { // compute minimum separation for (int j = i+1; j < n_col; j++) { double dist = kmDist(dim, cgClusters[i], cgClusters[j]); if (dist < minDist) minDist = dist; } } // cluster separation if (clus_sep != NULL) *clus_sep = sqrt(minDist)/(sqrt(double(dim))*std_dev); for (int i = 0; i < n; i++) { int c = kmRanInt(n_col); // generate cluster index for (int d = 0; d < dim; d++) { pa[i][d] = (KMcoord) (std_dev*kmRanGauss() + cgClusters[c][d]); } } }
void KCsplit::sampleCtr( // sample from splitting node KMpoint c, // the sampled point (returned) KMorthRect &bnd_box) // bounding box for current node { int r = kmRanInt(n_nodes()); // random integer [0..n_nodes-1] if (r == 0) { // sample from this node KMorthRect expBox(kcDim); bnd_box.expand(kcDim, 3, expBox); // compute 3x expanded box expBox.sample(kcDim, c); // sample c from box } else if (r <= child[KM_LO]->n_nodes()) { // sample from left KMcoord save = bnd_box.hi[cut_dim]; // save old upper bound bnd_box.hi[cut_dim] = cut_val; // modify for left subtree child[KM_LO]->sampleCtr(c, bnd_box); bnd_box.hi[cut_dim] = save; // restore upper bound } else { // sample from right subtree KMcoord save = bnd_box.lo[cut_dim]; // save old lower bound bnd_box.lo[cut_dim] = cut_val; // modify for right subtree child[KM_HI]->sampleCtr(c, bnd_box); bnd_box.lo[cut_dim] = save; // restore lower bound } }
void KMdata::sampleCtr( // sample a center point KMcenter sample) // where to store sample { int ri = kmRanInt(nPts); // generate random index kmCopyPt(dim, pts[ri], sample); // copy to destination }