/*
 * Assign every data point to its closest center.
 *
 * self        : k-means object; dimension, numCenters, centers and
 *               distance are read.
 * assignments : output, one center index per data point.
 * distances   : optional output (may be NULL), comparison value between
 *               each point and the center it was assigned to.
 * data        : numData points of self->dimension values each.
 * numData     : number of points in data.
 *
 * The comparison is strict, so on ties the lowest center index wins.
 * If no center compares below infinity for a point, assignments[i] is
 * left untouched and the reported distance is infinity.
 */
static void
VL_XCAT(_vl_kmeans_quantize_, SFX)
(VlKMeans * self,
 vl_uint32 * assignments,
 TYPE * distances,
 TYPE const * data,
 vl_size numData)
{
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif
  /* scratch row holding the comparison of one point with all centers */
  TYPE * toCenters = vl_malloc (sizeof(TYPE) * self->numCenters) ;
  vl_uindex point ;

  for (point = 0 ; point < numData ; ++point) {
    TYPE const * sample = data + self->dimension * point ;
    TYPE closest = (TYPE) VL_INFINITY_D ;
    vl_size center ;

    VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)
      (toCenters,
       self->dimension,
       sample, 1,
       (TYPE*)self->centers, self->numCenters,
       distFn) ;

    /* linear scan for the minimum */
    for (center = 0 ; center < self->numCenters ; ++center) {
      if (toCenters[center] < closest) {
        closest = toCenters[center] ;
        assignments[point] = center ;
      }
    }

    if (distances) {
      distances[point] = closest ;
    }
  }

  vl_free(toCenters) ;
}
VlKDForest * load_VlKDForest(const char *fname) { FILE *fp; size_t n; VlKDForest *self = vl_malloc (sizeof(VlKDForest)) ; if((fp = fopen(fname, "rb")) == NULL) return -1; n = read_VlKDForest(fp, self); fclose(fp); self -> rand = vl_get_rand (); self -> searchHeapArray = 0; self -> searchIdBook = 0; switch (self->dataType) { case VL_TYPE_FLOAT: self -> distanceFunction = (void(*)(void)) vl_get_vector_comparison_function_f (VlDistanceL2) ; break ; case VL_TYPE_DOUBLE : self -> distanceFunction = (void(*)(void)) vl_get_vector_comparison_function_d (VlDistanceL2) ; break ; default : abort() ; } return self; }
/*
 * Seed the centers with data points taken in random order.
 *
 * self       : k-means object; dimension, numCenters and centers are
 *              overwritten (centers is freshly allocated), distance is
 *              read to pick the comparison function.
 * data       : numData points of dimension values each.
 * dimension  : number of values per point.
 * numData    : number of points in data.
 * numCenters : number of centers to produce.
 *
 * The points are visited following a shuffled index list. A candidate
 * whose comparison value with an already accepted center is exactly 0
 * is skipped, but only while more candidates remain than centers are
 * still missing; after that every candidate is accepted as is.
 *
 * NOTE(review): nothing in this function stops the scan from running
 * past the end of the index list when numData < numCenters; presumably
 * the caller guarantees numCenters <= numData -- confirm.
 */
static void
VL_XCAT(_vl_kmeans_seed_centers_with_rand_data_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size dimension,
 vl_size numData,
 vl_size numCenters)
{
  vl_uindex cursor, other, accepted ;
  VlRand * rand = vl_get_rand () ;

  self->dimension = dimension ;
  self->numCenters = numCenters ;
  self->centers = vl_malloc (sizeof(TYPE) * dimension * numCenters) ;

  {
    vl_uindex * order = vl_malloc (sizeof(vl_uindex) * numData) ;
#if (FLT == VL_TYPE_FLOAT)
    VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
    VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif
    TYPE * toAccepted = vl_malloc (sizeof(TYPE) * numCenters) ;

    /* identity list, then shuffled in place */
    for (cursor = 0 ; cursor < numData ; ++cursor) {
      order[cursor] = cursor ;
    }
    _vl_kmeans_shuffle (order, numData, rand) ;

    accepted = 0 ;
    cursor = 0 ;
    while (accepted < numCenters) {
      TYPE const * candidate = data + dimension * order[cursor] ;
      vl_bool isDuplicate = VL_FALSE ;

      /* duplicates can be rejected only if spare candidates remain */
      if (numCenters - accepted < numData - cursor) {
        VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)
          (toAccepted,
           dimension,
           candidate, 1,
           (TYPE*)self->centers, accepted,
           distFn) ;
        for (other = 0 ; other < accepted ; ++other) {
          if (toAccepted[other] == 0) {
            isDuplicate = VL_TRUE ;
          }
        }
      }

      if (! isDuplicate) {
        /* the candidate becomes the next center */
        memcpy ((TYPE*)self->centers + dimension * accepted,
                candidate,
                sizeof(TYPE) * dimension) ;
        accepted ++ ;
      }

      ++ cursor ;
    }

    vl_free(toAccepted) ;
    vl_free(order) ;
  }
}
void test_dist() { void (*fdist)(void) = (void(*)(void)) vl_get_vector_comparison_function_f (VlDistanceL2); float x[]={ 0, 0 }; float y[]={ 2, 0 }; printf("%f\n", ((VlFloatVectorComparisonFunction)fdist)(2, x, y)); }
int main (int argc, char** argv) { float * X ; float * Y ; vl_size numDimensions = 1000 ; vl_size numSamples = 2000 ; float * result = vl_malloc (sizeof(float) * numSamples * numSamples) ; VlFloatVectorComparisonFunction f ; init_data (numDimensions, numSamples, &X, &Y) ; X+=1 ; Y+=1 ; vl_set_simd_enabled (VL_FALSE) ; f = vl_get_vector_comparison_function_f (VlDistanceL2) ; vl_tic () ; vl_eval_vector_comparison_on_all_pairs_f (result, numDimensions, X, numSamples, Y, numSamples, f) ; VL_PRINTF("Float L2 distnace: %.3f s\n", vl_toc ()) ; vl_set_simd_enabled (VL_TRUE) ; f = vl_get_vector_comparison_function_f (VlDistanceL2) ; vl_tic () ; vl_eval_vector_comparison_on_all_pairs_f (result, numDimensions, X, numSamples, Y, numSamples, f) ; VL_PRINTF("Float L2 distance (SIMD): %.3f s\n", vl_toc ()) ; X-- ; Y-- ; vl_free (X) ; vl_free (Y) ; vl_free (result) ; return 0 ; }
/*
 * Create a new, empty KD-forest.
 *
 * dataType  : VL_TYPE_FLOAT or VL_TYPE_DOUBLE (asserted).
 * dimension : data dimensionality, at least 1 (asserted).
 * numTrees  : number of trees, at least 1 (asserted).
 *
 * Returns the new object. No data and no trees are attached, the
 * thresholding method is VL_KDTREE_MEDIAN, the split heap size is 1
 * for a single tree and VL_KDTREE_SPLIT_HEALP_SIZE otherwise, and the
 * comparison function is the L2 one for the requested data type.
 */
VL_EXPORT VlKDForest *
vl_kdforest_new (vl_type dataType, vl_size dimension, vl_size numTrees)
{
  VlKDForest * self = vl_malloc (sizeof(VlKDForest)) ;

  assert(dataType == VL_TYPE_FLOAT || dataType == VL_TYPE_DOUBLE) ;
  assert(dimension >= 1) ;
  assert(numTrees >= 1) ;

  /* configuration */
  self -> rand = vl_get_rand () ;
  self -> dataType = dataType ;
  self -> numData = 0 ;
  self -> data = 0 ;
  self -> dimension = dimension ;
  self -> numTrees = numTrees ;
  self -> trees = 0 ;
  self -> thresholdingMethod = VL_KDTREE_MEDIAN ;

  /* tree construction state */
  if (numTrees == 1) {
    self -> splitHeapSize = 1 ;
  } else {
    self -> splitHeapSize = VL_KDTREE_SPLIT_HEALP_SIZE ;
  }
  self -> splitHeapNumNodes = 0 ;

  /* query state */
  self -> searchHeapArray = 0 ;
  self -> searchHeapNumNodes = 0 ;
  self -> searchMaxNumComparisons = 0 ;
  self -> searchIdBook = 0 ;
  self -> searchId = 0 ;

  /* comparison function, stored behind a generic function pointer */
  if (self->dataType == VL_TYPE_FLOAT) {
    self -> distanceFunction = (void(*)(void))
      vl_get_vector_comparison_function_f (VlDistanceL2) ;
  } else if (self->dataType == VL_TYPE_DOUBLE) {
    self -> distanceFunction = (void(*)(void))
      vl_get_vector_comparison_function_d (VlDistanceL2) ;
  } else {
    abort() ;
  }

  return self ;
}
/*
 * Recompute the table of center-to-center comparison values.
 *
 * self : k-means object; centers, numCenters, dimension and distance
 *        are read, centerDistances is written.
 *
 * The numCenters x numCenters table is allocated the first time it is
 * needed and reused afterwards (it is never resized here). The centers
 * are compared with themselves by passing no second data set.
 *
 * Returns numCenters * (numCenters - 1) / 2, evaluated in the type of
 * self->numCenters and then converted to double.
 */
static double
VL_XCAT(_vl_kmeans_update_center_distances_, SFX)
(VlKMeans * self)
{
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif

  /* lazy allocation of the table */
  if (self->centerDistances == NULL) {
    self->centerDistances = vl_malloc
      (sizeof(TYPE) * self->numCenters * self->numCenters) ;
  }

  VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)
    (self->centerDistances,
     self->dimension,
     self->centers, self->numCenters,
     NULL, 0,
     distFn) ;

  return self->numCenters * (self->numCenters - 1) / 2 ;
}
/*
 * Create a new, empty KD-forest using a chosen comparison type.
 *
 * dataType  : VL_TYPE_FLOAT or VL_TYPE_DOUBLE (asserted).
 * dimension : data dimensionality, at least 1 (asserted).
 * numTrees  : number of trees, at least 1 (asserted).
 * distance  : comparison type used to look up the distance function.
 *
 * Returns the new object, allocated zero-filled. No data and no trees
 * are attached, the thresholding method is VL_KDTREE_MEDIAN and the
 * split heap size is the smaller of numTrees and
 * VL_KDTREE_SPLIT_HEAP_SIZE.
 */
VlKDForest *
vl_kdforest_new (vl_type dataType,
                 vl_size dimension,
                 vl_size numTrees,
                 VlVectorComparisonType distance)
{
  VlKDForest * self = vl_calloc (sizeof(VlKDForest), 1) ;
  void (*comparison)(void) ;

  assert(dataType == VL_TYPE_FLOAT || dataType == VL_TYPE_DOUBLE) ;
  assert(dimension >= 1) ;
  assert(numTrees >= 1) ;

  self -> rand = vl_get_rand () ;

  /* data description */
  self -> dataType = dataType ;
  self -> numData = 0 ;
  self -> data = 0 ;
  self -> dimension = dimension ;

  /* forest description */
  self -> numTrees = numTrees ;
  self -> trees = 0 ;
  self -> thresholdingMethod = VL_KDTREE_MEDIAN ;
  self -> splitHeapSize = VL_MIN(numTrees, VL_KDTREE_SPLIT_HEAP_SIZE) ;
  self -> splitHeapNumNodes = 0 ;
  self -> distance = distance ;
  self -> maxNumNodes = 0 ;

  /* look up the comparison function for the element type */
  switch (self->dataType) {
    case VL_TYPE_FLOAT:
      comparison = (void(*)(void)) vl_get_vector_comparison_function_f (distance) ;
      break ;
    case VL_TYPE_DOUBLE:
      comparison = (void(*)(void)) vl_get_vector_comparison_function_d (distance) ;
      break ;
    default:
      abort() ;
  }
  self -> distanceFunction = comparison ;

  return self ;
}
/*
 * Refine the current centers with Elkan's accelerated k-means.
 *
 * self    : k-means object. dimension, numCenters, distance, verbosity
 *           and maxNumIterations are read; centers is replaced by the
 *           refined centers and centerDistances is refreshed through
 *           _vl_kmeans_update_center_distances_.
 * data    : numData points of self->dimension values each.
 * numData : number of points in data.
 *
 * For each point the function keeps an upper bound on the comparison
 * value to its assigned center and a lower bound to every center, and
 * uses them together with the center-to-center table to skip
 * point-to-center evaluations. Only VlDistanceL1 and VlDistanceL2 are
 * supported; any other value of self->distance aborts.
 *
 * The loop stops when an iteration changes no assignment or when
 * self->maxNumIterations is reached (at least one iteration is always
 * run).
 *
 * Returns the energy, i.e. the sum over all points of the comparison
 * value between the point and its assigned center, recomputed exactly
 * after the last iteration.
 */
static double
VL_XCAT(_vl_kmeans_refine_centers_elkan_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size numData)
{
  vl_size d, iteration, x ;
  vl_uint32 c, j ;
  vl_bool allDone ;
  vl_uint32 * assignments = vl_malloc (sizeof(vl_uint32) * numData) ;
  /* One mass per center: the array is indexed by assignments[x], so it
     must hold numCenters entries (not numData). */
  vl_size * clusterMasses = vl_malloc (sizeof(vl_size) * self->numCenters) ;

#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif

  TYPE * nextCenterDistances = vl_malloc (sizeof(TYPE) * self->numCenters) ;
  TYPE * pointToClosestCenterUB = vl_malloc (sizeof(TYPE) * numData) ;
  vl_bool * pointToClosestCenterUBIsStrict = vl_malloc (sizeof(vl_bool) * numData) ;
  TYPE * pointToCenterLB = vl_malloc (sizeof(TYPE) * numData * self->numCenters) ;
  TYPE * newCenters = vl_malloc(sizeof(TYPE) * self->dimension * self->numCenters) ;
  TYPE * centerToNewCenterDistances = vl_malloc (sizeof(TYPE) * self->numCenters) ;

  /* only used for the L1 distance */
  vl_uint32 * permutations = NULL ;
  vl_size * numSeenSoFar = NULL ;

  double energy ;

  /* counters of comparison evaluations, for the verbose report */
  vl_size totDistanceComputationsToInit = 0 ;
  vl_size totDistanceComputationsToRefreshUB = 0 ;
  vl_size totDistanceComputationsToRefreshLB = 0 ;
  vl_size totDistanceComputationsToRefreshCenterDistances = 0 ;
  vl_size totDistanceComputationsToNewCenters = 0 ;
  vl_size totDistanceComputationsToFinalize = 0 ;

  if (self->distance == VlDistanceL1) {
    permutations = vl_malloc(sizeof(vl_uint32) * numData * self->dimension) ;
    numSeenSoFar = vl_malloc(sizeof(vl_size) * self->numCenters) ;
    VL_XCAT(_vl_kmeans_sort_data_helper_, SFX)(self, permutations, data, numData) ;
  }

  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
  /*                          Initialization                        */
  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

  /* An iteration is: get_new_centers + reassign + get_energy.
     This counts as iteration 0, where get_new_centers is assumed
     to be performed before calling the train function by
     the initialization function */

  /* update distances between centers */
  totDistanceComputationsToInit +=
    VL_XCAT(_vl_kmeans_update_center_distances_, SFX)(self) ;

  /* assign points to the initial centers and initialize bounds */
  memset(pointToCenterLB, 0, sizeof(TYPE) * self->numCenters * numData) ;
  for (x = 0 ; x < numData ; ++x) {
    TYPE distance ;

    /* do the first center */
    assignments[x] = 0 ;
    distance = distFn(self->dimension,
                      data + x * self->dimension,
                      (TYPE*)self->centers + 0) ;
    pointToClosestCenterUB[x] = distance ;
    pointToClosestCenterUBIsStrict[x] = VL_TRUE ;
    pointToCenterLB[0 + x * self->numCenters] = distance ;
    totDistanceComputationsToInit += 1 ;

    /* do other centers */
    for (c = 1 ; c < self->numCenters ; ++c) {

      /* Can skip if the center assigned so far is twice as close
         as its distance to the center under consideration */
      if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
          pointToClosestCenterUB[x] <=
          ((TYPE*)self->centerDistances)
          [c + assignments[x] * self->numCenters]) {
        continue ;
      }

      distance = distFn(self->dimension,
                        data + x * self->dimension,
                        (TYPE*)self->centers + c * self->dimension) ;
      pointToCenterLB[c + x * self->numCenters] = distance ;
      totDistanceComputationsToInit += 1 ;
      if (distance < pointToClosestCenterUB[x]) {
        pointToClosestCenterUB[x] = distance ;
        assignments[x] = c ;
      }
    }
  }

  /* compute UB on energy */
  energy = 0 ;
  for (x = 0 ; x < numData ; ++x) {
    energy += pointToClosestCenterUB[x] ;
  }

  if (self->verbosity) {
    /* vl_size counters are cast explicitly to match the conversion */
    VL_PRINTF("kmeans: Elkan iter 0: energy = %g, dist. calc. = %lu\n",
              energy, (unsigned long) totDistanceComputationsToInit) ;
  }

  /* #define SANITY*/
#ifdef SANITY
  {
    int xx ;
    int cc ;
    TYPE tol = 1e-5 ;
    VL_PRINTF("inconsistencies after initial assignments:\n");
    for (xx = 0 ; xx < numData ; ++xx) {
      for (cc = 0 ; cc < self->numCenters ; ++cc) {
        TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
        TYPE b = distFn(self->dimension,
                        data + self->dimension * xx,
                        (TYPE*)self->centers + self->dimension * cc) ;
        if (cc == assignments[xx]) {
          TYPE z = pointToClosestCenterUB[xx] ;
          if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                                 cc, xx, z, b) ;
        }
        if (a>b+tol) VL_PRINTF("LB %d %d = %f > %f\n",
                               cc, xx, a, b) ;
      }
    }
  }
#endif

  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
  /*                          Iterations                            */
  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

  for (iteration = 1 ; 1; ++iteration) {

    vl_size numDistanceComputationsToRefreshUB = 0 ;
    vl_size numDistanceComputationsToRefreshLB = 0 ;
    vl_size numDistanceComputationsToRefreshCenterDistances = 0 ;
    vl_size numDistanceComputationsToNewCenters = 0 ;

    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
    /*                         Compute new centers                  */
    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

    memset(clusterMasses, 0, sizeof(vl_size) * self->numCenters) ;
    for (x = 0 ; x < numData ; ++x) {
      clusterMasses[assignments[x]] ++ ;
    }

    switch (self->distance) {
      case VlDistanceL2:
        /* new center = mean of the assigned points */
        memset(newCenters, 0, sizeof(TYPE) * self->dimension * self->numCenters) ;
        for (x = 0 ; x < numData ; ++x) {
          TYPE * cpt = newCenters + assignments[x] * self->dimension ;
          TYPE const * xpt = data + x * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) {
            cpt[d] += xpt[d] ;
          }
        }
        /* NOTE(review): a center with no assigned point has mass 0, so
           its coordinates are computed as 0 / 0 here; empty clusters
           are not handled specially. */
        for (c = 0 ; c < self->numCenters ; ++c) {
          TYPE mass = clusterMasses[c] ;
          TYPE * cpt = newCenters + c * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) {
            cpt[d] /= mass ;
          }
        }
        break ;
      case VlDistanceL1:
        /* new center = per-dimension median of the assigned points,
           found by scanning the points in the order given by the
           per-dimension permutation */
        for (d = 0 ; d < self->dimension ; ++d) {
          vl_uint32 * perm = permutations + d * numData ;
          memset(numSeenSoFar, 0, sizeof(vl_size) * self->numCenters) ;
          for (x = 0; x < numData ; ++x) {
            c = assignments[perm[x]] ;
            if (2 * numSeenSoFar[c] < clusterMasses[c]) {
              newCenters [d + c * self->dimension] =
                data [d + perm[x] * self->dimension] ;
            }
            numSeenSoFar[c] ++ ;
          }
        }
        break ;
      default:
        abort();
    } /* done compute centers */

    /* compute the distance from the old centers to the new centers */
    for (c = 0 ; c < self->numCenters ; ++c) {
      TYPE distance = distFn(self->dimension,
                             newCenters + c * self->dimension,
                             (TYPE*)self->centers + c * self->dimension) ;
      centerToNewCenterDistances[c] = distance ;
      numDistanceComputationsToNewCenters += 1 ;
    }

    /* make the new centers current */
    {
      TYPE * tmp = self->centers ;
      self->centers = newCenters ;
      newCenters = tmp ;
    }

    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
    /*                   Reassign points to centers                 */
    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

    /* Update distances between centers. */
    numDistanceComputationsToRefreshCenterDistances +=
      VL_XCAT(_vl_kmeans_update_center_distances_, SFX)(self) ;

    /* for each center, the distance to its nearest other center */
    for (c = 0 ; c < self->numCenters ; ++c) {
      nextCenterDistances[c] = (TYPE) VL_INFINITY_D ;
      for (j = 0 ; j < self->numCenters ; ++j) {
        if (j == c) continue ;
        nextCenterDistances[c] = VL_MIN(nextCenterDistances[c],
                                        ((TYPE*)self->centerDistances)
                                        [j + c * self->numCenters]) ;
      }
    }

    /* Update upper bounds on point-to-closest-center distances
       based on the center variation. */
    for (x = 0 ; x < numData ; ++x) {
      TYPE a = pointToClosestCenterUB[x] ;
      TYPE b = centerToNewCenterDistances[assignments[x]] ;
      if (self->distance == VlDistanceL1) {
        pointToClosestCenterUB[x] = a + b ;
      } else {
#if (FLT == VL_TYPE_FLOAT)
        TYPE sqrtab = sqrtf (a * b) ;
#else
        TYPE sqrtab = sqrt (a * b) ;
#endif
        pointToClosestCenterUB[x] = a + b + 2.0 * sqrtab ;
      }
      pointToClosestCenterUBIsStrict[x] = VL_FALSE ;
    }

    /* Update lower bounds on point-to-center distances
       based on the center variation. */
    for (x = 0 ; x < numData ; ++x) {
      for (c = 0 ; c < self->numCenters ; ++c) {
        TYPE a = pointToCenterLB[c + x * self->numCenters] ;
        TYPE b = centerToNewCenterDistances[c] ;
        if (a < b) {
          pointToCenterLB[c + x * self->numCenters] = 0 ;
        } else {
          if (self->distance == VlDistanceL1) {
            pointToCenterLB[c + x * self->numCenters] = a - b ;
          } else {
#if (FLT == VL_TYPE_FLOAT)
            TYPE sqrtab = sqrtf (a * b) ;
#else
            TYPE sqrtab = sqrt (a * b) ;
#endif
            pointToCenterLB[c + x * self->numCenters] = a + b - 2.0 * sqrtab ;
          }
        }
      }
    }

#ifdef SANITY
    {
      int xx ;
      int cc ;
      TYPE tol = 1e-5 ;
      VL_PRINTF("inconsistencies before assignments:\n");
      for (xx = 0 ; xx < numData ; ++xx) {
        for (cc = 0 ; cc < self->numCenters ; ++cc) {
          TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
          TYPE b = distFn(self->dimension,
                          data + self->dimension * xx,
                          (TYPE*)self->centers + self->dimension * cc) ;
          if (cc == assignments[xx]) {
            TYPE z = pointToClosestCenterUB[xx] ;
            if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                                   cc, xx, z, b) ;
          }
          if (a>b+tol) VL_PRINTF("LB %d %d = %f > %f (assign = %d)\n",
                                 cc, xx, a, b, assignments[xx]) ;
        }
      }
    }
#endif

    /* Scan the data and do the reassignments. Use the bounds to
       skip as many point-to-center distance calculations as possible. */
    for (allDone = VL_TRUE, x = 0 ; x < numData ; ++x) {

      /* A point x sticks with its current center assignments[x] if
         the UB to d(x, c[assignments[x]]) is not larger than half
         the distance of c[assignments[x]] to any other center c. */
      if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
          pointToClosestCenterUB[x] <= nextCenterDistances[assignments[x]]) {
        continue ;
      }

      for (c = 0 ; c < self->numCenters ; ++c) {
        vl_uint32 cx = assignments[x] ;
        TYPE distance ;

        /* The point is not reassigned to a given center c
           if either:

           0 - c is already the assigned center
           1 - The UB of d(x, c[assignments[x]]) is smaller than half
               the distance of c[assignments[x]] to c, OR
           2 - The UB of d(x, c[assignments[x]]) is smaller than the
               LB of the distance of x to c.
        */
        if (cx == c) {
          continue ;
        }
        if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
            pointToClosestCenterUB[x] <= ((TYPE*)self->centerDistances)
            [c + cx * self->numCenters]) {
          continue ;
        }
        if (pointToClosestCenterUB[x] <= pointToCenterLB
            [c + x * self->numCenters]) {
          continue ;
        }

        /* If the UB is loose, try recomputing it and test again */
        if (! pointToClosestCenterUBIsStrict[x]) {
          distance = distFn(self->dimension,
                            data + self->dimension * x,
                            (TYPE*)self->centers + self->dimension * cx) ;
          pointToClosestCenterUB[x] = distance ;
          pointToClosestCenterUBIsStrict[x] = VL_TRUE ;
          pointToCenterLB[cx + x * self->numCenters] = distance ;
          numDistanceComputationsToRefreshUB += 1 ;

          if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
              pointToClosestCenterUB[x] <= ((TYPE*)self->centerDistances)
              [c + cx * self->numCenters]) {
            continue ;
          }
          if (pointToClosestCenterUB[x] <= pointToCenterLB
              [c + x * self->numCenters]) {
            continue ;
          }
        }

        /* Now the UB is strict (equal to d(x, assignments[x])), but
           we still could not exclude that x should be reassigned to
           c. We therefore compute the distance, update the LB,
           and check if a reassignment must be made */
        distance = distFn(self->dimension,
                          data + x * self->dimension,
                          (TYPE*)self->centers + c * self->dimension) ;
        numDistanceComputationsToRefreshLB += 1 ;
        pointToCenterLB[c + x * self->numCenters] = distance ;

        if (distance < pointToClosestCenterUB[x]) {
          assignments[x] = c ;
          pointToClosestCenterUB[x] = distance ;
          allDone = VL_FALSE ;
          /* the UB strict flag is already set here */
        }

      } /* assign center */
    } /* next data point */

    totDistanceComputationsToRefreshUB
      += numDistanceComputationsToRefreshUB ;

    totDistanceComputationsToRefreshLB
      += numDistanceComputationsToRefreshLB ;

    totDistanceComputationsToRefreshCenterDistances
      += numDistanceComputationsToRefreshCenterDistances ;

    totDistanceComputationsToNewCenters
      += numDistanceComputationsToNewCenters ;

#ifdef SANITY
    {
      int xx ;
      int cc ;
      TYPE tol = 1e-5 ;
      VL_PRINTF("inconsistencies after assignments:\n");
      for (xx = 0 ; xx < numData ; ++xx) {
        for (cc = 0 ; cc < self->numCenters ; ++cc) {
          TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
          TYPE b = distFn(self->dimension,
                          data + self->dimension * xx,
                          (TYPE*)self->centers + self->dimension * cc) ;
          if (cc == assignments[xx]) {
            TYPE z = pointToClosestCenterUB[xx] ;
            if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                                   cc, xx, z, b) ;
          }
          if (a>b+tol) VL_PRINTF("LB %d %d = %f > %f (assign = %d)\n",
                                 cc, xx, a, b, assignments[xx]) ;
        }
      }
    }
#endif

    /* compute UB on energy */
    energy = 0 ;
    for (x = 0 ; x < numData ; ++x) {
      energy += pointToClosestCenterUB[x] ;
    }

    if (self->verbosity) {
      vl_size numDistanceComputations =
        numDistanceComputationsToRefreshUB +
        numDistanceComputationsToRefreshLB +
        numDistanceComputationsToRefreshCenterDistances +
        numDistanceComputationsToNewCenters ;
      VL_PRINTF("kmeans: Elkan iter %lu: energy <= %g, dist. calc. = %lu\n",
                (unsigned long) iteration,
                energy,
                (unsigned long) numDistanceComputations) ;
      if (self->verbosity > 1) {
        VL_PRINTF("kmeans: Elkan iter %lu: total dist. calc. per type: "
                  "UB: %.1f%% (%lu), LB: %.1f%% (%lu), "
                  "intra_center: %.1f%% (%lu), "
                  "new_center: %.1f%% (%lu)\n",
                  (unsigned long) iteration,
                  100.0 * numDistanceComputationsToRefreshUB / numDistanceComputations,
                  (unsigned long) numDistanceComputationsToRefreshUB,
                  100.0 *numDistanceComputationsToRefreshLB / numDistanceComputations,
                  (unsigned long) numDistanceComputationsToRefreshLB,
                  100.0 * numDistanceComputationsToRefreshCenterDistances / numDistanceComputations,
                  (unsigned long) numDistanceComputationsToRefreshCenterDistances,
                  100.0 * numDistanceComputationsToNewCenters / numDistanceComputations,
                  (unsigned long) numDistanceComputationsToNewCenters) ;
      }
    }

    /* check termination conditions */
    if (iteration >= self->maxNumIterations) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Elkan terminating because maximum number of iterations reached\n") ;
      }
      break ;
    }
    if (allDone) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Elkan terminating because the algorithm fully converged\n") ;
      }
      break ;
    }

  } /* next Elkan iteration */

  /* compute true energy */
  energy = 0 ;
  for (x = 0 ; x < numData ; ++ x) {
    vl_uindex cx = assignments [x] ;
    energy += distFn(self->dimension,
                     data + self->dimension * x,
                     (TYPE*)self->centers + self->dimension * cx) ;
    totDistanceComputationsToFinalize += 1 ;
  }

  {
    vl_size totDistanceComputations =
      totDistanceComputationsToInit +
      totDistanceComputationsToRefreshUB +
      totDistanceComputationsToRefreshLB +
      totDistanceComputationsToRefreshCenterDistances +
      totDistanceComputationsToNewCenters +
      totDistanceComputationsToFinalize ;

    /* ratio to the iteration * numCenters * numData evaluations used
       as the reference cost of plain Lloyd iterations */
    double saving = (double)totDistanceComputations
      / (iteration * self->numCenters * numData) ;

    if (self->verbosity) {
      VL_PRINTF("kmeans: Elkan: total dist. calc.: %lu (%.2f %% of Lloyd)\n",
                (unsigned long) totDistanceComputations, saving * 100.0) ;
    }

    if (self->verbosity > 1) {
      VL_PRINTF("kmeans: Elkan: total dist. calc. per type: "
                "init: %.1f%% (%lu), UB: %.1f%% (%lu), LB: %.1f%% (%lu), "
                "intra_center: %.1f%% (%lu), "
                "new_center: %.1f%% (%lu), "
                "finalize: %.1f%% (%lu)\n",
                100.0 * totDistanceComputationsToInit / totDistanceComputations,
                (unsigned long) totDistanceComputationsToInit,
                100.0 * totDistanceComputationsToRefreshUB / totDistanceComputations,
                (unsigned long) totDistanceComputationsToRefreshUB,
                100.0 *totDistanceComputationsToRefreshLB / totDistanceComputations,
                (unsigned long) totDistanceComputationsToRefreshLB,
                100.0 * totDistanceComputationsToRefreshCenterDistances / totDistanceComputations,
                (unsigned long) totDistanceComputationsToRefreshCenterDistances,
                100.0 * totDistanceComputationsToNewCenters / totDistanceComputations,
                (unsigned long) totDistanceComputationsToNewCenters,
                100.0 * totDistanceComputationsToFinalize / totDistanceComputations,
                (unsigned long) totDistanceComputationsToFinalize) ;
    }
  }

  if (permutations) { vl_free(permutations) ; }
  if (numSeenSoFar) { vl_free(numSeenSoFar) ; }

  vl_free(assignments) ;
  vl_free(clusterMasses) ;

  vl_free(nextCenterDistances) ;
  vl_free(pointToClosestCenterUB) ;
  vl_free(pointToClosestCenterUBIsStrict) ;
  vl_free(pointToCenterLB) ;
  /* after the pointer swaps this is the buffer not owned by self */
  vl_free(newCenters) ;
  vl_free(centerToNewCenterDistances) ;

  return energy ;
}
/*
 * Seed the centers one at a time, favouring points far from the
 * centers already chosen.
 *
 * self       : k-means object; dimension, numCenters and centers are
 *              overwritten (centers is freshly allocated), distance is
 *              read to pick the comparison function.
 * data       : numData points of dimension values each.
 * dimension  : number of values per point.
 * numData    : number of points in data.
 * numCenters : number of centers to produce.
 *
 * The first center is a point drawn with vl_rand_uindex. For each
 * following center, every point keeps the minimum comparison value to
 * the centers chosen so far; the next center is the first point at
 * which the running sum of these minima reaches thresh times their
 * total, thresh being drawn with vl_rand_real1 (presumably uniform on
 * [0, 1] -- confirm), or the last point if that never happens.
 */
static void
VL_XCAT(_vl_kmeans_seed_centers_plus_plus_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size dimension,
 vl_size numData,
 vl_size numCenters)
{
  vl_uindex pick, filled ;
  VlRand * rand = vl_get_rand () ;
  TYPE * toLatest = vl_malloc (sizeof(TYPE) * numData) ;
  TYPE * toNearest = vl_malloc (sizeof(TYPE) * numData) ;
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif

  self->dimension = dimension ;
  self->numCenters = numCenters ;
  self->centers = vl_malloc (sizeof(TYPE) * dimension * numCenters) ;

  /* no center chosen yet: every point is infinitely far */
  for (pick = 0 ; pick < numData ; ++pick) {
    toNearest[pick] = (TYPE) VL_INFINITY_D ;
  }

  /* the first center is drawn directly */
  pick = vl_rand_uindex (rand, numData) ;
  filled = 0 ;

  for (;;) {
    TYPE total = 0 ;
    TYPE running = 0 ;
    /* drawn on every pass, including the last one */
    TYPE thresh = (TYPE) vl_rand_real1 (rand) ;
    TYPE * latest = (TYPE*)self->centers + filled * dimension ;

    /* store the selected point as the next center */
    memcpy (latest, data + pick * dimension, sizeof(TYPE) * dimension) ;
    filled ++ ;
    if (filled == numCenters) {
      break ;
    }

    /* refresh the per-point minima with the center just added */
    VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)
      (toLatest,
       dimension,
       latest, 1,
       data, numData,
       distFn) ;
    for (pick = 0 ; pick < numData ; ++pick) {
      toNearest[pick] = VL_MIN(toNearest[pick], toLatest[pick]) ;
      total += toNearest[pick] ;
    }

    /* walk the running sum until it crosses the threshold */
    pick = 0 ;
    while (pick < numData - 1) {
      running += toNearest[pick] ;
      if (running >= thresh * total) {
        break ;
      }
      ++ pick ;
    }
  }

  vl_free(toLatest) ;
  vl_free(toNearest) ;
}
/*
 * MEX driver: all-pairs comparison of the columns of X with the
 * columns of Y, or of X with itself when Y is not given.
 *
 * in[0] (X) must be a real matrix of class SINGLE or DOUBLE. If a
 * second argument is present and is a real matrix it is taken as Y and
 * must have the same class and number of rows as X; otherwise option
 * parsing starts at the second argument. The options select the
 * comparison type (L2 by default).
 *
 * out[0] is a numDataX x numDataY array (numDataX x numDataX without
 * Y) of the same class as X. It is returned as created, without any
 * computation, when there are no columns to compare or when the
 * dimension is zero.
 */
void
mexFunction(int nout, mxArray *out[],
            int nin, const mxArray *in[])
{
  enum {IN_X = 0, IN_Y} ;
  enum {OUT_D = 0} ;

  vl_bool autoComparison = VL_TRUE ;
  VlVectorComparisonType comparisonType = VlDistanceL2 ;

  mwSize numDataX = 0 ;
  mwSize numDataY = 0 ;
  mwSize dimension ;
  mxClassID classId ;

  /* option parsing state */
  int opt ;
  int next ;
  mxArray const *optarg ;

  VL_USE_MATLAB_ENV ;

  /* -----------------------------------------------------------------
   *                                               Check the arguments
   * -------------------------------------------------------------- */

  if (nout > 1) {
    vlmxError(vlmxErrTooManyOutputArguments, NULL) ;
  }
  if (nin < 1) {
    vlmxError(vlmxErrNotEnoughInputArguments, NULL) ;
  }
  if (! vlmxIsMatrix (in[IN_X],-1,-1) || ! vlmxIsReal(in[IN_X])) {
    vlmxError(vlmxErrInvalidArgument,
              "X must be a real matrix.") ;
  }

  next = 1 ;
  classId = mxGetClassID(in[IN_X]) ;
  dimension = mxGetM(in[IN_X]) ;
  numDataX = mxGetN(in[IN_X]) ;

  /* a second real matrix, if any, is the Y operand */
  if (nin > 1 && vlmxIsMatrix (in[IN_Y],-1,-1) && vlmxIsReal(in[IN_Y])) {
    next = 2 ;
    autoComparison = VL_FALSE ;
    numDataY = mxGetN(in[IN_Y]) ;
    if (mxGetClassID(in[IN_Y]) != classId) {
      vlmxError(vlmxErrInvalidArgument,
                "X and Y must have the same class.") ;
    }
    if (dimension != mxGetM(in[IN_Y])) {
      vlmxError(vlmxErrInvalidArgument,
                "X and Y must have the same number of rows.") ;
    }
  }

  if (! (classId == mxSINGLE_CLASS || classId == mxDOUBLE_CLASS)) {
    vlmxError(vlmxErrInvalidArgument,
              "X must be either of class SINGLE or DOUBLE.");
  }

  /* the last option given decides the comparison type */
  while ((opt = vlmxNextOption (in, nin, options, &next, &optarg)) >= 0) {
    switch (opt) {
      case opt_L2    : comparisonType = VlDistanceL2 ;        break ;
      case opt_L1    : comparisonType = VlDistanceL1 ;        break ;
      case opt_CHI2  : comparisonType = VlDistanceChi2 ;      break ;
      case opt_HELL  : comparisonType = VlDistanceHellinger ; break ;
      case opt_JS    : comparisonType = VlDistanceJS ;        break ;
      case opt_KL2   : comparisonType = VlKernelL2 ;          break ;
      case opt_KL1   : comparisonType = VlKernelL1 ;          break ;
      case opt_KCHI2 : comparisonType = VlKernelChi2 ;        break ;
      case opt_KHELL : comparisonType = VlKernelHellinger ;   break ;
      case opt_KJS   : comparisonType = VlKernelJS ;          break ;
      default: abort() ;
    }
  }

  /* -----------------------------------------------------------------
   *                                                        Do the job
   * -------------------------------------------------------------- */

  /* allocate output */
  {
    mwSize dims [2] ;
    dims[0] = numDataX ;
    if (autoComparison) {
      dims[1] = numDataX ;
    } else {
      dims[1] = numDataY ;
    }
    out[OUT_D] = mxCreateNumericArray (2, dims, classId, mxREAL) ;
  }

  /* With no columns in X or Y, or with a null dimension, the data
     pointers are null as well; this may confuse
     vl_eval_vector_comparison_on_all_pairs_*, so these cases stop
     here with the output as allocated. */
  if (numDataX == 0 ||
      (! autoComparison && numDataY == 0) ||
      dimension == 0) {
    return ;
  }

  /* make calculation: without Y the second operand is passed as a
     null pointer with zero columns */
  switch (classId) {
    case mxSINGLE_CLASS:
      {
        VlFloatVectorComparisonFunction f =
          vl_get_vector_comparison_function_f (comparisonType) ;
        float * second = autoComparison ? NULL : (float*)mxGetData(in[IN_Y]) ;
        mwSize numSecond = autoComparison ? 0 : numDataY ;
        vl_eval_vector_comparison_on_all_pairs_f
          ((float*)mxGetData(out[OUT_D]),
           dimension,
           (float*)mxGetData(in[IN_X]), numDataX,
           second, numSecond,
           f) ;
      }
      break ;

    case mxDOUBLE_CLASS:
      {
        VlDoubleVectorComparisonFunction f =
          vl_get_vector_comparison_function_d (comparisonType) ;
        double * second = autoComparison ? NULL : (double*)mxGetData(in[IN_Y]) ;
        mwSize numSecond = autoComparison ? 0 : numDataY ;
        vl_eval_vector_comparison_on_all_pairs_d
          ((double*)mxGetData(out[OUT_D]),
           dimension,
           (double*)mxGetData(in[IN_X]), numDataX,
           second, numSecond,
           f) ;
      }
      break ;

    default:
      abort() ;
  }
}