// Performs a single "recompute centers and test for convergence" step of a
// K-means style iteration and then hands control onward instead of returning
// a value: failures go to error(...), converged centers go to result(...),
// and otherwise the step continues through _1_loop(...) with the new centers.
//
// Parameters (as used by the body):
//   img           - sample data; wrapped in a cv::Mat (data0) and read as float rows.
//   old_centers_v - centers from the previous step; wrapped in a cv::Mat and used
//                   only to measure how far each center moved.
//   K             - number of clusters; must be > 0 and not exceed the sample count.
//   epsilon       - threshold compared against the largest squared center shift.
//   kMeans_DOWN_* - macros defined outside this block; presumably they append extra
//                   parameters/arguments to the declaration and calls -- TODO confirm.
//
// NOTE(review): nothing in this function accumulates samples into `centers` or
// increments `counters` before they are used; it looks as if the accumulation
// part of the algorithm lives elsewhere or was dropped -- confirm with the
// generator/caller before relying on this step.
// NOTE(review): the function is declared to return `variant` but only uses bare
// `return;` and falls off the end; confirm how `variant` is defined.
variant _1_kMeans(std::vector<std::vector<double>> img, std::vector<std::vector<double>> old_centers_v, int K, double epsilon kMeans_DOWN__1_kMeans_decl) {
    // NOTE(review): `centers` is constructed from a cv::Scalar and never resized
    // in this block, yet rows 0..K-1 are addressed through ptr<float>(k) below.
    cv::Mat centers(cv::Scalar(0)), old_centers(old_centers_v);
    cv::Mat data0(img);
    // A single-row, multi-channel matrix is treated as one sample per column.
    bool isrow = data0.rows == 1 && data0.channels() > 1;
    int N = !isrow ? data0.rows : data0.cols;
    int dims = (!isrow ? data0.cols : 1) * data0.channels();
    int type = data0.depth();
    // Reject unsupported layouts/types and K outside (0, N].
    // NOTE(review): the inputs are vectors of double while CV_32F is required
    // here; how cv::Mat is built from them is not visible -- confirm.
    if (!(data0.dims <= 2 && type == CV_32F && K > 0 && N >= K)) {
        error("Cannot perform K-means algorithm for this configuration" kMeans_DOWN__1_kMeans_error_use);
        return;
    }
    // N x dims float matrix built on data0's buffer (data0.ptr()); the row step is
    // one sample wide in the single-row case, data0.step otherwise.
    cv::Mat data(N, dims, CV_32F, data0.ptr(), isrow ? dims * sizeof(float) : static_cast<size_t>(data0.step));
    // Scratch row that receives the scaled center of the donor cluster.
    cv::Mat temp(1, dims, type);
    // Per-cluster sample counts; all start at zero.
    std::vector<int> counters(K, 0);
    const float* sample = data.ptr<float>(0);
    double max_center_shift = 0;
    // Empty-cluster repair: for every cluster with a zero count, take one sample
    // away from the cluster with the largest count.
    for (int k = 0; k < K; ++k) {
        if (counters[k] != 0) continue;
        // Index of the cluster with the largest count (first one wins on ties).
        int max_k = 0;
        for (int k1 = 1; k1 < K; ++k1) {
            if (counters[max_k] < counters[k1]) max_k = k1;
        }
        double max_dist = 0;
        int farthest_i = -1;
        float* new_center = centers.ptr<float>(k);
        float* old_center = centers.ptr<float>(max_k);
        float* _old_center = temp.ptr<float>();
        // NOTE(review): counters[max_k] is 0 on the first pass because counters
        // is never filled in this function, so this divides by zero.
        float scale = 1.f/counters[max_k];
        for (int j = 0; j < dims; ++j) _old_center[j] = old_center[j]*scale;
        // Find the sample with the largest squared distance to the scaled center.
        // The scan covers all N samples; there is no per-sample label to filter on.
        for (int i = 0; i < N; ++i) {
            sample = data.ptr<float>(i);
            double dist = cv::normL2Sqr_(sample, _old_center, dims);
            if (max_dist <= dist) {
                max_dist = dist;
                farthest_i = i;
            }
        }
        // Move that sample's contribution from the donor cluster to cluster k.
        counters[max_k]--;
        counters[k]++;
        // NOTE(review): farthest_i is still -1 if no comparison above succeeded
        // (e.g. every dist is NaN); row -1 would then be addressed.
        sample = data.ptr<float>(farthest_i);
        for (int j = 0; j < dims; ++j) {
            old_center[j] -= sample[j];
            new_center[j] += sample[j];
        }
    }
    // Turn per-cluster sums into means and record the largest squared distance
    // between each new center and the corresponding previous center.
    for (int k = 0; k < K; ++k) {
        float* center = centers.ptr<float>(k);
        if (counters[k] == 0) {
            error("For some reason one of the clusters is empty" kMeans_DOWN__1_kMeans_error_use);
            return;
        }
        float scale = 1.f/counters[k];
        for (int j = 0; j < dims; ++j) center[j] *= scale;
        double dist = 0;
        const float* old_center = old_centers.ptr<float>(k);
        for (int j = 0; j < dims; ++j) {
            double t = center[j] - old_center[j];
            dist += t * t;
        }
        max_center_shift = std::max(max_center_shift, dist);
    }
    // Export the centers in the nested-vector form used by the continuations.
    std::vector<std::vector<double>> _centers;
    centers.copyTo(_centers);
    // Converged when no center moved by more than epsilon (squared distance);
    // otherwise continue with the new centers.
    if (max_center_shift <= epsilon) {
        result(_centers kMeans_DOWN__1_kMeans_result_use);
    } else {
        _1_loop(img, _centers, K, epsilon kMeans_DOWN__1_kMeans_loop_use);
    }
}
double cv::kmeans( InputArray _data, int K, InputOutputArray _bestLabels, TermCriteria criteria, int attempts, int flags, OutputArray _centers ) { const int SPP_TRIALS = 3; Mat data0 = _data.getMat(); bool isrow = data0.rows == 1; int N = isrow ? data0.cols : data0.rows; int dims = (isrow ? 1 : data0.cols)*data0.channels(); int type = data0.depth(); attempts = std::max(attempts, 1); CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 ); CV_Assert( N >= K ); Mat data(N, dims, CV_32F, data0.ptr(), isrow ? dims * sizeof(float) : static_cast<size_t>(data0.step)); _bestLabels.create(N, 1, CV_32S, -1, true); Mat _labels, best_labels = _bestLabels.getMat(); if( flags & CV_KMEANS_USE_INITIAL_LABELS ) { CV_Assert( (best_labels.cols == 1 || best_labels.rows == 1) && best_labels.cols*best_labels.rows == N && best_labels.type() == CV_32S && best_labels.isContinuous()); best_labels.copyTo(_labels); } else { if( !((best_labels.cols == 1 || best_labels.rows == 1) && best_labels.cols*best_labels.rows == N && best_labels.type() == CV_32S && best_labels.isContinuous())) best_labels.create(N, 1, CV_32S); _labels.create(best_labels.size(), best_labels.type()); } int* labels = _labels.ptr<int>(); Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); std::vector<int> counters(K); std::vector<Vec2f> _box(dims); Vec2f* box = &_box[0]; double best_compactness = DBL_MAX, compactness = 0; RNG& rng = theRNG(); int a, iter, i, j, k; if( criteria.type & TermCriteria::EPS ) criteria.epsilon = std::max(criteria.epsilon, 0.); else criteria.epsilon = FLT_EPSILON; criteria.epsilon *= criteria.epsilon; if( criteria.type & TermCriteria::COUNT ) criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); else criteria.maxCount = 100; if( K == 1 ) { attempts = 1; criteria.maxCount = 2; } const float* sample = data.ptr<float>(0); for( j = 0; j < dims; j++ ) box[j] = Vec2f(sample[j], sample[j]); for( i = 1; i < N; i++ ) { sample = data.ptr<float>(i); for( j = 0; j < dims; 
j++ ) { float v = sample[j]; box[j][0] = std::min(box[j][0], v); box[j][1] = std::max(box[j][1], v); } } for( a = 0; a < attempts; a++ ) { double max_center_shift = DBL_MAX; for( iter = 0;; ) { swap(centers, old_centers); if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) { if( flags & KMEANS_PP_CENTERS ) generateCentersPP(data, centers, K, rng, SPP_TRIALS); else { for( k = 0; k < K; k++ ) generateRandomCenter(_box, centers.ptr<float>(k), rng); } } else { if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) { for( i = 0; i < N; i++ ) CV_Assert( (unsigned)labels[i] < (unsigned)K ); } // compute centers centers = Scalar(0); for( k = 0; k < K; k++ ) counters[k] = 0; for( i = 0; i < N; i++ ) { sample = data.ptr<float>(i); k = labels[i]; float* center = centers.ptr<float>(k); j=0; #if CV_ENABLE_UNROLLED for(; j <= dims - 4; j += 4 ) { float t0 = center[j] + sample[j]; float t1 = center[j+1] + sample[j+1]; center[j] = t0; center[j+1] = t1; t0 = center[j+2] + sample[j+2]; t1 = center[j+3] + sample[j+3]; center[j+2] = t0; center[j+3] = t1; } #endif for( ; j < dims; j++ ) center[j] += sample[j]; counters[k]++; } if( iter > 0 ) max_center_shift = 0; for( k = 0; k < K; k++ ) { if( counters[k] != 0 ) continue; // if some cluster appeared to be empty then: // 1. find the biggest cluster // 2. find the farthest from the center point in the biggest cluster // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. 
int max_k = 0; for( int k1 = 1; k1 < K; k1++ ) { if( counters[max_k] < counters[k1] ) max_k = k1; } double max_dist = 0; int farthest_i = -1; float* new_center = centers.ptr<float>(k); float* old_center = centers.ptr<float>(max_k); float* _old_center = temp.ptr<float>(); // normalized float scale = 1.f/counters[max_k]; for( j = 0; j < dims; j++ ) _old_center[j] = old_center[j]*scale; for( i = 0; i < N; i++ ) { if( labels[i] != max_k ) continue; sample = data.ptr<float>(i); double dist = normL2Sqr(sample, _old_center, dims); if( max_dist <= dist ) { max_dist = dist; farthest_i = i; } } counters[max_k]--; counters[k]++; labels[farthest_i] = k; sample = data.ptr<float>(farthest_i); for( j = 0; j < dims; j++ ) { old_center[j] -= sample[j]; new_center[j] += sample[j]; } } for( k = 0; k < K; k++ ) { float* center = centers.ptr<float>(k); CV_Assert( counters[k] != 0 ); float scale = 1.f/counters[k]; for( j = 0; j < dims; j++ ) center[j] *= scale; if( iter > 0 ) { double dist = 0; const float* old_center = old_centers.ptr<float>(k); for( j = 0; j < dims; j++ ) { double t = center[j] - old_center[j]; dist += t*t; } max_center_shift = std::max(max_center_shift, dist); } } } if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) break; // assign labels Mat dists(1, N, CV_64F); double* dist = dists.ptr<double>(0); parallel_for_(Range(0, N), KMeansDistanceComputer(dist, labels, data, centers)); compactness = 0; for( i = 0; i < N; i++ ) { compactness += dist[i]; } } if( compactness < best_compactness ) { best_compactness = compactness; if( _centers.needed() ) centers.copyTo(_centers); _labels.copyTo(best_labels); } } return best_compactness; }