void kmeans_1d_dp(const double *x, const size_t N, const double *y, size_t Kmin, size_t Kmax, int* cluster, double* centers, double* withinss, int* size) { // Input: // x -- an array of double precision numbers, not necessarily sorted // Kmin -- the minimum number of clusters expected // Kmax -- the maximum number of clusters expected // NOTE: All vectors in this program is considered starting at position 0. std::vector<double> x_sorted(N); std::vector<double> y_sorted; auto is_equally_weighted(true); std::vector<size_t> order(N); //Number generation using lambda function, not supported by all g++: //std::size_t n(0); //std::generate(order.begin(), order.end(), [&]{ return n++; }); for(size_t i=0; i<order.size(); ++i) { order[i] = i; } // Sort the index of x in increasing order of x // Sorting using lambda function, not supported by all g++ versions: // std::sort(order.begin(), order.end(), // [&](size_t i1, size_t i2) { return x[i1] < x[i2]; } ); struct CompareIndex { const double * m_x; CompareIndex(const double * x) : m_x(x) {} bool operator() (size_t i, size_t j) { return (m_x[i] < m_x[j]);} } compi(x); std::sort(order.begin(), order.end(), compi); for(size_t i=0; i<order.size(); ++i) { x_sorted[i] = x[order[i]]; } // check to see if unequal weight is provided if(y != NULL) { is_equally_weighted = true; for(size_t i=1; i<N; ++i) { if(y[i] != y[i-1]) { is_equally_weighted = false; break; } } } if(! is_equally_weighted) { y_sorted.resize(N); for(size_t i=0; i<order.size(); ++i) { y_sorted[i] = y[order[i]]; } } const size_t nUnique = numberOfUnique(x_sorted.begin(), x_sorted.end()); Kmax = nUnique < Kmax ? nUnique : Kmax; if(nUnique > 1) { // The case when not all elements are equal. 
std::vector< std::vector< double > > S( Kmax, std::vector<double>(N) ); std::vector< std::vector< size_t > > J( Kmax, std::vector<size_t>(N) ); size_t Kopt; // Fill in dynamic programming matrix if(is_equally_weighted) { fill_dp_matrix(x_sorted, S, J); // Choose an optimal number of levels between Kmin and Kmax Kopt = select_levels(x_sorted, J, Kmin, Kmax); } else { fill_weighted_dp_matrix(x_sorted, y_sorted, S, J); // Choose an optimal number of levels between Kmin and Kmax Kopt = select_levels_weighted(x_sorted, y_sorted, J, Kmin, Kmax); } if (Kopt < Kmax) { // Reform the dynamic programming matrix S and J J.erase(J.begin() + Kopt, J.end()); } std::vector<int> cluster_sorted(N); // Backtrack to find the clusters beginning and ending indices if(is_equally_weighted) { backtrack(x_sorted, J, &cluster_sorted[0], centers, withinss, size); } else { backtrack_weighted(x_sorted, y_sorted, J, &cluster_sorted[0], centers, withinss, size); } for(size_t i = 0; i < N; ++i) { // Obtain clustering on data in the original order cluster[order[i]] = cluster_sorted[i]; } } else { // A single cluster that contains all elements for(size_t i=0; i<N; ++i) { cluster[i] = 0; } centers[0] = x[0]; withinss[0] = 0.0; size[0] = N * (is_equally_weighted ? 1 : y[0]); } } //end of kmeans_1d_dp()
// Optimal one-dimensional k-means clustering by dynamic programming
// (std::vector interface, unweighted).
//
// Input:
//   x    -- a vector of numbers, not necessarily sorted
//   Kmin -- the minimum number of clusters expected
//   Kmax -- the maximum number of clusters expected; it is lowered to the
//           number of distinct values in x when that is smaller
// Returns:
//   A ClusterResult describing the clustering. When x carries no data
//   elements (x.size() < 2) the result has nClusters set to 0 and is
//   otherwise left default-constructed.
//
// NOTE: All vectors in this program are considered starting at position 1,
// position 0 is not used.
ClusterResult kmeans_1d_dp(const std::vector<double> & x, size_t Kmin, size_t Kmax)
{
  ClusterResult result;

  // Position 0 is unused, so fewer than two entries means there is no data.
  // Without this guard x.size() - 1 wraps around for an empty vector and
  // x[1] / begin()+1 are accessed out of range.
  if (x.size() < 2) {
    result.nClusters = 0;
    return result;
  }

  const size_t N = x.size() - 1;  // N: the number of data elements

  // Sort a copy of the data, leaving the unused position 0 in place.
  std::vector<double> x_sorted(x);
  std::sort(x_sorted.begin() + 1, x_sorted.end());

  // There cannot be more clusters than distinct values.
  const size_t nUnique = numberOfUnique(x_sorted.begin() + 1, x_sorted.end());
  Kmax = nUnique < Kmax ? nUnique : Kmax;

  if (nUnique > 1) { // The case when not all elements are equal.

    std::vector< std::vector< double > > D( (Kmax + 1), std::vector<double>(N + 1) );
    std::vector< std::vector< size_t > > B( (Kmax + 1), std::vector<size_t>(N + 1) );

    // Fill in dynamic programming matrix
    fill_dp_matrix(x_sorted, D, B);

    // Choose an optimal number of levels between Kmin and Kmax
    const size_t Kopt = select_levels(x_sorted, B, Kmin, Kmax);

    if (Kopt < Kmax) {
      // Drop the rows of B beyond the selected number of clusters
      // (row 0 is unused, hence the + 1).
      B.erase(B.begin() + Kopt + 1, B.end());
    }

    // Backtrack to find the clusters beginning and ending indices
    backtrack(x_sorted, B, result);

    // Perform clustering on the original data: walk the clusters in
    // increasing order and assign x[i] to the first cluster whose largest
    // sorted member is not smaller than x[i].
    for (size_t i = 1; i < x.size(); ++i) {
      size_t indexLeft = 1;
      for (size_t k = 1; k < result.size.size(); ++k) {
        const size_t indexRight = indexLeft + result.size[k] - 1;
        if (x[i] <= x_sorted[indexRight]) {
          result.cluster[i] = k;
          break;
        }
        indexLeft = indexRight + 1;
      }
    }

  } else { // A single cluster that contains all elements

    result.nClusters = 1;

    // Every position (including the unused position 0) is labelled 1.
    result.cluster = std::vector<size_t>(N + 1, 1);

    result.centers.resize(2);
    result.withinss.resize(2);
    result.size.resize(2);

    result.centers[1] = x[1];
    result.withinss[1] = 0.0;
    result.size[1] = N;
  }

  return result;
} //end of kmeans_1d_dp()