void kmeans::process(const dataset & p_data, const index_sequence & p_indexes, cluster_data & p_result) { m_ptr_data = &p_data; m_ptr_indexes = &p_indexes; m_ptr_result = (kmeans_data *) &p_result; if (p_data[0].size() != m_initial_centers[0].size()) { throw std::invalid_argument("Dimension of the input data and dimension of the initial cluster centers must be the same."); } m_ptr_result->centers().assign(m_initial_centers.begin(), m_initial_centers.end()); if (m_ptr_result->is_observed()) { cluster_sequence sequence; update_clusters(m_initial_centers, sequence); m_ptr_result->evolution_centers().push_back(m_initial_centers); m_ptr_result->evolution_clusters().push_back(sequence); } double current_change = std::numeric_limits<double>::max(); for(std::size_t iteration = 0; iteration < m_itermax && current_change > m_tolerance; iteration++) { update_clusters(m_ptr_result->centers(), m_ptr_result->clusters()); current_change = update_centers(m_ptr_result->clusters(), m_ptr_result->centers()); if (m_ptr_result->is_observed()) { m_ptr_result->evolution_centers().push_back(m_ptr_result->centers()); m_ptr_result->evolution_clusters().push_back(m_ptr_result->clusters()); } } calculate_total_wce(); }
void kmedians::process(const dataset & data, cluster_data & output_result) { m_ptr_data = &data; m_ptr_result = (kmedians_data *) &output_result; if (data[0].size() != m_initial_medians[0].size()) { throw std::invalid_argument("kmedians: dimension of the input data and dimension of the initial medians must be equal."); } m_ptr_result->medians() = m_initial_medians; double changes = std::numeric_limits<double>::max(); double prev_changes = 0.0; std::size_t counter_repeaters = 0; for (std::size_t iteration = 0; (iteration < m_max_iter) && (changes > m_tolerance) && (counter_repeaters < 10); iteration++) { update_clusters(m_ptr_result->medians(), m_ptr_result->clusters()); changes = update_medians(m_ptr_result->clusters(), m_ptr_result->medians()); double change_difference = std::abs(changes - prev_changes); if (change_difference < THRESHOLD_CHANGE) { counter_repeaters++; } else { counter_repeaters = 0; } prev_changes = changes; } m_ptr_data = nullptr; m_ptr_result = nullptr; }
void kmedians::process(const std::vector<point> & data) { m_ptr_data = (std::vector<point> *) &data; if (data[0].size() != m_medians[0].size()) { throw std::runtime_error("CCORE [kmedians]: dimension of the input data and dimension of the initial cluster medians must be equal."); } m_clusters.clear(); double stop_condition = m_tolerance * m_tolerance; double changes = 0.0; double prev_changes = 0.0; size_t counter_repeaters = 0; do { update_clusters(); changes = update_medians(); double change_difference = abs(changes - prev_changes); if (change_difference < 0.000001) { counter_repeaters++; } else { counter_repeaters = 0; } prev_changes = changes; } while ((changes > stop_condition) && (counter_repeaters < 10)); m_ptr_data = nullptr; }
/**
 * @brief Refines the given clusters and centers until the centers stop moving noticeably.
 *
 * @details Repeats update_clusters() followed by update_centers() for as long as the change
 *          returned by update_centers() is greater than 'tolerance'.
 *
 * @param[in,out] improved_clusters Clusters that are passed to both update routines.
 * @param[in,out] improved_centers  Centers that are passed to both update routines.
 * @param[in]     available_indexes Forwarded to update_clusters() (presumably the points that may be
 *                                  assigned - confirm against update_clusters()).
 */
void xmeans::improve_parameters(std::vector<std::vector<unsigned int> *> * improved_clusters, std::vector<std::vector<double> > * improved_centers, const std::vector<unsigned int> * const available_indexes) {
    /* The change starts at the largest double so that the tolerance test does not skip the first pass. */
    for (double change = std::numeric_limits<double>::max();
         change > tolerance;
         change = update_centers(improved_clusters, improved_centers))
    {
        update_clusters(improved_clusters, improved_centers, available_indexes);
    }
}
/**
 * @brief Runs the clustering loop until the centers stop moving noticeably.
 *
 * @details Repeats update_clusters() followed by update_centers() for as long as the change
 *          returned by update_centers() is greater than 'tolerance'.
 */
void kmeans::process(void) {
    /* The change starts at the largest double so that the tolerance test does not skip the first pass. */
    for (double change = std::numeric_limits<double>::max(); change > tolerance; change = update_centers()) {
        update_clusters();
    }
}
void CLUSTER::cluster_iso() { bool ITER = false; int ITER_N = 0; float LO_Q = (k+1)/2; float HI_Q = 2*k; //print(); while (ITER_N++ < MAX_ITER) { //cout << "< Updating membership >" << endl; update_membership(); //print(); //cout << "< Updating clusters >" << endl; float AV_DIST = update_clusters(); //print(); if (csize < LO_Q || (csize < HI_Q && ITER)) { int u = csize; //cout << "< Number of clusters: " << csize << " >" << endl; for (int v = 0; u-- > 0; v++) { if (centers[v]->deviation > BREAKING_POINT) { if(LO_Q > csize || (centers[v]->distance > AV_DIST && centers[v]->size > 2*MIN_SIZE)){ //cout << "splitting " << centers[v]->loc << endl; centers[add_cluster(centers[v]->loc + 0.01)]->size = 0; centers[v]->loc -= 0.01; last_action = _split; } else { if (ITER_N > 1) { if (last_action != _lump) { last_action = try_lumping(); } else { if (csize == 0) break; else continue; } } else { last_action = try_lumping(); } } } } } else { last_action = try_lumping(); } ITER = 1 - ITER; } }
void CLUSTER::cluster_iso2() { if (one_to_one()) return; bool ITER = false; unsigned int ITER_N = 0; float LO_Q = (k+1)/2; float HI_Q = 2*k; int last = 0; //int last_csize = -1; //print(); last_action = _split; while (last < 4) { // cout << "LOOP " << ITER_N << endl; last_action = _nothing; //cout << "< Updating membership >" << endl; update_membership(); //cout << "< Updating clusters >" << endl; float AV_DIST = update_clusters(); //print(); //print(); if (csize < HI_Q) { int u = csize; //cout << "< Number of clusters: " << csize << " >" << endl; for (int v = 0; u-- > 0; v++) { if (centers[v]->deviation > BREAKING_POINT) { // if(LO_Q > csize || // (centers[v]->distance > AV_DIST && centers[v]->size > 2*MIN_SIZE)){ //cout << "splitting " << centers[v]->loc << " BR " << BREAKING_POINT << endl; centers[add_cluster(centers[v]->loc + 0.0001)]->size = 0; centers[v]->loc -= 0.00001; last = 0; // } } } } ITER = 1 - ITER; last++; if (last == 4 && csize < k) { float OBP = BREAKING_POINT; float DV = 0; //find average deviation and set BREAKING_POINT to it for (int v = 0; v < csize; v++) DV += centers[v]->deviation; DV /= csize; //make a little smaller then average to be able break a single cluster BREAKING_POINT = DV-0.0000001; //cout << "adjusting BREAKING_POINT " << BREAKING_POINT << endl; //take another shot _only_ if this is the first time this BP is used if (OBP != BREAKING_POINT) last = 0; //there are no divisible clusters, quit if (DV == 0) last = 4; } ITER_N++; // cout << "turn " << ITER_N << endl; } if (csize < k) { //divide largest clusters in 2 ? 
} if (csize > k) { int EXTRA = csize - k; while (EXTRA > 0) { update_membership(); update_clusters(); cluster_insertion_sort(); float s = ::distance(centers[0]->loc, centers[1]->loc); // * // (centers[0]->size + centers[1]->size); int l = 0; for (int v = 1; v < csize-1; v++) { float m = ::distance(centers[v]->loc, centers[v+1]->loc); if (m < s) { s = m; l = v; } } EXTRA--; centers[l]->loc = (centers[l]->loc + centers[l+1]->loc)/2; erase_cluster(l+1); } } update_membership(); update_clusters(); }