double kmedians::update_medians() { const std::vector<point> & data = *m_ptr_data; const size_t dimension = data[0].size(); std::vector<point> prev_medians(m_medians); m_medians.clear(); m_medians.resize(m_clusters.size(), point(dimension, 0.0)); double maximum_change = 0.0; for (size_t index_cluster = 0; index_cluster < m_clusters.size(); index_cluster++) { for (size_t index_dimension = 0; index_dimension < dimension; index_dimension++) { cluster & current_cluster = m_clusters[index_cluster]; std::sort(current_cluster.begin(), current_cluster.end(), [this](unsigned int index_object1, unsigned int index_object2) { return (*m_ptr_data)[index_object1] > (*m_ptr_data)[index_object2]; }); size_t relative_index_median = (size_t) floor(current_cluster.size() / 2.0); size_t index_median = current_cluster[relative_index_median]; if (current_cluster.size() % 2) { size_t index_median_second = current_cluster[relative_index_median + 1]; m_medians[index_cluster][index_dimension] = (data[index_median][index_dimension] + data[index_median_second][index_dimension]) / 2.0; } else { m_medians[index_cluster][index_dimension] = data[index_median][index_dimension]; } } double change = euclidean_distance_sqrt(&prev_medians[index_cluster], &m_medians[index_cluster]); if (change > maximum_change) { maximum_change = change; } } return maximum_change; }
/**
 * @brief Rebuilds `medians` so that it holds one point per entry of `clusters`, and reports
 *        how much the medians changed.
 *
 * @details `medians` is cleared and resized to `clusters.size()` zero-filled points whose
 *          length equals the size of the first data point. For each cluster,
 *          calculate_median() is then called with the cluster and its slot in `medians`
 *          (presumably filling that slot - see calculate_median()), and m_metric is evaluated
 *          on the previous and the new median.
 *
 * @param[in,out] clusters Clusters passed one by one to calculate_median() (non-const, so the
 *                          callee may modify them).
 * @param[in,out] medians  On input the previous medians; on output the recalculated ones.
 *
 * @return The largest m_metric value between a previous and a new median, or 0.0 when no
 *         value was greater than zero.
 */
double kmedians::update_medians(cluster_sequence & clusters, dataset & medians) {
    const dataset & points = *m_ptr_data;
    const std::size_t coordinate_count = points[0].size();

    /* Copy of the old medians; `medians` itself is overwritten below. */
    std::vector<point> previous(medians);

    medians.clear();
    medians.resize(clusters.size(), point(coordinate_count, 0.0));

    double largest_shift = 0.0;

    for (std::size_t i = 0; i < clusters.size(); i++) {
        calculate_median(clusters[i], medians[i]);

        const double shift = m_metric(previous[i], medians[i]);
        largest_shift = (shift > largest_shift) ? shift : largest_shift;
    }

    return largest_shift;
}