void kmeans::calculate_total_wce(void) { double & wce = m_ptr_result->wce(); for (std::size_t i = 0; i < m_ptr_result->clusters().size(); i++) { const auto & current_cluster = m_ptr_result->clusters().at(i); const auto & cluster_center = m_ptr_result->centers().at(i); for (const auto & cluster_point : current_cluster) { wce += m_metric(m_ptr_data->at(cluster_point), cluster_center); } } }
/// Resemblance coefficient for users /// \param[in] user1 First user index in the Rating matrix /// \param[in] user2 Second user index in the Rating matrix /// \return User's resemblance coefficient float operator()(size_t user1, size_t user2) { //std::clog << "user_resemblance_t::operator()(user1 = " << user1 << ", user2 = " << user2 << ")" << std::endl; if (bool(m_resemblance_mask(user1, user2)) == false) { //Note: m_resemblance matrix is symmetric - // we can even store only upper triangle m_resemblance(user1, user2) = m_metric(m_ratings.get_row(user1), m_ratings.get_row(user2)); m_resemblance(user2, user1) = m_resemblance(user1, user2); m_resemblance_mask(user1, user2) = true; m_resemblance_mask(user2, user1) = true; } return m_resemblance(user1, user2); }
void kmeans::assign_point_to_cluster(const std::size_t p_index_point, const dataset & p_centers, std::vector<std::size_t> & p_clusters) { double minimum_distance = std::numeric_limits<double>::max(); size_t suitable_index_cluster = 0; for (size_t index_cluster = 0; index_cluster < p_centers.size(); index_cluster++) { double distance = m_metric(p_centers[index_cluster], (*m_ptr_data)[p_index_point]); if (distance < minimum_distance) { minimum_distance = distance; suitable_index_cluster = index_cluster; } } p_clusters[p_index_point] = suitable_index_cluster; }
double kmeans::update_center(const cluster & p_cluster, point & p_center) { point total(p_center.size(), 0.0); /* for each object in cluster */ for (auto object_index : p_cluster) { /* for each dimension */ for (size_t dimension = 0; dimension < total.size(); dimension++) { total[dimension] += (*m_ptr_data)[object_index][dimension]; } } /* average for each dimension */ for (size_t dimension = 0; dimension < total.size(); dimension++) { total[dimension] = total[dimension] / p_cluster.size(); } const double change = m_metric(p_center, total); p_center = std::move(total); return change; }
double kmedians::update_medians(cluster_sequence & clusters, dataset & medians) { const dataset & data = *m_ptr_data; const std::size_t dimension = data[0].size(); std::vector<point> prev_medians(medians); medians.clear(); medians.resize(clusters.size(), point(dimension, 0.0)); double maximum_change = 0.0; for (std::size_t index_cluster = 0; index_cluster < clusters.size(); index_cluster++) { calculate_median(clusters[index_cluster], medians[index_cluster]); double change = m_metric(prev_medians[index_cluster], medians[index_cluster]); if (change > maximum_change) { maximum_change = change; } } return maximum_change; }
void kmedians::update_clusters(const dataset & medians, cluster_sequence & clusters) { const dataset & data = *m_ptr_data; clusters.clear(); clusters.resize(medians.size()); for (size_t index_point = 0; index_point < data.size(); index_point++) { size_t index_cluster_optim = 0; double distance_optim = std::numeric_limits<double>::max(); for (size_t index_cluster = 0; index_cluster < medians.size(); index_cluster++) { double distance = m_metric(data[index_point], medians[index_cluster]); if (distance < distance_optim) { index_cluster_optim = index_cluster; distance_optim = distance; } } clusters[index_cluster_optim].push_back(index_point); } erase_empty_clusters(clusters); }