Пример #1
0
void kmeans::calculate_total_wce(void) {
    double & wce = m_ptr_result->wce();
    for (std::size_t i = 0; i < m_ptr_result->clusters().size(); i++) {
        const auto & current_cluster = m_ptr_result->clusters().at(i);
        const auto & cluster_center = m_ptr_result->centers().at(i);

        for (const auto & cluster_point : current_cluster) {
            wce += m_metric(m_ptr_data->at(cluster_point), cluster_center);
        }
    }
}
Пример #2
0
	/// Resemblance coefficient for users
	/// \param[in] user1 First user index in the Rating matrix
	/// \param[in] user2 Second user index in the Rating matrix
	/// \return User's resemblance coefficient
	float operator()(size_t user1, size_t user2)
	{
		//std::clog << "user_resemblance_t::operator()(user1 = " << user1 << ", user2 = " << user2 << ")" << std::endl;
		if (bool(m_resemblance_mask(user1, user2)) == false)
		{
			//Note: m_resemblance matrix is symmetric - 
			// we can even store only upper triangle
			m_resemblance(user1, user2) = m_metric(m_ratings.get_row(user1), 
												   m_ratings.get_row(user2));
			m_resemblance(user2, user1) = m_resemblance(user1, user2);
			m_resemblance_mask(user1, user2) = true;
			m_resemblance_mask(user2, user1) = true;
		}
		return m_resemblance(user1, user2);
	}
Пример #3
0
void kmeans::assign_point_to_cluster(const std::size_t p_index_point, const dataset & p_centers, std::vector<std::size_t> & p_clusters) {
    double    minimum_distance = std::numeric_limits<double>::max();
    size_t    suitable_index_cluster = 0;

    for (size_t index_cluster = 0; index_cluster < p_centers.size(); index_cluster++) {
        double distance = m_metric(p_centers[index_cluster], (*m_ptr_data)[p_index_point]);

        if (distance < minimum_distance) {
            minimum_distance = distance;
            suitable_index_cluster = index_cluster;
        }
    }

    p_clusters[p_index_point] = suitable_index_cluster;
}
Пример #4
0
double kmeans::update_center(const cluster & p_cluster, point & p_center) {
    point total(p_center.size(), 0.0);

    /* for each object in cluster */
    for (auto object_index : p_cluster) {
        /* for each dimension */
        for (size_t dimension = 0; dimension < total.size(); dimension++) {
            total[dimension] += (*m_ptr_data)[object_index][dimension];
        }
    }

    /* average for each dimension */
    for (size_t dimension = 0; dimension < total.size(); dimension++) {
        total[dimension] = total[dimension] / p_cluster.size();
    }

    const double change = m_metric(p_center, total);

    p_center = std::move(total);
    return change;
}
Пример #5
0
double kmedians::update_medians(cluster_sequence & clusters, dataset & medians) {
    const dataset & data = *m_ptr_data;
    const std::size_t dimension = data[0].size();

    std::vector<point> prev_medians(medians);

    medians.clear();
    medians.resize(clusters.size(), point(dimension, 0.0));

    double maximum_change = 0.0;

    for (std::size_t index_cluster = 0; index_cluster < clusters.size(); index_cluster++) {
        calculate_median(clusters[index_cluster], medians[index_cluster]);

        double change = m_metric(prev_medians[index_cluster], medians[index_cluster]);
        if (change > maximum_change) {
            maximum_change = change;
        }
    }

    return maximum_change;
}
Пример #6
0
void kmedians::update_clusters(const dataset & medians, cluster_sequence & clusters) {
    const dataset & data = *m_ptr_data;

    clusters.clear();
    clusters.resize(medians.size());

    for (size_t index_point = 0; index_point < data.size(); index_point++) {
        size_t index_cluster_optim = 0;
        double distance_optim = std::numeric_limits<double>::max();

        for (size_t index_cluster = 0; index_cluster < medians.size(); index_cluster++) {
            double distance = m_metric(data[index_point], medians[index_cluster]);
            if (distance < distance_optim) {
                index_cluster_optim = index_cluster;
                distance_optim = distance;
            }
        }

        clusters[index_cluster_optim].push_back(index_point);
    }

    erase_empty_clusters(clusters);
}