void kmeans_compressor::bicriteria_to_coreset( wplist& src, wplist& bicriteria, csize_t dstsize, wplist& dst) { if (bicriteria.size() == 0) { dst = src; return; } double weight_sum = 0; double squared_min_dist_sum = 0; for (wplist::iterator it = src.begin(); it != src.end(); ++it) { std::pair<int, double> m = min_dist(*it, bicriteria); (*it).free_long = m.first; (*it).free_double = m.second; bicriteria.at((*it).free_long).free_double += (*it).weight; squared_min_dist_sum += pow((*it).free_double, 2) * (*it).weight; weight_sum += (*it).weight; } std::vector<double> weights; double sumw = 0; double prob = 0; for (wplist::iterator it = src.begin(); it != src.end(); ++it) { weighted_point p = *it; weighted_point bp = bicriteria.at(p.free_long); prob = get_probability(p, bp, weight_sum, squared_min_dist_sum); (*it).free_double = prob; weights.push_back(prob); sumw += prob; } discrete_distribution d(weights.begin(), weights.end()); std::vector<size_t> ind(dstsize); std::generate(ind.begin(), ind.end(), d); for (std::vector<size_t>::iterator it = ind.begin(); it != ind.end(); ++it) { weighted_point sample = src.at(*it); sample.weight = 1.0 / dstsize * sumw / sample.free_double * sample.weight; sample.free_double = 0; sample.free_long = 0; dst.push_back(sample); } }
void kmeans_compressor::get_bicriteria( const wplist& src, csize_t bsize, csize_t dstsize, wplist& dst) { timer_start(); dst.clear(); wplist resid = src; vector<double> weights(src.size()); double r = (1 - exp(bsize*(log(bsize)-log(src.size()))/dstsize)) / 2; r = max(0.1, r); std::vector<size_t> ind(bsize); while (resid.size() > 1 && dst.size() < dstsize) { timer_start(); weights.resize(resid.size()); for (wplist::iterator it = resid.begin(); it != resid.end(); ++it) { weights[it - resid.begin()] = it->weight; } discrete_distribution d(weights.begin(), weights.end()); std::generate(ind.begin(), ind.end(), d); std::sort(ind.begin(), ind.end()); std::vector<size_t>::iterator it = std::unique(ind.begin(), ind.end()); ind.erase(it, ind.end()); for (it = ind.begin(); it != ind.end(); ++it) { weighted_point p = resid[*it]; p.free_long = 0; dst.push_back(p); } for (wplist::iterator itr = resid.begin(); itr != resid.end(); ++itr) { (*itr).free_double = min_dist(*itr, dst).second; } std::sort(resid.begin(), resid.end(), compare_weight); csize_t size = (csize_t)std::min(static_cast<double>(resid.size()), static_cast<double>(resid.size()*r)); if (size == 0) { size = 1; } resid.resize(size); } }