示例#1
0
void kmeans_compressor::bicriteria_to_coreset(
    wplist& src,
    wplist& bicriteria,
    csize_t dstsize,
    wplist& dst) {
  if (bicriteria.size() == 0) {
    dst = src;
    return;
  }
  double weight_sum = 0;
  double squared_min_dist_sum = 0;
  for (wplist::iterator it = src.begin(); it != src.end(); ++it) {
    std::pair<int, double> m = min_dist(*it, bicriteria);
    (*it).free_long = m.first;
    (*it).free_double = m.second;
    bicriteria.at((*it).free_long).free_double += (*it).weight;
    squared_min_dist_sum += pow((*it).free_double, 2) * (*it).weight;
    weight_sum += (*it).weight;
  }
  std::vector<double> weights;
  double sumw = 0;
  double prob = 0;
  for (wplist::iterator it = src.begin(); it != src.end(); ++it) {
    weighted_point p = *it;
    weighted_point bp = bicriteria.at(p.free_long);
    prob = get_probability(p, bp, weight_sum, squared_min_dist_sum);
    (*it).free_double = prob;
    weights.push_back(prob);
    sumw += prob;
  }

  discrete_distribution d(weights.begin(), weights.end());

  std::vector<size_t> ind(dstsize);
  std::generate(ind.begin(), ind.end(), d);

  for (std::vector<size_t>::iterator it = ind.begin(); it != ind.end(); ++it) {
    weighted_point sample = src.at(*it);
    sample.weight = 1.0 / dstsize * sumw / sample.free_double * sample.weight;
    sample.free_double = 0;
    sample.free_long = 0;
    dst.push_back(sample);
  }
}
示例#2
0
void kmeans_compressor::get_bicriteria(
    const wplist& src, csize_t bsize, csize_t dstsize, wplist& dst) {
  timer_start();
  dst.clear();
  wplist resid = src;
  vector<double> weights(src.size());
  double r = (1 - exp(bsize*(log(bsize)-log(src.size()))/dstsize)) / 2;
  r = max(0.1, r);
  std::vector<size_t> ind(bsize);
  while (resid.size() > 1 && dst.size() < dstsize) {
    timer_start();
    weights.resize(resid.size());
    for (wplist::iterator it = resid.begin(); it != resid.end(); ++it) {
      weights[it - resid.begin()] = it->weight;
    }
    discrete_distribution d(weights.begin(), weights.end());
    std::generate(ind.begin(), ind.end(), d);

    std::sort(ind.begin(), ind.end());
    std::vector<size_t>::iterator it = std::unique(ind.begin(), ind.end());
    ind.erase(it, ind.end());

    for (it = ind.begin(); it != ind.end(); ++it) {
       weighted_point p = resid[*it];
       p.free_long = 0;
       dst.push_back(p);
    }

    for (wplist::iterator itr = resid.begin(); itr != resid.end(); ++itr) {
       (*itr).free_double = min_dist(*itr, dst).second;
    }
    std::sort(resid.begin(), resid.end(), compare_weight);
    csize_t size = (csize_t)std::min(static_cast<double>(resid.size()),
                                     static_cast<double>(resid.size()*r));
    if (size  == 0) {
      size = 1;
    }

    resid.resize(size);
  }
}