void do_batch(size_t num) { eigen_wsvec_list_t vs; for (size_t i = 0; i < num; ++i) { eigen_wsvec_t wsvec_a, wsvec_b; wsvec_a.data = eigen_svec_t(2); wsvec_b.data = eigen_svec_t(2); wsvec_a.data.coeffRef(0) = r_.next_gaussian() + 2.0; wsvec_a.data.coeffRef(1) = r_.next_gaussian() + 2.0; wsvec_b.data.coeffRef(0) = r_.next_gaussian() - 2.0; wsvec_b.data.coeffRef(1) = r_.next_gaussian() - 2.0; wsvec_a.weight = 1.0; wsvec_b.weight = 1.0; vs.push_back(wsvec_a); vs.push_back(wsvec_b); } gmm_->batch(vs, k_, d_); }
void gmm::batch(const eigen_wsvec_list_t& data, int d, int k) { if (data.empty()) { *this = gmm(); return; } typedef eigen_wsvec_list_t::const_iterator data_iter; initialize(data, d, k); eigen_svec_list_t old_means; eigen_smat_list_t old_covs; eigen_solver_list_t old_solvers; double old_obj = 0, obj = 0; vector<double> weights(k); bool converged = false; int64_t niter = 1; while (!converged) { old_covs = covs_; old_means = means_; old_solvers = cov_solvers_; old_obj = obj; obj = 0; fill(weights.begin(), weights.end(), 0); fill(means_.begin(), means_.end(), eigen_svec_t(d)); fill(covs_.begin(), covs_.end(), eigen_smat_t(d, d)); for (data_iter i = data.begin(); i != data.end(); ++i) { eigen_svec_t cps = cluster_probs(i->data, old_means, old_covs, old_solvers); for (int c = 0; c < k; ++c) { double cp = i->weight * cps.coeff(c); means_[c] += cp * i->data; covs_[c] += i->data * (i->data.transpose()) * cp; weights[c] += cp; obj -= std::log(std::max(cp, std::numeric_limits<double>::min())); } } for (int c = 0; c < k; ++c) { means_[c] /= weights[c]; covs_[c] /= weights[c]; double eps = 0.1; covs_[c] -= means_[c] * means_[c].transpose(); covs_[c] += eps * eye_; cov_solvers_[c] = shared_ptr<eigen_solver_t>(new eigen_solver_t(covs_[c])); } converged = is_converged(niter++, means_, old_means, obj, old_obj); } }