// Marks `s` as the owner of every cell reachable from it through pairings.
//
// Starting at `s` (which must have the extremal dimension for direction
// `dir`: gc_max_cell_dim when dir == DES, otherwise 0), each visited cell gets
// ds.owner(cell) = s. From a visited cell, every cell returned by
// get_cets<dir> that is paired contributes its partner to the work list,
// provided the partner is not the current cell and has the extremal
// dimension. Despite the name, pending cells are kept on a LIFO stack.
inline void bfs_owner_extrema(dataset_t &ds, cellid_t s)
{
  const int extremum_dim = (dir == DES) ? (gc_max_cell_dim) : (0);

  ASSERT(ds.cell_dim(s) == extremum_dim);

  std::stack<cellid_t> pending;
  pending.push(s);

  // Scratch buffer filled by get_cets for the cell being expanded.
  cellid_t adjacent[20];

  while (!pending.empty())
  {
    cellid_t current = pending.top();
    pending.pop();

    ds.owner(current) = s;

    cellid_t *adjacent_end = adjacent + ds.get_cets<dir>(current, adjacent);

    for (cellid_t *it = adjacent; it != adjacent_end; ++it)
    {
      if (!ds.is_paired(*it))
        continue;

      cellid_t partner = ds.pair(*it);

      // Skip the pairing that leads straight back to the current cell.
      if (partner != current && ds.cell_dim(partner) == extremum_dim)
        pending.push(partner);
    }
  }
}
// Feeds every element of `d`, in index order, to add_point().
void histogram_t::compile(const dataset_t &d)
{
  size_t idx = 0;
  while (idx < d.size())
  {
    add_point(d[idx]);
    ++idx;
  }
}
static void kmeans_center_init_random(cluster_t nb_center, histogram_c ¢er, dataset_t &dataset, nbgen &rng) { size_t nb_data = dataset.size(); center.resize(nb_center); for (cluster_t i = 0; i < nb_center; i++) center[i] = dataset[rng() % nb_data].histogram; }
// Writes `dstile` to `file` by asking driver `drv` to create a copy of it.
//
// The handle returned by CreateCopy is wrapped with make_dataset(); if the
// wrapped handle tests false, std::runtime_error("failed to save tile") is
// thrown. The copy is otherwise only held until this function returns.
void save_tile(GDALDriver& drv, dataset_t dstile, char const* const file)
{
    // Trailing arguments: `false` followed by three zero values.
    // NOTE(review): per the GDAL CreateCopy signature these would be
    // non-strict mode, no options, no progress callback/data — confirm.
    dataset_t dspng(
        make_dataset(drv.CreateCopy(file, dstile.get(), false, 0, 0, 0)));

    const bool copy_failed = !dspng;
    if (copy_failed)
    {
        throw std::runtime_error("failed to save tile");
    }
}
// For each cell in [b, e), tries to pair it with one of its get_cets<ASC>
// neighbours.
//
// The neighbour list is first narrowed by filter_elst(); among the survivors
// the minimum under dataset_t::compare_cells<dim+1> is picked. The pairing is
// recorded via ds.pair() only when a candidate exists and its is_boundry()
// flag equals that of the cell itself.
inline void assign_pairs(dataset_t &ds, Titer b, Titer e)
{
  // Scratch buffer for the candidates of the cell being processed.
  cellid_t candidates[10];

  BOOST_AUTO(cmp, bind(&dataset_t::compare_cells<dim+1>, &ds, _1, _2));

  while (b != e)
  {
    cellid_t *candidates_end = candidates + ds.get_cets<ASC>(*b, candidates);

    // filter_elst compacts in place and returns the new end of the range.
    candidates_end = filter_elst(candidates, candidates_end, candidates, *b, ds);

    cellid_t *best = min_element(candidates, candidates_end, cmp);

    if (best != candidates_end)
    {
      if (ds.is_boundry(*best) == ds.is_boundry(*b))
        ds.pair(*b, *best);
    }

    ++b;
  }
}
// Copies every cell in [b, e) that is not paired (per ds.is_paired) to the
// output iterator `r`, preserving input order.
//
// Returns the output iterator advanced past the last element written.
inline Toi collect_cps(const dataset_t &ds, Tii b, Tii e, Toi r)
{
  while (b != e)
  {
    if (!ds.is_paired(*b))
    {
      *r = *b;
      ++r;
    }
    ++b;
  }

  return r;
}
// For each cell in [b, e), stores in ds.max_fct(cell) the greatest of its
// get_cets<DES> neighbours under dataset_t::compare_cells<dim-1>.
//
// NOTE(review): the result of max_element is dereferenced unconditionally,
// so every cell in the range is presumably expected to have at least one
// such neighbour — confirm against callers.
inline void assign_max_facets(dataset_t &ds, Titer b, Titer e)
{
  // Scratch buffer for the neighbours of the cell being processed.
  cellid_t facets[10];

  BOOST_AUTO(cmp, bind(&dataset_t::compare_cells<dim-1>, &ds, _1, _2));

  while (b != e)
  {
    cellid_t *facets_end = facets + ds.get_cets<DES>(*b, facets);

    ds.max_fct(*b) = *max_element(facets, facets_end, cmp);

    ++b;
  }
}
// Reads an `xs` x `ys` window at offset (`x`, `y`) with `nbands` bands from
// `ds` into `data` as GDT_Byte samples.
//
// `data` must already hold exactly xs * ys * nbands elements (checked with
// assert only). The buffer dimensions passed to RasterIO equal the window
// dimensions, and the trailing four arguments are all zero.
// Throws std::runtime_error("raster read failed") when RasterIO does not
// return CE_None.
void read(dataset_t ds, raster_data_t& data, int x, int y, int xs, int ys, int nbands)
{
    assert(data.size() == xs * ys * nbands);

    const ::CPLErr status = ds->RasterIO(
        ::GF_Read,
        x, y, xs, ys,          // source window
        &data[0], xs, ys,      // destination buffer and its dimensions
        ::GDT_Byte,
        nbands,
        0, 0, 0, 0);

    if (status == ::CE_None)
    {
        return;
    }

    throw std::runtime_error("raster read failed");
}
// Writes `data` as GDT_Byte samples into the `xs` x `ys` window at offset
// (`x`, `y`) of `ds`, covering `nbands` bands.
//
// `data` must hold exactly xs * ys * nbands elements (checked with assert
// only). RasterIO takes a non-const buffer pointer, hence the const_cast on
// the source buffer.
// Throws std::runtime_error("raster write failed") when RasterIO does not
// return CE_None.
void write(dataset_t ds, raster_data_t const& data, int x, int y, int xs, int ys, int nbands)
{
    assert(data.size() == xs * ys * nbands);

    raster_data_t::value_type* const buffer =
        const_cast<raster_data_t::value_type*>(&data[0]);

    const ::CPLErr status = ds->RasterIO(
        ::GF_Write,
        x, y, xs, ys,          // destination window
        buffer, xs, ys,        // source buffer and its dimensions
        ::GDT_Byte,
        nbands,
        0, 0, 0, 0);

    if (status == ::CE_None)
    {
        return;
    }

    throw std::runtime_error("raster write failed");
}
// Populates `msc` with one critical point per entry of `ccells` and connects
// each cell of dimension 1 to the owners of its neighbouring cells.
//
// Pass 1: critical point i is registered from ccells[i] (cell id, dimension,
//         fn<CFI_MAX> value, max_vert<-1>, boundary flag) and the cell id is
//         mapped to i.
// Pass 2: for every cell with cell_dim == 1, the owners of the first two
//         get_cets<DES> entries are connected to it, followed by the owners
//         of the get_cets<ASC> entries: only the first when exactly one is
//         reported, otherwise the first two.
inline void make_connections(mscomplex_t &msc, const cellid_list_t &ccells, const dataset_t &ds)
{
  msc.resize(ccells.size());

  // cell id -> index of its critical point in msc
  map<cellid_t,int> cp_index;

  for (int i = 0; i < ccells.size(); ++i)
  {
    cellid_t c = ccells[i];

    msc.set_critpt(i, c,
                   ds.cell_dim(c),
                   ds.fn<dataset_t::CFI_MAX>(c),
                   ds.max_vert<-1>(c),
                   ds.is_boundry(c));

    cp_index[c] = i;
  }

  // Scratch buffer reused for both the DES and the ASC query.
  cellid_t nbr[20];

  for (cellid_list_t::const_iterator it = ccells.begin(), last = ccells.end();
       it != last; ++it)
  {
    if (ds.cell_dim(*it) != 1)
      continue;

    ASSERT(cp_index.count(*it) == 1);

    // Descending side: the returned count is ignored, two entries are used.
    ds.get_cets<DES>(*it, nbr);

    for (int k = 0; k < 2; ++k)
    {
      ASSERT(cp_index.count(ds.owner(nbr[k])) == 1);
      msc.connect_cps(cp_index[*it], cp_index[ds.owner(nbr[k])]);
    }

    // Ascending side: a count of exactly one limits this to the first entry.
    const int asc_ct   = ds.get_cets<ASC>(*it, nbr);
    const int asc_used = (asc_ct == 1) ? 1 : 2;

    for (int k = 0; k < asc_used; ++k)
    {
      ASSERT(cp_index.count(ds.owner(nbr[k])) == 1);
      msc.connect_cps(cp_index[*it], cp_index[ds.owner(nbr[k])]);
    }
  }
}
// center have to be initialized before calling this function. static void kmeans(cluster_t nb_clusters, dataset_t &dataset, precision_t (*distFunc)(histogram_t &, histogram_t &, unsigned, void *), histogram_c ¢er, unsigned nb_threads = 1, precision_t epsilon = 0.01, void *context = NULL) { size_t nb_data, nb_features, accumulator, per_block, changed, iter; if (nb_clusters > dataset.size()) return; iter = 0; nb_data = dataset.size(); nb_features = dataset[0].histogram.size(); per_block = nb_data / nb_threads; std::vector<size_t> thread_block_size(nb_threads, per_block); thread_block_size.back() += nb_data - nb_threads * per_block; std::vector<std::thread> eval_threads(nb_threads); do { changed = 0; accumulator = 0; for (int t = 0; t < nb_threads; ++t) { accumulator += thread_block_size[t]; eval_threads[t] = std::thread([t, accumulator, &dataset, &thread_block_size, &nb_clusters, ¢er, &changed, &distFunc, &nb_features, &context] { cluster_t curr_cluster, min_cluster; for (size_t i = (accumulator - thread_block_size[t]); i < accumulator; ++i) { curr_cluster = dataset[i].cluster; std::vector<precision_t> variance(nb_clusters); for (unsigned b = 0; b < nb_clusters; ++b) variance[b] = (*distFunc)(dataset[i].histogram, center[b], nb_features, context); min_cluster = std::distance( variance.begin(), std::min_element(variance.begin(), variance.end())); if (min_cluster != curr_cluster) ++changed; dataset[i].cluster = min_cluster; } }); } for (int t = 0; t < nb_threads; ++t) eval_threads[t].join(); std::vector<precision_t> cluster_element_counter(nb_clusters, 0); std::vector<std::vector<precision_t>> cluster_probability_mass( nb_clusters, histogram_t(nb_features, 0)); for (unsigned i = 0; i < nb_data; ++i) { ++cluster_element_counter[dataset[i].cluster]; for (unsigned j = 0; j < nb_features; ++j) { cluster_probability_mass[dataset[i].cluster][j] += dataset[i].histogram[j]; } } for (unsigned i = 0; i < nb_clusters; ++i) { for (unsigned j = 0; j < nb_features; ++j) if 
(cluster_probability_mass[i][j] > 0) cluster_probability_mass[i][j] /= cluster_element_counter[i]; center[i] = cluster_probability_mass[i]; } ++iter; printf("#%zu elements changed: %zu -> %f%%\n", iter, changed, 100 * ((1.0 * changed) / nb_data)); } while ((1.0 * changed) / nb_data > epsilon); }