/* k-means center initialization using the following algorithm: Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding */ static void generateCentersPP(const Mat& _data, Mat& _out_centers, int K, RNG& rng, int trials) { int i, j, k, dims = _data.cols, N = _data.rows; const float* data = _data.ptr<float>(0); size_t step = _data.step/sizeof(data[0]); std::vector<int> _centers(K); int* centers = &_centers[0]; std::vector<float> _dist(N*3); float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N; double sum0 = 0; centers[0] = (unsigned)rng % N; for( i = 0; i < N; i++ ) { dist[i] = normL2Sqr(data + step*i, data + step*centers[0], dims); sum0 += dist[i]; } for( k = 1; k < K; k++ ) { double bestSum = DBL_MAX; int bestCenter = -1; for( j = 0; j < trials; j++ ) { double p = (double)rng*sum0, s = 0; for( i = 0; i < N-1; i++ ) if( (p -= dist[i]) <= 0 ) break; int ci = i; parallel_for_(Range(0, N), KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); for( i = 0; i < N; i++ ) { s += tdist2[i]; } if( s < bestSum ) { bestSum = s; bestCenter = ci; std::swap(tdist, tdist2); } } centers[k] = bestCenter; sum0 = bestSum; std::swap(dist, tdist); } for( k = 0; k < K; k++ ) { const float* src = data + step*centers[k]; float* dst = _out_centers.ptr<float>(k); for( j = 0; j < dims; j++ ) dst[j] = src[j]; } }
/*
 * Restores the encoding parameters from the XML element `root`.
 *
 * Recognised child elements (each one is optional):
 *   <k>        integer text, stored in _k
 *   <pca>      integer text, stored in _pcaDim; its presence sets _usePCA
 *   <centers>  a list of <center> children, each holding one row of
 *              space-separated values; the rows are stored in _centers and a
 *              new ANNTree is built from them
 *
 * The number of columns is taken from the first <center>. At most _k rows are
 * read; further <center> elements are ignored. A later row with fewer values
 * than columns only fills the values it provides (the rest of that row is
 * left as constructed by SampleType). Rows that are not supplied at all are
 * likewise left as constructed by SampleType.
 *
 * If <centers> has no <center> child, or _k is not positive, the centers
 * section is skipped: _computed, _centers and _tree stay untouched.
 *
 * boost::lexical_cast throws boost::bad_lexical_cast when a value cannot be
 * converted to Scalar; that exception is not caught here.
 */
void HistEncoding::deserializeSpec(const tinyxml2::XMLElement* root) {
    if (root == NULL) {
        return;
    }

    if (const tinyxml2::XMLElement *k = root->FirstChildElement("k")) {
        k->QueryIntText(&_k);
    }

    if (const tinyxml2::XMLElement *pca = root->FirstChildElement("pca")) {
        _usePCA = true;
        pca->QueryIntText(&_pcaDim);
    }

    if (const tinyxml2::XMLElement *centers = root->FirstChildElement("centers")) {
        const tinyxml2::XMLElement *center = centers->FirstChildElement("center");

        // Without a first <center> there is no column count, and without a
        // positive _k there is no row to write to.
        if (center == NULL || _k <= 0) {
            return;
        }

        _computed = true;

        // GetText() yields NULL for an element without text; std::string must
        // not be constructed from a null pointer.
        const char *text = center->GetText();
        std::vector<std::string> elems = stringSplit(std::string(text != NULL ? text : ""), ' ');

        _centers = SampleType(_k, elems.size());
        // Diagnostic output kept from the original implementation.
        std::cout << elems.size() << std::endl;

        for (size_t i = 0; i < elems.size(); ++i) {
            _centers(0, i) = boost::lexical_cast<Scalar>(elems[i]);
        }

        const size_t cols = static_cast<size_t>(_centers.cols());
        int row = 1;
        // Stop at _k rows so that surplus <center> elements cannot write past
        // the end of _centers.
        for (center = center->NextSiblingElement("center");
             center != NULL && row < _k;
             center = center->NextSiblingElement("center")) {
            text = center->GetText();
            elems = stringSplit(std::string(text != NULL ? text : ""), ' ');

            // Never read beyond the values this row actually provides.
            const size_t count = elems.size() < cols ? elems.size() : cols;
            for (size_t i = 0; i < count; ++i) {
                _centers(row, i) = boost::lexical_cast<Scalar>(elems[i]);
            }
            row += 1;
        }

        // NOTE(review): a previously assigned _tree is overwritten here without
        // being released - confirm who owns it (possible leak on repeated calls).
        _tree = new tet::classification::util::ANNTree<Scalar>(_centers);
    }
}