// Picks the initial cluster means from the columns of X.
//
// The first mean is a column of X selected with rand(). Every further mean is
// the column whose index weightedSample() returns when given, for each column
// of X, the smallest pairwiseDistance() to any mean selected so far. This has
// the shape of k-means++ seeding; the actual sampling distribution is whatever
// weightedSample() implements (defined elsewhere -- confirm there).
//
// X: data matrix; each column is a candidate point.
//
// Returns without touching _means when X has no columns, because there is
// nothing to sample from.
void KMeansTrainer::initializeMeans(math::ConstMatrixReference<double, math::MatrixLayout::columnMajor> X)
{
    const size_t N = X.NumColumns();
    const size_t K = _numClusters;
    if (N == 0)
    {
        // rand() % 0 is undefined behaviour, so bail out on empty input.
        return;
    }

    // First mean: a column picked with rand().
    size_t choice = rand() % N;
    _means.GetColumn(0).CopyFrom(X.GetColumn(choice));

    // minimumDistance[i] tracks the distance from column i of X to the closest
    // mean selected so far.
    math::ColumnVector<double> minimumDistance(N);
    for (size_t k = 1; k < K; ++k)
    {
        // distance to previously selected mean (column k - 1 of _means)
        auto D = pairwiseDistance(X, _means.GetSubMatrix(0, k - 1, _means.NumRows(), 1));
        auto distanceToPreviousMean = D.GetColumn(0);
        if (k == 1)
        {
            // Only one mean exists yet, so its distances are the minimum.
            minimumDistance.CopyFrom(distanceToPreviousMean);
        }
        else
        {
            // distance to closest center
            for (size_t i = 0; i < minimumDistance.Size(); ++i)
            {
                minimumDistance[i] = std::min(minimumDistance[i], distanceToPreviousMean[i]);
            }
        }

        choice = weightedSample(minimumDistance);
        _means.GetColumn(k).CopyFrom(X.GetColumn(choice));
    }
}
// Finds, for each row of pairwiseDistance(X, _means), the smallest entry and
// records its position.
//
// X: data matrix handed to pairwiseDistance() together with _means.
// clusterAssignment: output; element i receives the index of the smallest
//     entry in row i of the distance matrix (presumably the index of the
//     nearest mean -- confirm against pairwiseDistance()).
//
// Returns the sum, over all rows, of that smallest entry.
//
// NOTE(review): the scan walks raw memory from GetDataPointer() for Size()
// elements, so it assumes each row of D is stored contiguously -- confirm
// against the layout pairwiseDistance() returns.
// Precondition: every row must hold at least one element; on an empty row
// std::min_element returns the end pointer, which is then dereferenced.
double KMeansTrainer::assignClosestCenter(math::ConstMatrixReference<double, math::MatrixLayout::columnMajor> X, math::VectorReference<size_t, math::VectorOrientation::column> clusterAssignment)
{
    auto D = pairwiseDistance(X, _means);
    double totalDist = 0;
    for (size_t i = 0; i < D.NumRows(); ++i)
    {
        auto dist = D.GetRow(i);
        const auto* first = dist.GetDataPointer();
        const auto* closest = std::min_element(first, first + dist.Size());

        // Offset of the minimum within the row is the assigned index.
        clusterAssignment[i] = static_cast<size_t>(closest - first);
        totalDist += *closest;
    }
    return totalDist;
}
// Constructor: forwards project1D( pairwiseDistance( lbl, n_struc_samples ), &rep_label_ )
// and `weight` to Split; the constructor body itself is empty.
//   lbl             - label matrix passed to pairwiseDistance.
//   weight          - passed through to Split unchanged.
//   n_struc_samples - second argument of pairwiseDistance (presumably a sample
//                     count -- confirm at its definition).
// NOTE(review): &rep_label_ is handed to project1D while the Split initializer
// is being evaluated. If Split is a base class and rep_label_ is a member of
// this class, that happens before rep_label_ is initialized; this is only safe
// if rep_label_ is trivially constructible and has no later initializer that
// would overwrite what project1D stored -- confirm.
Structured( const RMatrixXf & lbl, const VectorXf & weight, int n_struc_samples ): Split( project1D(pairwiseDistance(lbl,n_struc_samples),&rep_label_), weight ) { }