#include <cstdio>  // printf
// The KMeans class is project-local (not mlpack); its header name
// (e.g. "KMeans.h") is assumed, as it is not part of this excerpt.

int main()
{
    double data[] =
    {
        0.0,  0.2,  0.4,
        0.3,  0.2,  0.4,
        0.4,  0.2,  0.4,
        0.5,  0.2,  0.4,
        5.0,  5.2,  8.4,
        6.0,  5.2,  7.4,
        4.0,  5.2,  4.4,
        10.3, 10.4, 10.5,
        10.1, 10.6, 10.7,
        11.3, 10.2, 10.9
    };
    const int size = 10;        // Number of samples
    const int dim = 3;          // Dimension of each feature vector
    const int cluster_num = 4;  // Number of clusters

    KMeans* kmeans = new KMeans(dim, cluster_num);
    int* labels = new int[size];
    kmeans->SetInitMode(KMeans::InitUniform);
    kmeans->Cluster(data, size, labels);

    for (int i = 0; i < size; ++i)
    {
        printf("%f, %f, %f belongs to %d cluster\n",
               data[i * dim + 0], data[i * dim + 1], data[i * dim + 2], labels[i]);
    }

    delete[] labels;
    delete kmeans;
    return 0;
}
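For orientation, here is a minimal sketch of the project-local KMeans interface that this example and the two GMM::Init functions below appear to assume. The actual header is not part of these excerpts, so every declaration is inferred from the call sites and should be treated as an assumption, not the real API.

// Inferred interface sketch; not the actual header.
class KMeans
{
public:
    enum InitMode { InitUniform /* other modes elided; only InitUniform appears here */ };

    KMeans(int dimNum, int clusterNum);  // feature dimension, cluster count
    void SetInitMode(int mode);          // e.g. KMeans::InitUniform

    // Cluster N row-major interleaved samples; writes one label per sample.
    void Cluster(double* data, int N, int* labels);

    // File-based variant used by GMM::Init(const char*): reads binary
    // samples from one file, writes binary labels to another.
    void Cluster(const char* sampleFileName, const char* labelFileName);

    double* GetMean(int i);              // mean vector of cluster i
};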
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::mat& centroids) const
{
  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    KMeans<> kmeans;
    kmeans.Cluster(sampledData, clusters, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  KMeans<> kmeans;
  kmeans.Cluster(sampledCentroids, clusters, centroids);
}
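A minimal sketch of how RefinedStart plugs into mlpack's KMeans template as the initial-partition policy. This assumes an mlpack version contemporary with these excerpts (where assignments are arma::Col<size_t>, as in the last example below); the data and cluster count are illustrative only.

#include <mlpack/core.hpp>
#include <mlpack/methods/kmeans/kmeans.hpp>
#include <mlpack/methods/kmeans/refined_start.hpp>

using namespace mlpack::kmeans;

int main()
{
  // 3-dimensional data, 1000 random points (illustrative only).
  arma::mat data = arma::randu<arma::mat>(3, 1000);

  arma::Col<size_t> assignments;
  arma::mat centroids;

  // Use refined starting points instead of a random initial partition.
  KMeans<mlpack::metric::SquaredEuclideanDistance, RefinedStart> kmeans;
  kmeans.Cluster(data, 5, assignments, centroids);

  return 0;
}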
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);
    static int index = 0;

    /// Laser data
    LMS1xx LaserSensor;
    scanCfg cfg;
    scanData data;
    scanDataCfg dataCfg;
    status_t status;
    std::ofstream file;
    double start_angle = 0;
    double stop_angle = 0;
    double resolution = 0;
    double frequency = 0;

    /// K-means related variables
    KMeans<> K;
    mat dataset;
    size_t cluster;
    Col<size_t> assignments;
    mat centroid;

    /// Connect to the laser sensor
    LaserSensor.connect(host);
    if (LaserSensor.isConnected())
    {
        std::cout << "\nConnected !!!\n";
        LaserSensor.login();

        /// Get the laser configuration
        cfg = LaserSensor.getScanCfg();
        //cfg.angleResolution = 0.25*10000.0;
        //cfg.scaningFrequency = 25*100;
        //LaserSensor.setScanCfg(cfg);
        //LaserSensor.saveConfig();
        //sleep(3);
        cfg = LaserSensor.getScanCfg();

        start_angle = cfg.startAngle / 10000.0;  //* DEG2RAD - M_PI/2;
        stop_angle = cfg.stopAngle / 10000.0;    //* DEG2RAD - M_PI/2;
        resolution = cfg.angleResolution / 10000.0;
        frequency = cfg.scaningFrequency / 100;

        std::cout << "Start Angle: " << start_angle;
        std::cout << "\tStop Angle: " << stop_angle;
        std::cout << "\tResolution: " << resolution;
        std::cout << "\tFrequency: " << frequency;
        std::cout << std::endl;

        dataCfg.outputChannel = 1;
        dataCfg.remission = true;
        dataCfg.resolution = 1;
        dataCfg.encoder = 0;
        dataCfg.position = false;
        dataCfg.deviceName = false;
        dataCfg.outputInterval = 1;

        /// Set the data configuration of the laser
        LaserSensor.setScanDataCfg(dataCfg);

        /// Start measurement
        LaserSensor.startMeas();
        do
        {
            status = LaserSensor.queryStatus();
            usleep(200);
        } while (status != ready_for_measurement);

        {
            LaserSensor.startDevice();
            LaserSensor.scanContinous(1);

            while (LaserSensor.isConnected())
            {
                /// Get the laser data
                LaserSensor.getData(data);

                //u_int16_t range[data.dist_len1];
                //u_int16_t intensity[data.rssi_len1];
                int range[data.dist_len1];
                int intensity[data.rssi_len1];

                for (int i = 0; i < data.dist_len1; i++)
                    range[i] = data.dist1[i];
                for (int i = 0; i < data.rssi_len1; i++)
                    intensity[i] = data.rssi1[i];

                if (index == 0)
                {
                    index++;
                    std::cout << std::endl << "Data len = " << data.dist_len1 << std::endl;
                    std::cout << "Intensity len = " << data.rssi_len1 << std::endl;

                    /// Distances are assumed to be in mm.  The scan runs from
                    /// -45 to 225 degrees at a resolution of 0.25 degrees, so
                    /// there are 1081 values.
                    float angle_scan = -45.0;
                    float x[1081], y[1081];
                    int index_range = 0;
                    double slope;
                    cluster = 2;
                    //centroid.zeros();
                    dataset.resize(2, 1081);
                    dataset.zeros();
                    file.open("LaserData.txt");

                    while (1)
                    {
                        /// Convert polar (mm, degrees) to Cartesian (m).
                        x[index_range] = range[index_range] * cos(angle_scan * DEG2RAD) / 1000.0;
                        y[index_range] = range[index_range] * sin(angle_scan * DEG2RAD) / 1000.0;
                        //std::cout << "range: " << range[index_range] << " angle: " << angle_scan;
                        //std::cout << " x: " << x[index_range] << " y : " << y[index_range] << std::endl;
                        angle_scan += 0.25;

                        //if(intensity[index_range] >= 850)
                        {
                            file << x[index_range] << "," << y[index_range] << ","
                                 << intensity[index_range] << std::endl;
                        }

                        if (angle_scan > 225.0)
                            break;

                        index_range++;
                        usleep(100);
                    }

                    /// Keep only the high-intensity returns for clustering.
                    int index_tmp = 0;
                    for (int i = 0; i < 1081; i++)
                    {
                        if (intensity[i] >= 900)
                        {
                            dataset(0, index_tmp) = x[i];
                            dataset(1, index_tmp) = y[i];
                            std::cout << "\n" << dataset(0, index_tmp) << "\t" << dataset(1, index_tmp);
                            index_tmp++;
                        }
                    }

                    std::cout << "\nKMeans Calculations!!!" << std::endl;
                    dataset.resize(2, index_tmp);

                    /// Actual k-means clustering
                    K.Cluster(dataset, 2, assignments, centroid);

                    /*************************************************************
                    static double sum_x[2];
                    static double sum_y[2];
                    int number_dist1 = 0;
                    int number_dist2 = 0;
                    for (int i = 0; i < assignments.size(); i++)
                    {
                        switch (assignments[i])
                        {
                        case 0:
                            sum_x[0] += dataset(0,i);
                            sum_y[0] += dataset(1,i);
                            number_dist1++;
                            break;
                        case 1:
                            sum_x[1] += dataset(0,i);
                            sum_y[1] += dataset(1,i);
                            number_dist2++;
                            break;
                        };
                        std::cout << "\n" << assignments[i];
                    }
                    double center1_x, center1_y;
                    double center2_x, center2_y;
                    center1_x = sum_x[0] / number_dist1;
                    center1_y = sum_y[0] / number_dist1;
                    center2_x = sum_x[1] / number_dist2;
                    center2_y = sum_y[1] / number_dist2;
                    std::cout << center1_x << "," << center1_y << " "
                              << center2_x << "," << center2_y << endl;
                    *************************************************************/

                    //std::cout << "\n" << centroid(0,0) << "\t" << centroid(1,0) << "\t" << centroid(0,1) << "\t" << centroid(1,1) << "\n";

                    /// Orientation of the line through the two centroids.
                    slope = (centroid(1, 1) - centroid(1, 0)) / (centroid(0, 1) - centroid(0, 0));
                    slope = atan(slope) * RAD2DEG;
                    std::cout << "\nclusters= " << cluster << std::endl;
                    std::cout << "\nOrientation= " << slope << std::endl;
                }

                usleep(200);
            }

            std::cout << "\n Sensor Disconnected \n";

            /// Disconnect the laser
            LaserSensor.scanContinous(0);
            LaserSensor.stopMeas();
            LaserSensor.disconnect();
            file.close();
        }
    }
    else
    {
        std::cout << "\nSensor Not Connected !!!\n";
    }

    return a.exec();
}
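One robustness note on the orientation step above: dividing dy by dx before calling atan fails when the two centroids share an x coordinate (a vertical line between the clusters). A minimal sketch of the atan2 form, under the same RAD2DEG convention the program already uses; the helper name OrientationDeg is illustrative, not part of the original code.

#include <cmath>
#include <armadillo>

// Orientation (degrees) of the line through the two cluster centroids,
// stored column-wise in a 2x2 matrix as in the example above.
// atan2 stays well-defined even when dx == 0.
double OrientationDeg(const arma::mat &centroid)
{
    const double RAD2DEG = 180.0 / M_PI;  // matching the constant assumed above
    const double dx = centroid(0, 1) - centroid(0, 0);
    const double dy = centroid(1, 1) - centroid(1, 0);
    return std::atan2(dy, dx) * RAD2DEG;  // in (-180, 180]
}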
arma::vec Vespucci::Math::KMeansWrapper::Cluster(const arma::mat &data,
                                                 const size_t clusters,
                                                 arma::mat &centroids)
{
    using namespace mlpack::metric;
    using namespace mlpack::kmeans;
    arma::Row<size_t> assignments;
    arma::vec assignments_vec;

    if (allow_empty_){
        if (metric_ == "squaredeuclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<SquaredEuclideanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<SquaredEuclideanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<SquaredEuclideanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "euclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<EuclideanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<EuclideanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<EuclideanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "manhattan"){
            if (init_ == "sampleinitialization"){
                KMeans<ManhattanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ManhattanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ManhattanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "chebyshev"){
            if (init_ == "sampleinitialization"){
                KMeans<ChebyshevDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ChebyshevDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ChebyshevDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
    }
    else{
        if (metric_ == "squaredeuclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<SquaredEuclideanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<SquaredEuclideanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<SquaredEuclideanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "euclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<EuclideanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<EuclideanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<EuclideanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "manhattan"){
            if (init_ == "sampleinitialization"){
                KMeans<ManhattanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ManhattanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ManhattanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "chebyshev"){
            if (init_ == "sampleinitialization"){
                KMeans<ChebyshevDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ChebyshevDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ChebyshevDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
    }

    // Convert the mlpack assignments to a 1-based arma::vec of doubles.
    assignments_vec.set_size(assignments.n_elem);
    for (arma::uword i = 0; i < assignments.n_elem; ++i)
        assignments_vec(i) = double(assignments(i) + 1);
    return assignments_vec;
}
void GMM::Init(const char* sampleFileName)
{
    const double MIN_VAR = 1E-10;

    KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
    kmeans->SetInitMode(KMeans::InitUniform);
    kmeans->Cluster(sampleFileName, "gmm_init.tmp");

    int* counts = new int[m_mixNum];
    double* overMeans = new double[m_dimNum];  // Overall mean of training data
    for (int i = 0; i < m_mixNum; i++)
    {
        counts[i] = 0;
        m_priors[i] = 0;
        memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
        memset(m_vars[i], 0, sizeof(double) * m_dimNum);
    }
    memset(overMeans, 0, sizeof(double) * m_dimNum);
    memset(m_minVars, 0, sizeof(double) * m_dimNum);

    // Open the sample and label file to initialize the model.
    ifstream sampleFile(sampleFileName, ios_base::binary);
    //assert(sampleFile);
    ifstream labelFile("gmm_init.tmp", ios_base::binary);
    //assert(labelFile);

    int size = 0;
    sampleFile.read((char*)&size, sizeof(int));
    sampleFile.seekg(2 * sizeof(int), ios_base::beg);
    labelFile.seekg(sizeof(int), ios_base::beg);

    double* x = new double[m_dimNum];
    int label = -1;

    for (int i = 0; i < size; i++)
    {
        sampleFile.read((char*)x, sizeof(double) * m_dimNum);
        labelFile.read((char*)&label, sizeof(int));

        // Count each Gaussian.
        counts[label]++;
        double* m = kmeans->GetMean(label);
        for (int d = 0; d < m_dimNum; d++)
        {
            m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
        }

        // Count the overall mean and variance.
        for (int d = 0; d < m_dimNum; d++)
        {
            overMeans[d] += x[d];
            m_minVars[d] += x[d] * x[d];
        }
    }

    // Compute the overall variance (* 0.01) as the minimum variance.
    for (int d = 0; d < m_dimNum; d++)
    {
        overMeans[d] /= size;
        m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
    }

    // Initialize each Gaussian.
    for (int i = 0; i < m_mixNum; i++)
    {
        m_priors[i] = 1.0 * counts[i] / size;

        if (m_priors[i] > 0)
        {
            for (int d = 0; d < m_dimNum; d++)
            {
                m_vars[i][d] = m_vars[i][d] / counts[i];

                // A minimum variance for each dimension is required.
                if (m_vars[i][d] < m_minVars[d])
                {
                    m_vars[i][d] = m_minVars[d];
                }
            }
        }
        else
        {
            memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
            cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
        }
    }

    delete kmeans;
    delete[] x;
    delete[] counts;
    delete[] overMeans;

    sampleFile.close();
    labelFile.close();
}
void GMM::Init(double *data, int N)
{
    const double MIN_VAR = 1E-10;

    // Run k-means on the raw samples to get an initial labeling.
    KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
    kmeans->SetInitMode(KMeans::InitUniform);
    int* Label = new int[N];
    kmeans->Cluster(data, N, Label);

    int* counts = new int[m_mixNum];
    double* overMeans = new double[m_dimNum];  // Overall mean of training data
    for (int i = 0; i < m_mixNum; i++)
    {
        counts[i] = 0;
        m_priors[i] = 0;
        memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
        memset(m_vars[i], 0, sizeof(double) * m_dimNum);
    }
    memset(overMeans, 0, sizeof(double) * m_dimNum);
    memset(m_minVars, 0, sizeof(double) * m_dimNum);

    const int size = N;
    double* x = new double[m_dimNum];
    int label = -1;

    for (int i = 0; i < size; i++)
    {
        // Copy sample i (stored interleaved, row-major) into x.
        for (int j = 0; j < m_dimNum; j++)
            x[j] = data[i * m_dimNum + j];
        label = Label[i];

        // Count each Gaussian.
        counts[label]++;
        double* m = kmeans->GetMean(label);
        for (int d = 0; d < m_dimNum; d++)
        {
            m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
        }

        // Count the overall mean and variance.
        for (int d = 0; d < m_dimNum; d++)
        {
            overMeans[d] += x[d];
            m_minVars[d] += x[d] * x[d];
        }
    }

    // Compute the overall variance (* 0.01) as the minimum variance.
    for (int d = 0; d < m_dimNum; d++)
    {
        overMeans[d] /= size;
        m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
    }

    // Initialize each Gaussian.
    for (int i = 0; i < m_mixNum; i++)
    {
        m_priors[i] = 1.0 * counts[i] / size;

        if (m_priors[i] > 0)
        {
            for (int d = 0; d < m_dimNum; d++)
            {
                m_vars[i][d] = m_vars[i][d] / counts[i];

                // A minimum variance for each dimension is required.
                if (m_vars[i][d] < m_minVars[d])
                {
                    m_vars[i][d] = m_minVars[d];
                }
            }
        }
        else
        {
            memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
            cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
        }
    }

    delete kmeans;
    delete[] x;
    delete[] counts;
    delete[] overMeans;
    delete[] Label;
}
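A hypothetical driver for this overload, assuming a GMM(dimNum, mixNum) constructor analogous to the KMeans one above; the constructor signature is not shown in these excerpts, so treat it as an assumption.

int main()
{
    // Six 1-D samples forming two obvious groups (illustrative data).
    double data[] = { 0.1, 0.2, 0.15, 5.0, 5.2, 4.9 };

    GMM gmm(1, 2);      // 1-D features, 2 mixture components (assumed ctor)
    gmm.Init(data, 6);  // k-means labels seed the priors, means, and variances

    return 0;
}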
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::Col<size_t>& assignments) const
{
  // Seed the random number generator.
  math::RandomSeed(std::time(NULL));

  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  // We will use these objects repeatedly for clustering.
  arma::Col<size_t> sampledAssignments;
  arma::mat centroids;
  KMeans<> kmeans;

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    kmeans.Cluster(sampledData, clusters, sampledAssignments, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  kmeans.Cluster(sampledCentroids, clusters, sampledAssignments, centroids);

  // Turn the final centroids into assignments.
  assignments.set_size(data.n_cols);
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = clusters;

    for (size_t j = 0; j < clusters; ++j)
    {
      const double distance = kmeans.Metric().Evaluate(data.col(i), centroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    // Assign the point to its closest cluster.
    assignments[i] = closestCluster;
  }
}
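One caution about the math::RandomSeed(std::time(NULL)) call at the top of this overload: std::time has one-second resolution, so two calls within the same second replay the same sample sequence, and reseeding inside a library routine also clobbers any seed the caller chose. A minimal sketch of the usual alternative, seeding once at program startup (the call site is illustrative; mlpack's math::RandomSeed is the same function used above):

#include <ctime>
#include <mlpack/core.hpp>

int main()
{
  // Seed once, up front; library code can then draw from the shared RNG
  // without reseeding on every Cluster() call.
  mlpack::math::RandomSeed((size_t) std::time(NULL));

  // ... run RefinedStart::Cluster / KMeans as in the examples above ...
  return 0;
}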