void RadialBasisFunction::Train(HostMatrix<float> &Input, HostMatrix<float> &Target){

    //std::cout << "Training" << std::endl;

    // c_width = (float*) malloc(sizeof(float)*network_size);
    // memset(c_width,0,sizeof(float)*network_size);

    DeviceMatrix<float> device_X(Input);

    //std::cout << "KMeans" << std::endl;
    clock_t initialTime = clock();
    KMeans KM;
    KM.SetSeed(seed);
    dCenters = KM.Execute(device_X,network_size);

    cudaThreadSynchronize();
    times[0] = (clock() - initialTime);

    //std::cout << "Adjust Widths" << std::endl;
    /*Adjust width using mean of distance to neighbours*/
    initialTime = clock();
    AdjustWidths(number_neighbours);

    cudaThreadSynchronize();
    times[1] = (clock() - initialTime);

    /*Training weights and scaling factor*/
    HostMatrix<float> TargetArr(Target.Rows(),NumClasses);
    memset(TargetArr.Pointer(),0,sizeof(float)*TargetArr.Elements());

    for(int i = 0; i < Target.Rows(); i++){
        TargetArr(i,((int)Target(i,0)-1)) = 1;
    }

    DeviceMatrix<float> d_Target(TargetArr);

    //std::cout << "Calculating Weights" << std::endl;
    initialTime = clock();

    DeviceMatrix<float> device_activ_matrix(device_X.Rows(),dCenters.Rows(),ColumnMajor);

    KernelActivationMatrix(device_activ_matrix.Pointer(),device_X.Pointer(),dCenters.Pointer(),device_X.Columns(),dCenters.Columns(),device_activ_matrix.Columns(),device_activ_matrix.Rows(),scaling_factor,device_c_width.Pointer());

    DeviceMatrix<float> d_Aplus = UTILS::pseudoinverse(device_activ_matrix);

    dWeights = DeviceMatrix<float>(d_Aplus.Rows(),d_Target.Columns());

    d_Aplus.Multiply(d_Aplus,d_Target,dWeights);

    /*Return Weights and Centers*/
    cudaThreadSynchronize();
    times[2] = (clock() - initialTime);

    // cudaMemcpy(c_width,device_c_width.Pointer(),sizeof(float)*device_c_width.Length(),cudaMemcpyDeviceToHost);
    // this->Weights = HostMatrix<float>(dWeights);
    // this->Centers = HostMatrix<float>(dCenters);
}
int main()
{
    double data[] = {
         0.0,  0.2,  0.4,
         0.3,  0.2,  0.4,
         0.4,  0.2,  0.4,
         0.5,  0.2,  0.4,
         5.0,  5.2,  8.4,
         6.0,  5.2,  7.4,
         4.0,  5.2,  4.4,
        10.3, 10.4, 10.5,
        10.1, 10.6, 10.7,
        11.3, 10.2, 10.9
    };

    const int size = 10;        //Number of samples
    const int dim = 3;          //Dimension of feature
    const int cluster_num = 4;  //Cluster number

    KMeans* kmeans = new KMeans(dim, cluster_num);
    int* labels = new int[size];
    kmeans->SetInitMode(KMeans::InitUniform);
    kmeans->Cluster(data, size, labels);

    for(int i = 0; i < size; ++i)
    {
        printf("%f, %f, %f belongs to %d cluster\n",
               data[i*dim+0], data[i*dim+1], data[i*dim+2], labels[i]);
    }

    delete [] labels;
    delete kmeans;

    return 0;
}
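The KMeans class used above is not shown in this collection. The following is a hypothetical declaration inferred purely from how the class is called here and in the GMM::Init examples later in this section (constructor, SetInitMode, both Cluster overloads, GetMean); it is a sketch of the assumed interface, not the library's actual header.

// Hypothetical interface inferred from usage in this section.
class KMeans
{
public:
    enum InitMode { InitRandom, InitUniform };  // only InitUniform appears in the examples;
                                                // InitRandom is an assumption

    KMeans(int dimNum, int clusterNum);         // feature dimension, number of clusters
    void SetInitMode(InitMode mode);

    // Cluster `size` samples laid out row-major in `data` (size * dimNum doubles),
    // writing one cluster label per sample into `labels`.
    void Cluster(double* data, int size, int* labels);

    // Cluster samples read from a binary file, writing labels to a label file
    // (this overload is used by GMM::Init below).
    void Cluster(const char* sampleFileName, const char* labelFileName);

    // Mean (center) of cluster i, a dimNum-length array.
    double* GetMean(int i);
};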
void test_kmeans()
{
    printf("[test kmeans]\n");
    Loader loader("data/kmeans");
    HFMatrix<double> matrix(loader);
    KMeans kmeans;
    kmeans.set_distance(new Euclidean);
    kmeans.cluster(&matrix, 5);
}
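The call set_distance(new Euclidean) implies this library dispatches through a pluggable distance-function interface, but neither the base class nor Euclidean is shown. A hypothetical sketch of what such an interface commonly looks like (names and signatures are illustrative, not taken from this library):

#include <cmath>
#include <cstddef>

// Hypothetical distance interface; illustrative only.
struct Distance
{
    virtual ~Distance() {}
    virtual double operator()(const double* a, const double* b, std::size_t dim) const = 0;
};

struct Euclidean : Distance
{
    double operator()(const double* a, const double* b, std::size_t dim) const override
    {
        double sum = 0.0;
        for (std::size_t i = 0; i < dim; ++i)
        {
            const double d = a[i] - b[i];
            sum += d * d;  // accumulate squared per-dimension differences
        }
        return std::sqrt(sum);
    }
};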
bool KMeansQuantizer::train(MatrixDouble &trainingData){

    if( !initialized ){
        errorLog << "train(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }

    //Reset any previous model
    quantizerTrained = false;
    featureDataReady = false;
    clusters.clear();
    quantizationDistances.clear();

    //Train the KMeans model
    KMeans kmeans;
    kmeans.setNumClusters(numClusters);
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
    kmeans.setMaxNumEpochs( 10000 );

    if( !kmeans.trainInplace(trainingData) ){
        errorLog << "train(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }

    //Save the clusters from the KMeans model
    clusters = kmeans.getClusters();
    quantizationDistances.resize(numClusters,0);
    quantizerTrained = true;

    return true;
}
bool KMeansQuantizer::train_(MatrixDouble &trainingData){

    //Clear any previous model
    clear();

    //Train the KMeans model
    KMeans kmeans;
    kmeans.setNumClusters(numClusters);
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( minChange );
    kmeans.setMinNumEpochs( minNumEpochs );
    kmeans.setMaxNumEpochs( maxNumEpochs );

    if( !kmeans.train_(trainingData) ){
        errorLog << "train_(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }

    trained = true;
    initialized = true;
    numInputDimensions = trainingData.getNumCols();
    numOutputDimensions = 1; //This is always 1 for the KMeansQuantizer
    featureVector.resize(numOutputDimensions,0);
    clusters = kmeans.getClusters();
    quantizationDistances.resize(numClusters,0);

    return true;
}
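Once trained, a KMeansQuantizer maps each input vector to the index of its nearest cluster. A minimal usage sketch, assuming the GRT-style quantize() entry point; the exact constructor and method names may differ between GRT versions:

#include <GRT/GRT.h>
#include <iostream>
using namespace GRT;

// Usage sketch; assumes GRT's KMeansQuantizer(numClusters), train_() and quantize().
void quantizeExample( MatrixDouble &trainingData, VectorDouble &sample ){
    KMeansQuantizer quantizer( 10 );  // 10 clusters (illustrative value)
    if( !quantizer.train_( trainingData ) ) return;

    // Map a new sample to the ID of its closest cluster center.
    UINT clusterIndex = quantizer.quantize( sample );
    std::cout << "quantized to cluster: " << clusterIndex << std::endl;
}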
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::mat& centroids) const
{
  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    KMeans<> kmeans;
    kmeans.Cluster(sampledData, clusters, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  KMeans<> kmeans;
  kmeans.Cluster(sampledCentroids, clusters, centroids);
}
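RefinedStart is normally plugged into mlpack's KMeans as its initial-partition policy rather than called directly. A small usage sketch, assuming mlpack 2.x-era headers and namespaces (the same API the KMeansWrapper code later in this section uses); the cluster count is illustrative:

#include <mlpack/core.hpp>
#include <mlpack/methods/kmeans/kmeans.hpp>
#include <mlpack/methods/kmeans/refined_start.hpp>

using namespace mlpack::kmeans;

void refinedStartExample(const arma::mat& data)
{
  arma::Row<size_t> assignments;
  arma::mat centroids;

  // Seed k-means with the Bradley-Fayyad refined start procedure.
  KMeans<mlpack::metric::EuclideanDistance, RefinedStart> k;
  k.Cluster(data, 3, assignments, centroids);  // 3 clusters (illustrative)
}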
int main (int argc, const char * argv[])
{
    //Create a new KMeans instance
    KMeans kmeans;
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
    kmeans.setMaxNumEpochs( 10000 );

    //There are a number of ways of training the KMeans algorithm, depending on what you need the KMeans for
    //These are:
    //- with labelled training data (in the ClassificationData format)
    //- with unlabelled training data (in the UnlabelledData format)
    //- with unlabelled training data (in a simple MatrixDouble format)
    //This example shows you how to train the algorithm with ClassificationData

    //Load some training data to train the KMeans algorithm
    ClassificationData trainingData;

    if( !trainingData.load("LabelledClusterData.csv") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }

    //Train the KMeans algorithm - K will automatically be set to the number of classes in the training dataset
    if( !kmeans.train( trainingData ) ){
        cout << "Failed to train model!\n";
        return EXIT_FAILURE;
    }

    //Get the K clusters from the KMeans instance and print them
    cout << "\nClusters:\n";
    MatrixFloat clusters = kmeans.getClusters();
    for(unsigned int k=0; k<clusters.getNumRows(); k++){
        for(unsigned int n=0; n<clusters.getNumCols(); n++){
            cout << clusters[k][n] << "\t";
        }
        cout << endl;
    }

    return EXIT_SUCCESS;
}
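After training, the same KMeans instance can assign new samples to clusters. A sketch assuming GRT's Clusterer interface (predict_ plus getPredictedClusterLabel), which may vary across GRT versions:

#include <GRT/GRT.h>
#include <iostream>
using namespace GRT;

// Assign a new sample to a cluster using a trained GRT KMeans instance.
// Assumes the GRT Clusterer API (predict_, getPredictedClusterLabel).
bool clusterNewSample( KMeans &kmeans, VectorFloat &sample ){
    if( !kmeans.predict_( sample ) ) return false;
    std::cout << "sample assigned to cluster: " << kmeans.getPredictedClusterLabel() << std::endl;
    return true;
}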
void ex_kmeans ()
{
    SampleList samples;
    ssi_size_t n_classes = 4;
    ssi_size_t n_samples = 200;
    ssi_size_t n_streams = 1;
    ssi_real_t distr[][3] = {
        0.5f, 0.5f, 0.5f,
        0.5f, 0.5f, 0.5f,
        0.5f, 0.5f, 0.5f,
        0.5f, 0.5f, 0.5f
    };
    ModelTools::CreateTestSamples (samples, n_classes, n_samples, n_streams, distr);

    // training
    {
        KMeans *model = ssi_create (KMeans, "kmeans", true);
        model->getOptions()->k = n_classes;
        Trainer trainer (model);
        trainer.train (samples);
        trainer.save ("kmeans");
    }

    // evaluation
    {
        Trainer trainer;
        Trainer::Load (trainer, "kmeans");
        trainer.cluster (samples);
        ModelTools::PlotSamples (samples, "kmeans", ssi_rect(650, 0, 400, 400));
    }

    // split
    {
        KMeans *model = ssi_create (KMeans, "kmeans", true);
        model->load ("kmeans.trainer.KMeans.model");
        ISSelectSample ss (&samples);
        ss.setSelection (model->getIndicesPerClusterSize(1), model->getIndicesPerCluster(1));
        ModelTools::PlotSamples (ss, "kmeans", ssi_rect(650, 0, 400, 400));
    }
}
bool KMeansFeatures::train_(MatrixDouble &trainingData){

    if( !initialized ){
        errorLog << "train_(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }

    //Reset any previous model
    featureDataReady = false;

    const UINT M = trainingData.getNumRows();
    const UINT N = trainingData.getNumCols();

    numInputDimensions = N;
    numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.size()-1 ];

    //Scale the input data if needed
    ranges = trainingData.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<N; j++){
                trainingData[i][j] = scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1.0);
            }
        }
    }

    //Train the KMeans model at each layer
    const UINT K = (UINT)numClustersPerLayer.size();
    for(UINT k=0; k<K; k++){
        KMeans kmeans;
        kmeans.setNumClusters( numClustersPerLayer[k] );
        kmeans.setComputeTheta( true );
        kmeans.setMinChange( minChange );
        kmeans.setMinNumEpochs( minNumEpochs );
        kmeans.setMaxNumEpochs( maxNumEpochs );

        trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << endl;
        if( !kmeans.train_( trainingData ) ){
            errorLog << "train_(MatrixDouble &trainingData) - Failed to train kmeans model at layer: " << k << endl;
            return false;
        }

        //Save the clusters
        clusters.push_back( kmeans.getClusters() );

        //Project the data through the current layer to use as training data for the next layer
        if( k+1 != K ){
            MatrixDouble data( M, numClustersPerLayer[k] );
            VectorDouble input( trainingData.getNumCols() );
            VectorDouble output( data.getNumCols() );

            for(UINT i=0; i<M; i++){

                //Copy the data into the sample
                for(UINT j=0; j<input.size(); j++){
                    input[j] = trainingData[i][j];
                }

                //Project the sample through the current layer
                if( !projectDataThroughLayer( input, output, k ) ){
                    errorLog << "train_(MatrixDouble &trainingData) - Failed to project sample through layer: " << k << endl;
                    return false;
                }

                //Copy the result into the training data for the next layer
                for(UINT j=0; j<output.size(); j++){
                    data[i][j] = output[j];
                }
            }

            //Swap the data for the next layer
            trainingData = data;
        }
    }

    //Flag that the kmeans model has been trained
    trained = true;
    featureVector.resize( numOutputDimensions, 0 );

    return true;
}
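projectDataThroughLayer() is not shown above. A plausible sketch of the per-layer projection, assuming each output dimension is an activation derived from the sample's distance to one of the layer's cluster centers; this is a guess at the intent, not GRT's actual implementation:

#include <GRT/GRT.h>
#include <cmath>

// Hypothetical sketch, NOT GRT's projectDataThroughLayer: map a sample to one
// activation per cluster center, here the Euclidean distance between the input
// and each row of the layer's cluster matrix.
bool projectThroughLayerSketch( const GRT::MatrixDouble &layerClusters,
                                const GRT::VectorDouble &input,
                                GRT::VectorDouble &output ){
    output.resize( layerClusters.getNumRows() );
    for(GRT::UINT k=0; k<layerClusters.getNumRows(); k++){
        double sum = 0;
        for(GRT::UINT j=0; j<layerClusters.getNumCols(); j++){
            const double d = input[j] - layerClusters[k][j];
            sum += d * d;
        }
        output[k] = std::sqrt( sum );
    }
    return true;
}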
int main(int argc, char** argv)
{
    //initialize FreeImage library if needed
#ifdef FREEIMAGE_LIB
    FreeImage_Initialise();
#endif

    //hardcode the image names and create the FreeImage and KMeans objects
    const char* file_in = "test.jpg";
    const char* file_out = "test_result.jpg";
    fipImage input;
    KMeans img;

    //generate centroids randomly
    mt19937 generator(chrono::system_clock::now().time_since_epoch().count());
    uniform_real_distribution<float> distro(0.0f, 1.0f);
    for(int i = 0; i < CENTROID_COUNT; ++i)
    {
        centroid temp;
        temp.r = distro(generator);
        temp.g = distro(generator);
        temp.b = distro(generator);
        img.centroids[i] = temp;
    }

    //open and load image as per convention from freeimage
    if(!input.load(file_in))
    {
        cout << "Could not load file with name " << file_in << endl;
        return 1;
    }

    FREE_IMAGE_TYPE originalType = input.getImageType();
    if(!input.convertTo24Bits())
    {
        cout << "Error occurred when converting image to 24 bits." << endl;
        return 1;
    }

    //Assign common method results to variables to save access times
    unsigned int width = input.getWidth();
    unsigned int height = input.getHeight();

    //create pixel structs from the raw image data, normalizing colors to [0,1]
    //float* pixelData = reinterpret_cast<float*>(input.accessPixels());
    img.pixels.resize(width * height);
    for (unsigned int i = 0; i < width; ++i)
    {
        for (unsigned int j = 0; j < height; ++j)
        {
            pixel temp;
            byte colors[4];
            input.getPixelColor(i, j, reinterpret_cast<RGBQUAD*>(colors));
            temp.b = colors[0] / 255.0f;
            temp.g = colors[1] / 255.0f;
            temp.r = colors[2] / 255.0f;
            temp.cluster = -1;
            img.pixels[j * width + i] = temp;
        }
    }

    //run a fixed number of k-means iterations, then color each pixel by its cluster
    StopWatch timer;
    timer.start();
    for(int z = 0; z < 1000; z++)
    {
        img.assignCentroids();
        img.moveCentroids();
        //cout << z << endl;
    }
    img.assignFinalPixelColors();
    timer.stop();

    //write image: allocate the output image and copy the clustered colors into it
    fipImage output(FIT_BITMAP, width, height, 24);
    unsigned int outWidth = output.getWidth();
    unsigned int outHeight = output.getHeight();
    for(unsigned int i = 0; i < outWidth; ++i)
    {
        for(unsigned int j = 0; j < outHeight; ++j)
        {
            byte colors[4];
            int index = j * outWidth + i;
            colors[0] = static_cast<byte>(img.pixels[index].b * 255);
            colors[1] = static_cast<byte>(img.pixels[index].g * 255);
            colors[2] = static_cast<byte>(img.pixels[index].r * 255);
            output.setPixelColor(i, j, reinterpret_cast<RGBQUAD*>(colors));
        }
    }

    if(!output.convertToType(originalType))
    {
        cout << "Could not convert the result back to the original image type for saving." << endl;
        return 1;
    }

    if(!output.save(file_out))
    {
        cout << "Something went wrong with filesaving" << endl;
        return 1;
    }

#ifdef FREEIMAGE_LIB
    FreeImage_Uninitialise();
#endif

    return 0;
}
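The KMeans class driving this example (assignCentroids, moveCentroids) is not shown. Below is a hypothetical stand-in for the assignment step: every pixel is assigned to the nearest centroid in RGB space by squared Euclidean distance. The struct fields mirror the ones used in the example; the real implementation may differ.

#include <cstddef>
#include <limits>
#include <vector>

// Hypothetical sketch of the k-means assignment step; not the example's
// actual KMeans::assignCentroids implementation.
struct centroid { float r, g, b; };
struct pixel    { float r, g, b; int cluster; };

void assignCentroids(std::vector<pixel>& pixels, const std::vector<centroid>& centroids)
{
    for (pixel& p : pixels)
    {
        float best = std::numeric_limits<float>::max();
        for (std::size_t c = 0; c < centroids.size(); ++c)
        {
            const float dr = p.r - centroids[c].r;
            const float dg = p.g - centroids[c].g;
            const float db = p.b - centroids[c].b;
            const float d = dr*dr + dg*dg + db*db;  // squared distance suffices for argmin
            if (d < best) { best = d; p.cluster = static_cast<int>(c); }
        }
    }
}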
arma::vec Vespucci::Math::KMeansWrapper::Cluster(const arma::mat &data, const size_t clusters, arma::mat &centroids)
{
    using namespace mlpack::metric;
    using namespace mlpack::kmeans;
    arma::Row<size_t> assignments;
    arma::vec assignments_vec;

    if (allow_empty_){
        if (metric_ == "squaredeuclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<SquaredEuclideanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<SquaredEuclideanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<SquaredEuclideanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "euclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<EuclideanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<EuclideanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<EuclideanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "manhattan"){
            if (init_ == "sampleinitialization"){
                KMeans<ManhattanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ManhattanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ManhattanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "chebyshev"){
            if (init_ == "sampleinitialization"){
                KMeans<ChebyshevDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ChebyshevDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ChebyshevDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
    }
    else{
        if (metric_ == "squaredeuclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<SquaredEuclideanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<SquaredEuclideanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<SquaredEuclideanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "euclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<EuclideanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<EuclideanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<EuclideanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "manhattan"){
            if (init_ == "sampleinitialization"){
                KMeans<ManhattanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ManhattanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ManhattanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "chebyshev"){
            if (init_ == "sampleinitialization"){
                KMeans<ChebyshevDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ChebyshevDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ChebyshevDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
    }

    // Convert the 0-based cluster assignments to 1-based labels in a vec.
    assignments_vec.set_size(assignments.n_elem);
    for (arma::uword i = 0; i < assignments.n_elem; ++i)
        assignments_vec(i) = double(assignments(i) + 1);

    return assignments_vec;
}
int main(int argc, char *argv[]) {
    google::ParseCommandLineFlags(&argc, &argv, true);
    google::InitGoogleLogging(argv[0]);

    petuum::HighResolutionTimer data_loading_timer;
    LOG(INFO) << "training file location: " << FLAGS_train_file;
    KMeans kmeans;
    kmeans.ReadData();
    LOG(INFO) << "Data Loading Complete. Loaded " << kmeans.GetTrainingDataSize()
              << " in " << data_loading_timer.elapsed();

    petuum::TableGroupConfig table_group_config;
    table_group_config.num_comm_channels_per_client = FLAGS_num_comm_channels_per_client;
    table_group_config.num_total_clients = FLAGS_num_clients;
    table_group_config.num_tables = 4;
    // + 1 for main() thread.
    table_group_config.num_local_app_threads = FLAGS_num_app_threads + 1;
    table_group_config.client_id = FLAGS_client_id;
    table_group_config.stats_path = FLAGS_stats_path;

    petuum::GetHostInfos(FLAGS_hostfile, &table_group_config.host_map);

    if (std::string("SSP").compare(FLAGS_consistency_model) == 0) {
        table_group_config.consistency_model = petuum::SSP;
    } else if (std::string("SSPPush").compare(FLAGS_consistency_model) == 0) {
        table_group_config.consistency_model = petuum::SSPPush;
    } else if (std::string("LocalOOC").compare(FLAGS_consistency_model) == 0) {
        table_group_config.consistency_model = petuum::LocalOOC;
    } else {
        LOG(FATAL) << "Unknown consistency model: " << FLAGS_consistency_model;
    }

    petuum::PSTableGroup::RegisterRow<petuum::DenseRow<float> >(kDenseRowFloatTypeID);
    petuum::PSTableGroup::RegisterRow<petuum::DenseRow<int> >(kDenseRowIntTypeID);

    petuum::PSTableGroup::Init(table_group_config, false);

    // Centers table.
    petuum::ClientTableConfig table_config;
    table_config.table_info.row_type = kDenseRowFloatTypeID;
    table_config.table_info.table_staleness = FLAGS_staleness;
    //table_config.table_info.row_capacity = feature_dim * num_labels;
    table_config.table_info.row_capacity = FLAGS_dimensionality;
    table_config.table_info.row_oplog_type = FLAGS_row_oplog_type;
    table_config.table_info.oplog_dense_serialized = FLAGS_oplog_dense_serialized;
    table_config.table_info.dense_row_oplog_capacity = FLAGS_dimensionality;
    //table_config.process_cache_capacity = 1;
    table_config.process_cache_capacity = FLAGS_num_centers;
    table_config.oplog_capacity = table_config.process_cache_capacity;
    petuum::PSTableGroup::CreateTable(FLAGS_centres_table_id, table_config);
    LOG(INFO) << "created centers table";

    // Objective function value table.
    table_config.table_info.dense_row_oplog_capacity = FLAGS_num_epochs + 1;
    table_config.table_info.table_staleness = 0;
    petuum::PSTableGroup::CreateTable(FLAGS_objective_function_value_tableId, table_config);
    LOG(INFO) << "created objective values table";

    // Center counts table.
    table_config.table_info.row_type = kDenseRowIntTypeID;
    table_config.table_info.table_staleness = FLAGS_count_table_staleness;
    table_config.table_info.row_capacity = FLAGS_num_centers;
    table_config.process_cache_capacity = 1000;
    table_config.oplog_capacity = table_config.process_cache_capacity;
    petuum::PSTableGroup::CreateTable(FLAGS_center_count_tableId, table_config);

    // Table to hold the local deltas.
    petuum::PSTableGroup::CreateTable(FLAGS_update_centres_table_id, table_config);
    LOG(INFO) << "Completed creating tables";
    petuum::PSTableGroup::CreateTableDone();

    std::vector<std::thread> threads(FLAGS_num_app_threads);
    for (auto& thr : threads) {
        thr = std::thread(&KMeans::Start, std::ref(kmeans));
    }
    for (auto& thr : threads) {
        thr.join();
    }

    petuum::PSTableGroup::ShutDown();
    LOG(INFO) << "Kmeans finished and shut down!";
    return 0;
}
void GMM::Init(const char* sampleFileName)
{
    const double MIN_VAR = 1E-10;

    KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
    kmeans->SetInitMode(KMeans::InitUniform);
    kmeans->Cluster(sampleFileName, "gmm_init.tmp");

    int* counts = new int[m_mixNum];
    double* overMeans = new double[m_dimNum]; // Overall mean of training data
    for (int i = 0; i < m_mixNum; i++)
    {
        counts[i] = 0;
        m_priors[i] = 0;
        memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
        memset(m_vars[i], 0, sizeof(double) * m_dimNum);
    }
    memset(overMeans, 0, sizeof(double) * m_dimNum);
    memset(m_minVars, 0, sizeof(double) * m_dimNum);

    // Open the sample and label file to initialize the model
    ifstream sampleFile(sampleFileName, ios_base::binary);
    //assert(sampleFile);

    ifstream labelFile("gmm_init.tmp", ios_base::binary);
    //assert(labelFile);

    int size = 0;
    sampleFile.read((char*)&size, sizeof(int));

    sampleFile.seekg(2 * sizeof(int), ios_base::beg);
    labelFile.seekg(sizeof(int), ios_base::beg);

    double* x = new double[m_dimNum];
    int label = -1;

    for (int i = 0; i < size; i++)
    {
        sampleFile.read((char*)x, sizeof(double) * m_dimNum);
        labelFile.read((char*)&label, sizeof(int));

        // Count each Gaussian
        counts[label]++;
        double* m = kmeans->GetMean(label);
        for (int d = 0; d < m_dimNum; d++)
        {
            m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
        }

        // Count the overall mean and variance.
        for (int d = 0; d < m_dimNum; d++)
        {
            overMeans[d] += x[d];
            m_minVars[d] += x[d] * x[d];
        }
    }

    // Compute the overall variance (* 0.01) as the minimum variance.
    for (int d = 0; d < m_dimNum; d++)
    {
        overMeans[d] /= size;
        m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
    }

    // Initialize each Gaussian.
    for (int i = 0; i < m_mixNum; i++)
    {
        m_priors[i] = 1.0 * counts[i] / size;

        if (m_priors[i] > 0)
        {
            for (int d = 0; d < m_dimNum; d++)
            {
                m_vars[i][d] = m_vars[i][d] / counts[i];

                // A minimum variance for each dimension is required.
                if (m_vars[i][d] < m_minVars[d])
                {
                    m_vars[i][d] = m_minVars[d];
                }
            }
        }
        else
        {
            memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
            cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
        }
    }

    delete kmeans;
    delete[] x;
    delete[] counts;
    delete[] overMeans;

    sampleFile.close();
    labelFile.close();
}
void GMM::Init(double *data, int N)
{
    const double MIN_VAR = 1E-10;

    KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
    kmeans->SetInitMode(KMeans::InitUniform);
    int *Label = new int[N];
    kmeans->Cluster(data, N, Label);

    int* counts = new int[m_mixNum];
    double* overMeans = new double[m_dimNum]; // Overall mean of training data
    for (int i = 0; i < m_mixNum; i++)
    {
        counts[i] = 0;
        m_priors[i] = 0;
        memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
        memset(m_vars[i], 0, sizeof(double) * m_dimNum);
    }
    memset(overMeans, 0, sizeof(double) * m_dimNum);
    memset(m_minVars, 0, sizeof(double) * m_dimNum);

    int size = N;

    double* x = new double[m_dimNum];
    int label = -1;

    for (int i = 0; i < size; i++)
    {
        for (int j = 0; j < m_dimNum; j++)
            x[j] = data[i * m_dimNum + j];
        label = Label[i];

        // Count each Gaussian
        counts[label]++;
        double* m = kmeans->GetMean(label);
        for (int d = 0; d < m_dimNum; d++)
        {
            m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
        }

        // Count the overall mean and variance.
        for (int d = 0; d < m_dimNum; d++)
        {
            overMeans[d] += x[d];
            m_minVars[d] += x[d] * x[d];
        }
    }

    // Compute the overall variance (* 0.01) as the minimum variance.
    for (int d = 0; d < m_dimNum; d++)
    {
        overMeans[d] /= size;
        m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
    }

    // Initialize each Gaussian.
    for (int i = 0; i < m_mixNum; i++)
    {
        m_priors[i] = 1.0 * counts[i] / size;

        if (m_priors[i] > 0)
        {
            for (int d = 0; d < m_dimNum; d++)
            {
                m_vars[i][d] = m_vars[i][d] / counts[i];

                // A minimum variance for each dimension is required.
                if (m_vars[i][d] < m_minVars[d])
                {
                    m_vars[i][d] = m_minVars[d];
                }
            }
        }
        else
        {
            memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
            cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
        }
    }

    delete kmeans;
    delete[] x;
    delete[] counts;
    delete[] overMeans;
    delete[] Label;
}
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);
    static int index = 0;

    ///Laser Data
    LMS1xx LaserSensor;
    scanCfg cfg;
    scanData data;
    scanDataCfg dataCfg;
    status_t status;
    std::ofstream file;
    double start_angle = 0;
    double stop_angle = 0;
    double resolution = 0;
    double frequency = 0;

    ///KMeans related variables
    KMeans<> K;
    mat dataset;
    size_t cluster;
    Col<size_t> assignments;
    mat centroid;

    ///Connect to the Lasersensor
    LaserSensor.connect(host);
    if(LaserSensor.isConnected())
    {
        std::cout << "\nConnected !!!\n";
        LaserSensor.login();

        ///Get Laser Configurations
        cfg = LaserSensor.getScanCfg();
        //cfg.angleResolution = 0.25*10000.0;
        //cfg.scaningFrequency = 25*100;
        //LaserSensor.setScanCfg(cfg);
        //LaserSensor.saveConfig();
        //sleep(3);
        cfg = LaserSensor.getScanCfg();
        start_angle = cfg.startAngle/10000.0;  //* DEG2RAD - M_PI/2;
        stop_angle = cfg.stopAngle/10000.0;    //* DEG2RAD - M_PI/2;
        resolution = cfg.angleResolution/10000.0;
        frequency = cfg.scaningFrequency/100;

        std::cout << "Start Angle: " << start_angle;
        std::cout << "\tStop Angle: " << stop_angle;
        std::cout << "\tResolution: " << resolution;
        std::cout << "\tFrequency: " << frequency;
        std::cout << std::endl;

        dataCfg.outputChannel = 1;
        dataCfg.remission = true;
        dataCfg.resolution = 1;
        dataCfg.encoder = 0;
        dataCfg.position = false;
        dataCfg.deviceName = false;
        dataCfg.outputInterval = 1;

        ///Set Data Configuration of the laser data
        LaserSensor.setScanDataCfg(dataCfg);

        ///Start Measurement
        LaserSensor.startMeas();
        do
        {
            status = LaserSensor.queryStatus();
            usleep(200);
        }
        while(status != ready_for_measurement);

        {
            LaserSensor.startDevice();
            LaserSensor.scanContinous(1);

            while(LaserSensor.isConnected())
            {
                ///Get the Laser Data
                LaserSensor.getData(data);
                //u_int16_t range[data.dist_len1];
                //u_int16_t intensity[data.rssi_len1];
                int range[data.dist_len1];
                int intensity[data.rssi_len1];

                for(int i = 0; i < data.dist_len1; i++)
                    range[i] = data.dist1[i];
                for(int i = 0; i < data.rssi_len1; i++)
                    intensity[i] = data.rssi1[i];

                if (index == 0)
                {
                    index++;
                    std::cout << std::endl << "Data len = " << data.dist_len1 << std::endl;
                    std::cout << "Intensity len = " << data.rssi_len1 << std::endl;

                    ///distance assumed to be in mm
                    ///Start angle is -45, end is 225
                    float angle_scan = -45.0;
                    float x[1081], y[1081];
                    ///The resolution is 0.25 degrees, so 1081 values
                    int index_range = 0;
                    double slope;
                    cluster = 2;
                    //centroid.zeros();
                    dataset.resize(2,1081);
                    dataset.zeros();
                    file.open("LaserData.txt");

                    while(1)
                    {
                        x[index_range] = range[index_range]*cos(angle_scan*DEG2RAD)/1000.0;
                        y[index_range] = range[index_range]*sin(angle_scan*DEG2RAD)/1000.0;
                        //std::cout << "range: " << range[index_range] << " angle: " << angle_scan;
                        //std::cout << " x: " << x[index_range] << " y : " << y[index_range] << std::endl;
                        angle_scan += 0.25;

                        //if(intensity[index_range] >= 850)
                        {
                            file << x[index_range] << "," << y[index_range] << "," << intensity[index_range] << std::endl;
                        }

                        if (angle_scan > 225.0)
                        {
                            break;
                        }
                        index_range++;
                        usleep(100);
                    }

                    ///Keep only the high-intensity returns for clustering
                    int index_tmp = 0;
                    for(int i = 0; i < 1081; i++)
                    {
                        if (intensity[i] >= 900)
                        {
                            dataset(0,index_tmp) = x[i];
                            dataset(1,index_tmp) = y[i];
                            std::cout << "\n" << dataset(0,index_tmp) << "\t" << dataset(1,index_tmp);
                            index_tmp++;
                        }
                    }

                    std::cout << "\nKMeans Calculations!!!" << std::endl;
                    dataset.resize(2,index_tmp);

                    ///Actual KMeans Clustering
                    K.Cluster((arma::mat) dataset, cluster, assignments, centroid);

                    /*************************************************************************
                    static double sum_x[2];
                    static double sum_y[2];
                    int number_dist1 = 0;
                    int number_dist2 = 0;
                    for(int i = 0; i < assignments.size(); i++)
                    {
                        switch(assignments[i])
                        {
                        case 0:
                            sum_x[0] += dataset(0,i);
                            sum_y[0] += dataset(1,i);
                            number_dist1++;
                            break;
                        case 1:
                            sum_x[1] += dataset(0,i);
                            sum_y[1] += dataset(1,i);
                            number_dist2++;
                            break;
                        };
                        std::cout << "\n" << assignments[i];
                    }

                    double center1_x, center1_y;
                    double center2_x, center2_y;
                    center1_x = sum_x[0]/number_dist1;
                    center1_y = sum_y[0]/number_dist1;
                    center2_x = sum_x[1]/number_dist2;
                    center2_y = sum_y[1]/number_dist2;
                    std::cout << center1_x << "," << center1_y << " " << center2_x << "," << center2_y << endl;
                    *************************************************************************/

                    //std::cout << "\n" << centroid(0,0) << "\t" << centroid(1,0) << "\t" << centroid(0,1) << "\t" << centroid(1,1) << "\n";

                    ///Orientation from the line joining the two cluster centers
                    slope = (centroid(1,1) - centroid(1,0)) / (centroid(0,1) - centroid(0,0));
                    slope = (atan(slope))*RAD2DEG;
                    std::cout << "\nclusters= " << cluster << std::endl;
                    std::cout << "\nOrientation= " << slope << std::endl;
                }
                usleep(200);
            }

            std::cout << "\n Sensor Disconnected \n";

            ///Disconnect the Laser
            LaserSensor.scanContinous(0);
            LaserSensor.stopMeas();
            LaserSensor.disconnect();
            file.close();
        }
    }
    else
    {
        std::cout << "\nSensor Not Connected !!!\n";
    }

    return a.exec();
}
bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){

    error = 0;
    threshold = 0;

    const UINT M = trainingData.getNumSamples();
    const UINT K = (UINT)classLabels.size();

    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    VectorFloat groupCounter(2,0);
    MatrixFloat classProbabilities(K,2);

    //Use this data to train a KMeans cluster with 2 clusters
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-5 );
    kmeans.setMinNumEpochs( 1 );
    kmeans.setMaxNumEpochs( 100 );

    //Disable the logging to clean things up
    kmeans.setTrainingLoggingEnabled( false );

    if( !kmeans.train_( data ) ){
        errorLog << __GRT_LOG__ << " Failed to train KMeans model for feature: " << featureIndex << std::endl;
        return false;
    }

    //Set the split threshold as the mid point between the two clusters
    const MatrixFloat &clusters = kmeans.getClusters();
    threshold = 0;
    for(UINT i=0; i<clusters.getNumRows(); i++){
        threshold += clusters[i][0];
    }
    threshold /= clusters.getNumRows();

    //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
    groupCounter[0] = groupCounter[1] = 0;
    classProbabilities.setAllValues(0);
    for(UINT i=0; i<M; i++){
        groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
        groupCounter[ groupIndex[i] ]++;
        classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
    }

    //Compute the class probabilities for the lhs group and rhs group
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
        classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
    }

    //Compute the Gini index for the lhs and rhs groups
    giniIndexL = giniIndexR = 0;
    for(UINT k=0; k<K; k++){
        giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
        giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
    }
    weightL = groupCounter[0]/M;
    weightR = groupCounter[1]/M;
    error = (giniIndexL*weightL) + (giniIndexR*weightR);

    return true;
}
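The split error above is the size-weighted sum of the two groups' Gini impurities. A small worked example of just that arithmetic, with illustrative numbers (two classes; the split sends 6 of 10 samples left, 5 of them class A, and 4 right, 1 of them class A):

#include <cstdio>

int main() {
    const double pL_A = 5.0/6.0, pL_B = 1.0/6.0;   // lhs class probabilities
    const double pR_A = 1.0/4.0, pR_B = 3.0/4.0;   // rhs class probabilities

    // Gini impurity of each group: sum over classes of p*(1-p).
    const double giniL = pL_A*(1.0-pL_A) + pL_B*(1.0-pL_B);  // ~0.2778
    const double giniR = pR_A*(1.0-pR_A) + pR_B*(1.0-pR_B);  // 0.3750

    // Weight each group by its share of the samples, as computeError does.
    const double error = giniL*(6.0/10.0) + giniR*(4.0/10.0);  // ~0.3167

    std::printf("giniL=%.4f giniR=%.4f error=%.4f\n", giniL, giniR, error);
    return 0;
}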
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::Col<size_t>& assignments) const
{
  math::RandomSeed(std::time(NULL));

  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  // We will use these objects repeatedly for clustering.
  arma::Col<size_t> sampledAssignments;
  arma::mat centroids;
  KMeans<> kmeans;

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    kmeans.Cluster(sampledData, clusters, sampledAssignments, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  kmeans.Cluster(sampledCentroids, clusters, sampledAssignments, centroids);

  // Turn the final centroids into assignments.
  assignments.set_size(data.n_cols);
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = clusters;

    for (size_t j = 0; j < clusters; ++j)
    {
      const double distance = kmeans.Metric().Evaluate(data.col(i), centroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    // Assign the point to its closest cluster.
    assignments[i] = closestCluster;
  }
}