Example #1
void RadialBasisFunction::Train(HostMatrix<float> &Input, HostMatrix<float> &Target){

	//std::cout << "Training" << std::endl;

	//	c_width = (float*) malloc(sizeof(float)*network_size);
	//	memset(c_width,0,sizeof(float)*network_size);

	DeviceMatrix<float> device_X(Input);

	//std::cout << "KMeans" << std::endl;	
	clock_t initialTime = clock();
	KMeans KM;
	KM.SetSeed(seed);
	dCenters = KM.Execute(device_X,network_size);

	cudaDeviceSynchronize(); //cudaThreadSynchronize() is deprecated
	times[0] = (clock() - initialTime);

	//std::cout << "Adjust Widths" << std::endl;
	/*Adjust width using mean of distance to neighbours*/
	initialTime = clock();
	AdjustWidths(number_neighbours);

	cudaDeviceSynchronize();
	times[1] = (clock() - initialTime);

	/*Training weights and scaling factor*/
	HostMatrix<float> TargetArr(Target.Rows(),NumClasses);
	memset(TargetArr.Pointer(),0,sizeof(float)*TargetArr.Elements());

	//Build a one-hot target matrix; class labels are assumed to be 1..NumClasses
	for(int i = 0; i < Target.Rows(); i++){
		TargetArr(i,((int)Target(i,0)-1)) = 1;
	}

	DeviceMatrix<float> d_Target(TargetArr);

	//std::cout << "Calculating Weights" << std::endl;

	initialTime = clock();

	DeviceMatrix<float> device_activ_matrix(device_X.Rows(),dCenters.Rows(),ColumnMajor);

	KernelActivationMatrix(device_activ_matrix.Pointer(),device_X.Pointer(),dCenters.Pointer(),device_X.Columns(),dCenters.Columns(),device_activ_matrix.Columns(),device_activ_matrix.Rows(),scaling_factor,device_c_width.Pointer());

	DeviceMatrix<float> d_Aplus = UTILS::pseudoinverse(device_activ_matrix);

	dWeights = DeviceMatrix<float>(d_Aplus.Rows(),d_Target.Columns());

	d_Aplus.Multiply(d_Aplus,d_Target,dWeights);


	/*Return Weights and Centers*/
	cudaDeviceSynchronize();
	times[2] = (clock() - initialTime);

	// cudaMemcpy(c_width,device_c_width.Pointer(),sizeof(float)*device_c_width.Length(),cudaMemcpyDeviceToHost);
	//	this->Weights = HostMatrix<float>(dWeights);		
	//	this->Centers = HostMatrix<float>(dCenters);

}
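The weight solve above is ordinary least squares: with activation matrix A (one row per training sample, one column per RBF center) and a one-hot target matrix T, the weights are W = pinv(A) * T. The following is a minimal CPU sketch of the same step via the normal equations (A^T A) W = A^T T; it assumes A has full column rank, and every name in it is illustrative rather than part of the GPU library above.

#include <cassert>
#include <cmath>
#include <utility>
#include <vector>

// Solve (A^T A) W = A^T T by Gauss-Jordan elimination with partial pivoting.
// A is m x n (row-major), T is m x k; the returned W is n x k.
std::vector<double> LeastSquaresWeights(const std::vector<double>& A,
                                        const std::vector<double>& T,
                                        int m, int n, int k)
{
	//Form G = A^T A (n x n) and B = A^T T (n x k)
	std::vector<double> G(n * n, 0.0), B(n * k, 0.0);
	for (int i = 0; i < m; ++i) {
		for (int r = 0; r < n; ++r) {
			for (int c = 0; c < n; ++c)
				G[r * n + c] += A[i * n + r] * A[i * n + c];
			for (int c = 0; c < k; ++c)
				B[r * k + c] += A[i * n + r] * T[i * k + c];
		}
	}
	//Eliminate on the augmented system [G | B]
	for (int col = 0; col < n; ++col) {
		int pivot = col;
		for (int r = col + 1; r < n; ++r)
			if (std::fabs(G[r * n + col]) > std::fabs(G[pivot * n + col]))
				pivot = r;
		for (int c = 0; c < n; ++c) std::swap(G[col * n + c], G[pivot * n + c]);
		for (int c = 0; c < k; ++c) std::swap(B[col * k + c], B[pivot * k + c]);
		assert(std::fabs(G[col * n + col]) > 1e-12 && "A must have full column rank");
		for (int r = 0; r < n; ++r) {
			if (r == col) continue;
			double f = G[r * n + col] / G[col * n + col];
			for (int c = 0; c < n; ++c) G[r * n + c] -= f * G[col * n + c];
			for (int c = 0; c < k; ++c) B[r * k + c] -= f * B[col * k + c];
		}
	}
	//G is now diagonal; scale each row of B to finish
	for (int r = 0; r < n; ++r)
		for (int c = 0; c < k; ++c)
			B[r * k + c] /= G[r * n + r];
	return B;
}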
Example #2
int main()
{
    double data[] = {
        0.0, 0.2, 0.4,
        0.3, 0.2, 0.4,
        0.4, 0.2, 0.4,
        0.5, 0.2, 0.4,
        5.0, 5.2, 8.4,
        6.0, 5.2, 7.4,
        4.0, 5.2, 4.4,
        10.3, 10.4, 10.5,
        10.1, 10.6, 10.7,
        11.3, 10.2, 10.9
    };

    const int size = 10; //Number of samples
    const int dim = 3;   //Dimension of feature
    const int cluster_num = 4; //Cluster number

    KMeans* kmeans = new KMeans(dim,cluster_num);
    int* labels = new int[size];
    kmeans->SetInitMode(KMeans::InitUniform);
    kmeans->Cluster(data, size, labels);

    for(int i = 0; i < size; ++i)
    {
        printf("%f, %f, %f belongs to cluster %d\n", data[i*dim+0], data[i*dim+1], data[i*dim+2], labels[i]);
    }

    delete [] labels;
    delete kmeans;

    return 0;
}
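As a quick sanity check on the output, the centroid of each cluster can be recomputed from the returned labels. A minimal sketch, assuming the same flat row-major data layout as above; the helper name is illustrative.

#include <cstdio>
#include <vector>

// Recompute per-cluster means from flat row-major data and per-sample labels.
void PrintClusterMeans(const double* data, const int* labels,
                       int size, int dim, int cluster_num)
{
    std::vector<double> sum(cluster_num * dim, 0.0);
    std::vector<int> count(cluster_num, 0);
    for (int i = 0; i < size; ++i) {
        ++count[labels[i]];
        for (int d = 0; d < dim; ++d)
            sum[labels[i] * dim + d] += data[i * dim + d];
    }
    for (int c = 0; c < cluster_num; ++c) {
        if (count[c] == 0) { printf("cluster %d is empty\n", c); continue; }
        printf("cluster %d mean:", c);
        for (int d = 0; d < dim; ++d)
            printf(" %f", sum[c * dim + d] / count[c]);
        printf("\n");
    }
}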
Example #3
File: test.cpp Project: mlnotes/machine
void test_kmeans()
{
	printf("[test kmeans]\n");
	
	Loader loader("data/kmeans");
	HFMatrix<double> matrix(loader);

	KMeans kmeans;
	kmeans.set_distance(new Euclidean);
	kmeans.cluster(&matrix, 5);
}
Example #4
bool KMeansQuantizer::train(MatrixDouble &trainingData){
    
    if( !initialized ){
        errorLog << "train(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }
    
    //Reset any previous model
    quantizerTrained = false;
    featureDataReady = false;
    clusters.clear();
    quantizationDistances.clear();
    
    //Train the KMeans model
    KMeans kmeans;
    kmeans.setNumClusters(numClusters);
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
    kmeans.setMaxNumEpochs( 10000 );
    
    if( !kmeans.trainInplace(trainingData) ){
        errorLog << "train(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }
    
    //Save the clusters from the KMeans model
    clusters = kmeans.getClusters();
    quantizationDistances.resize(numClusters,0);
    quantizerTrained = true;
    
    return true;
}
Example #5
bool KMeansQuantizer::train_(MatrixDouble &trainingData){
    
    //Clear any previous model
    clear();
    
    //Train the KMeans model
    KMeans kmeans;
    kmeans.setNumClusters(numClusters);
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( minChange );
    kmeans.setMinNumEpochs( minNumEpochs );
    kmeans.setMaxNumEpochs( maxNumEpochs );
    
    if( !kmeans.train_(trainingData) ){
        errorLog << "train_(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }
    
    trained = true;
    initialized = true;
    numInputDimensions = trainingData.getNumCols();
    numOutputDimensions = 1; //This is always 1 for the KMeansQuantizer
    featureVector.resize(numOutputDimensions,0);
    clusters = kmeans.getClusters();
    quantizationDistances.resize(numClusters,0);
    
    return true;
}
Example #6
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::mat& centroids) const
{
  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    KMeans<> kmeans;
    kmeans.Cluster(sampledData, clusters, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  KMeans<> kmeans;
  kmeans.Cluster(sampledCentroids, clusters, centroids);
}
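This overload computes the refined initial centroids directly. In mlpack, RefinedStart is more commonly plugged in as the initial partition policy of KMeans itself, the same template arrangement Example #11 below uses. A minimal usage sketch, assuming the mlpack 2.x header layout:

#include <mlpack/methods/kmeans/kmeans.hpp>
#include <mlpack/methods/kmeans/refined_start.hpp>

// Run k-means with refined starting centroids (Bradley & Fayyad style).
void ClusterWithRefinedStart(const arma::mat& data, const size_t clusters)
{
  arma::Row<size_t> assignments;
  arma::mat centroids;
  mlpack::kmeans::KMeans<mlpack::metric::EuclideanDistance,
                         mlpack::kmeans::RefinedStart> kmeans;
  kmeans.Cluster(data, clusters, assignments, centroids);
}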
Example #7
int main (int argc, const char * argv[])
{
    //Create a new KMeans instance
    KMeans kmeans;
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
    kmeans.setMaxNumEpochs( 10000 );

	//There are a number of ways of training the KMeans algorithm, depending on what you need the KMeans for
	//These are:
	//- with labelled training data (in the ClassificationData format)
	//- with unlabelled training data (in the UnlabelledData format)
	//- with unlabelled training data (in a simple MatrixDouble format)
	
	//This example shows you how to train the algorithm with ClassificationData
	
	//Load some training data to train the KMeans algorithm
    ClassificationData trainingData;
    
    if( !trainingData.load("LabelledClusterData.csv") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
	
    //Train the KMeans algorithm - K will automatically be set to the number of classes in the training dataset
    if( !kmeans.train( trainingData ) ){
        cout << "Failed to train model!\n";
        return EXIT_FAILURE;
    }
	
	//Get the K clusters from the KMeans instance and print them
	cout << "\nClusters:\n";
	MatrixFloat clusters = kmeans.getClusters();
    for(unsigned int k=0; k<clusters.getNumRows(); k++){
        for(unsigned int n=0; n<clusters.getNumCols(); n++){
            cout << clusters[k][n] << "\t";
        }
        cout << endl;
    }
	
    return EXIT_SUCCESS;
}
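The same KMeans class can also be trained straight from a raw matrix when no class labels exist, which is the path the KMeansQuantizer and KMeansFeatures examples use. A minimal sketch, assuming the train_( MatrixDouble ) overload shown in Examples #5 and #9; the data values here are made up for illustration.

bool trainFromRawMatrix(){
    //Build a tiny two-blob dataset by hand
    MatrixDouble data(6, 2);
    const double values[6][2] = { {0.1,0.2},{0.2,0.1},{0.3,0.2},
                                  {5.1,5.0},{5.2,5.3},{4.9,5.1} };
    for(UINT i=0; i<6; i++)
        for(UINT j=0; j<2; j++)
            data[i][j] = values[i][j];

    KMeans kmeans;
    kmeans.setNumClusters( 2 ); //K must be set explicitly for unlabelled data
    return kmeans.train_( data );
}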
Example #8
void ex_kmeans () {

	SampleList samples;
	ssi_size_t n_classes = 4;
	ssi_size_t n_samples = 200;
	ssi_size_t n_streams = 1;
	ssi_real_t distr[][3] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
	ModelTools::CreateTestSamples (samples, n_classes, n_samples, n_streams, distr);

	// training
	{
		KMeans *model = ssi_create (KMeans, "kmeans", true);
		model->getOptions()->k = n_classes;
		Trainer trainer (model);
		trainer.train (samples);
		trainer.save ("kmeans");
	}

	// evaluation
	{
		Trainer trainer;
		Trainer::Load (trainer, "kmeans");			
		trainer.cluster (samples);

		ModelTools::PlotSamples(samples, "kmeans", ssi_rect(650, 0, 400, 400));
	} 

	// split
	{
		KMeans *model = ssi_create (KMeans, "kmeans", true);
		model->load("kmeans.trainer.KMeans.model");

		ISSelectSample ss (&samples);
		ss.setSelection (model->getIndicesPerClusterSize(1), model->getIndicesPerCluster(1));

		ModelTools::PlotSamples (ss, "kmeans", ssi_rect(650,0,400,400));
		
	}
}
Example #9
bool KMeansFeatures::train_(MatrixDouble &trainingData){
    
    if( !initialized ){
        errorLog << "train_(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }
    
    //Reset any previous model
    featureDataReady = false;
    
    const UINT M = trainingData.getNumRows();
    const UINT N = trainingData.getNumCols();
    
    numInputDimensions = N;
    numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.size()-1 ];
    
    //Scale the input data if needed
    ranges = trainingData.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<N; j++){
                trainingData[i][j] = scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1.0);
            }
        }
    }
    
    //Train the KMeans model at each layer
    const UINT K = (UINT)numClustersPerLayer.size();
    for(UINT k=0; k<K; k++){
        KMeans kmeans;
        kmeans.setNumClusters( numClustersPerLayer[k] );
        kmeans.setComputeTheta( true );
        kmeans.setMinChange( minChange );
        kmeans.setMinNumEpochs( minNumEpochs );
        kmeans.setMaxNumEpochs( maxNumEpochs );
        
        trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << endl;
        if( !kmeans.train_( trainingData ) ){
            errorLog << "train_(MatrixDouble &trainingData) - Failed to train kmeans model at layer: " << k << endl;
            return false;
        }
        
        //Save the clusters
        clusters.push_back( kmeans.getClusters() );
        
        //Project the data through the current layer to use as training data for the next layer
        if( k+1 != K ){
            MatrixDouble data( M, numClustersPerLayer[k] );
            VectorDouble input( trainingData.getNumCols() );
            VectorDouble output( data.getNumCols() );
            
            for(UINT i=0; i<M; i++){
                
                //Copy the data into the sample
                for(UINT j=0; j<input.size(); j++){
                    input[j] = trainingData[i][j];
                }
                
                //Project the sample through the current layer
                if( !projectDataThroughLayer( input, output, k ) ){
                    errorLog << "train_(MatrixDouble &trainingData) - Failed to project sample through layer: " << k << endl;
                    return false;
                }
                
                //Copy the result into the training data for the next layer
                for(UINT j=0; j<output.size(); j++){
                    data[i][j] = output[j];
                }
            }
            
            //Swap the data for the next layer
            trainingData = data;
            
        }
        
    }
    
    //Flag that the kmeans model has been trained
    trained = true;
    featureVector.resize( numOutputDimensions, 0 );
    
    return true;
}
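The scale() call above is a plain min-max mapping of each column from its observed range into [0,1]. A minimal sketch of that mapping; the function name is illustrative, not the GRT member itself.

//Map x from [minSource,maxSource] to [minTarget,maxTarget] (min-max scaling)
double scaleValue(double x, double minSource, double maxSource,
                  double minTarget, double maxTarget)
{
    if( maxSource == minSource ) return minTarget; //degenerate range guard
    return (x - minSource) / (maxSource - minSource)
           * (maxTarget - minTarget) + minTarget;
}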
Example #10
int main(int argc, char** argv) {
	//initialize FreeImage library if needed
	#ifdef FREEIMAGE_LIB
	FreeImage_Initialise();
	#endif

	//hardcode the image names and create the FreeImage object
	const char* file_in = "test.jpg";
	const char* file_out = "test_result.jpg";
	fipImage input;

	KMeans img;

	//generate centroids randomly
	mt19937 generator(chrono::system_clock::now().time_since_epoch().count());
	uniform_real_distribution<float> distro(0.0f, 1.0f);
	for(int i = 0; i < CENTROID_COUNT; ++i) {
		centroid temp;
		temp.r = distro(generator);
		temp.g = distro(generator);
		temp.b = distro(generator);

		img.centroids[i] = temp;
	}

	//open and load image as per convention from freeimage
	if(!input.load(file_in)) {
		cout << "Could not load file with name " << file_in << endl;
		return 1;
	}

	FREE_IMAGE_TYPE originalType = input.getImageType();

	if(!input.convertTo24Bits()) {
		cout << "Error occurred when converting pixels to float values." << endl;
		return 1;
	}

	// Assign common method results to variables to save access times
	unsigned int width = input.getWidth();
	unsigned int height = input.getHeight();

	//create pixel structs
	//access raw data
	//float* pixelData = reinterpret_cast<float*>(input.accessPixels());
	img.pixels.resize(width * height);

	for (unsigned int i = 0; i < width; ++i) {
		for (unsigned int j = 0; j < height; ++j) {
			pixel temp;
			byte colors[4];
			input.getPixelColor(i, j, reinterpret_cast<RGBQUAD*>(colors));
			temp.b = colors[0] / 255.0f;
			temp.g = colors[1] / 255.0f;
			temp.r = colors[2] / 255.0f;
			temp.cluster = -1;

			img.pixels[j * width + i] = temp;
		}
	}
	
	StopWatch timer;

	timer.start();
	//Run a fixed number of k-means iterations (no convergence check)
	for(int z = 0; z < 1000; z++){
		img.assignCentroids();
		img.moveCentroids();
	}
	img.assignFinalPixelColors();
	timer.stop();

	//write image
	//allocate output image
	fipImage output(FIT_BITMAP, width, height, 24);

	unsigned int outWidth = output.getWidth();
	unsigned int outHeight = output.getHeight();

	for(unsigned int i = 0; i < outWidth; ++i) {
		for(unsigned int j = 0; j < outHeight; ++j) {
			byte colors[4];
			int index = j * outWidth + i;
			colors[0] = static_cast<byte>(img.pixels[index].b * 255);
			colors[1] = static_cast<byte>(img.pixels[index].g * 255);
			colors[2] = static_cast<byte>(img.pixels[index].r * 255);

			output.setPixelColor(i, j, reinterpret_cast<RGBQUAD*>(colors));
		}
	}

	if(!output.convertToType(originalType)) {
		cout << "Could not convert back to 24 bits for image saving." << endl;
		return 1;
	}

	if(!output.save(file_out)) {
		cout << "Something went wrong with filesaving" << endl;
		return 1;
	}

	#ifdef FREEIMAGE_LIB
	FreeImage_Uninitialise();
	#endif
	
	return 0;
}
Example #11
arma::vec Vespucci::Math::KMeansWrapper::Cluster(const arma::mat &data, const size_t clusters, arma::mat &centroids)
{
    using namespace mlpack::metric;
    using namespace mlpack::kmeans;
    arma::Row<size_t> assignments;
    arma::vec assignments_vec;
    if (allow_empty_){
        if (metric_ == "squaredeuclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<SquaredEuclideanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<SquaredEuclideanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<SquaredEuclideanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "euclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<EuclideanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<EuclideanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<EuclideanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "manhattan"){
            if (init_ == "sampleinitialization"){
                KMeans<ManhattanDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ManhattanDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ManhattanDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "chebyshev"){
            if (init_ == "sampleinitialization"){
                KMeans<ChebyshevDistance, SampleInitialization, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ChebyshevDistance, RandomPartition, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ChebyshevDistance, RefinedStart, AllowEmptyClusters> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
    }
    else{
        if (metric_ == "squaredeuclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<SquaredEuclideanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<SquaredEuclideanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<SquaredEuclideanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "euclidean"){
            if (init_ == "sampleinitialization"){
                KMeans<EuclideanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<EuclideanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<EuclideanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "manhattan"){
            if (init_ == "sampleinitialization"){
                KMeans<ManhattanDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ManhattanDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ManhattanDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
        else if (metric_ == "chebyshev"){
            if (init_ == "sampleinitialization"){
                KMeans<ChebyshevDistance, SampleInitialization> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "randompartition"){
                KMeans<ChebyshevDistance, RandomPartition> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
            else if (init_ == "refinedstart"){
                KMeans<ChebyshevDistance, RefinedStart> k;
                k.Cluster(data, clusters, assignments, centroids);
            }
        }
    }

    assignments_vec.set_size(assignments.n_elem);
    for (arma::uword i = 0; i < assignments.n_elem; ++i)
        assignments_vec(i) = double(assignments(i) + 1);
    return assignments_vec;
}
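Each of the twenty-four branches above differs only in the template arguments, so the empty-cluster half of the dispatch can be folded into one small template helper. A sketch under that assumption; RunKMeans is illustrative and not part of the Vespucci API.

template <typename MetricType, typename InitType>
void RunKMeans(bool allowEmpty, const arma::mat &data, const size_t clusters,
               arma::Row<size_t> &assignments, arma::mat &centroids)
{
    using namespace mlpack::kmeans;
    if (allowEmpty){
        //Third template parameter is the empty-cluster policy
        KMeans<MetricType, InitType, AllowEmptyClusters> k;
        k.Cluster(data, clusters, assignments, centroids);
    }
    else{
        KMeans<MetricType, InitType> k;
        k.Cluster(data, clusters, assignments, centroids);
    }
}

With this helper, each metric/init pair reduces to a single call such as RunKMeans<SquaredEuclideanDistance, SampleInitialization>(allow_empty_, data, clusters, assignments, centroids), halving the branch count.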
Example #12
int main(int argc, char *argv[]) {
	google::ParseCommandLineFlags(&argc, &argv, true);
	google::InitGoogleLogging(argv[0]);

	petuum::HighResolutionTimer data_loading_timer;
	LOG(INFO)<< "training file location: " << FLAGS_train_file;
	KMeans kmeans;
	kmeans.ReadData();
	LOG(INFO)<< "Data Loading Complete. Loaded "  << kmeans.GetTrainingDataSize() << " in "  <<data_loading_timer.elapsed();


	petuum::TableGroupConfig table_group_config;
	table_group_config.num_comm_channels_per_client =
			FLAGS_num_comm_channels_per_client;
	table_group_config.num_total_clients = FLAGS_num_clients;

	table_group_config.num_tables = 4;
	// + 1 for main() thread.
	table_group_config.num_local_app_threads = FLAGS_num_app_threads + 1;
	table_group_config.client_id = FLAGS_client_id;
	table_group_config.stats_path = FLAGS_stats_path;
	petuum::GetHostInfos(FLAGS_hostfile, &table_group_config.host_map);
	if (std::string("SSP").compare(FLAGS_consistency_model) == 0) {
		table_group_config.consistency_model = petuum::SSP;
	} else if (std::string("SSPPush").compare(FLAGS_consistency_model) == 0) {
		table_group_config.consistency_model = petuum::SSPPush;
	} else if (std::string("LocalOOC").compare(FLAGS_consistency_model) == 0) {
		table_group_config.consistency_model = petuum::LocalOOC;
	} else {
		LOG(FATAL)<< "Unkown consistency model: " << FLAGS_consistency_model;
	}

	petuum::PSTableGroup::RegisterRow<petuum::DenseRow<float> >(
			kDenseRowFloatTypeID);
	petuum::PSTableGroup::RegisterRow<petuum::DenseRow<int> >(
			kDenseRowIntTypeID);

	petuum::PSTableGroup::Init(table_group_config, false);

	petuum::ClientTableConfig table_config;
	table_config.table_info.row_type = kDenseRowFloatTypeID;
	table_config.table_info.table_staleness = FLAGS_staleness;
	//table_config.table_info.row_capacity = feature_dim * num_labels;
	table_config.table_info.row_capacity = FLAGS_dimensionality;
	table_config.table_info.row_oplog_type = FLAGS_row_oplog_type;
	table_config.table_info.oplog_dense_serialized =
			FLAGS_oplog_dense_serialized;
	table_config.table_info.dense_row_oplog_capacity = FLAGS_dimensionality;
	//table_config.process_cache_capacity = 1;
	table_config.process_cache_capacity = FLAGS_num_centers;
	table_config.oplog_capacity = table_config.process_cache_capacity;
	petuum::PSTableGroup::CreateTable(FLAGS_centres_table_id, table_config);

	LOG(INFO) << "created centers table";


	//Objective Function table.
	table_config.table_info.dense_row_oplog_capacity = FLAGS_num_epochs+1;
	table_config.table_info.table_staleness = 0;
	petuum::PSTableGroup::CreateTable(FLAGS_objective_function_value_tableId, table_config);
	LOG(INFO) << "created objective values table";


	// Center counts table
	table_config.table_info.row_type = kDenseRowIntTypeID;
	table_config.table_info.table_staleness = FLAGS_count_table_staleness;
	table_config.table_info.row_capacity = FLAGS_num_centers;
	table_config.process_cache_capacity = 1000;
	table_config.oplog_capacity = table_config.process_cache_capacity;
	petuum::PSTableGroup::CreateTable(FLAGS_center_count_tableId, table_config);


	// Table to hold the local deltas.
	petuum::PSTableGroup::CreateTable(FLAGS_update_centres_table_id, table_config);


	LOG(INFO) << "Completed creating tables" ;

	petuum::PSTableGroup::CreateTableDone();

	std::vector<std::thread> threads(FLAGS_num_app_threads);
	for (auto& thr : threads) {

		thr = std::thread(&KMeans::Start, std::ref(kmeans));
	}
	for (auto& thr : threads) {
		thr.join();
	}

	petuum::PSTableGroup::ShutDown();
	LOG(INFO)<< "Kmeans finished and shut down!";
	return 0;
}
Example #13
void GMM::Init(const char* sampleFileName)
{
	const double MIN_VAR = 1E-10;

	KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
	kmeans->SetInitMode(KMeans::InitUniform);
	kmeans->Cluster(sampleFileName, "gmm_init.tmp");

	int* counts = new int[m_mixNum];
	double* overMeans = new double[m_dimNum];	// Overall mean of training data
	for (int i = 0; i < m_mixNum; i++)
	{
		counts[i] = 0;
		m_priors[i] = 0;
		memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
		memset(m_vars[i], 0, sizeof(double) * m_dimNum);
	}
	memset(overMeans, 0, sizeof(double) * m_dimNum);
	memset(m_minVars, 0, sizeof(double) * m_dimNum);

	// Open the sample and label file to initialize the model
	ifstream sampleFile(sampleFileName, ios_base::binary);
	//assert(sampleFile);

	ifstream labelFile("gmm_init.tmp", ios_base::binary);
	//assert(labelFile);

	int size = 0;
	sampleFile.read((char*)&size, sizeof(int));
	sampleFile.seekg(2 * sizeof(int), ios_base::beg);
	labelFile.seekg(sizeof(int), ios_base::beg);

	double* x = new double[m_dimNum];
	int label = -1;

	for (int i = 0; i < size; i++)
	{
		sampleFile.read((char*)x, sizeof(double) * m_dimNum);
		labelFile.read((char*)&label, sizeof(int));

		// Count each Gaussian
		counts[label]++;
		double* m = kmeans->GetMean(label);
		for (int d = 0; d < m_dimNum; d++)
		{
			m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
		}

		// Count the overall mean and variance.
		for (int d = 0; d < m_dimNum; d++)
		{
			overMeans[d] += x[d];
			m_minVars[d] += x[d] * x[d];
		}
	}

	// Compute the overall variance (* 0.01) as the minimum variance.
	for (int d = 0; d < m_dimNum; d++)
	{
		overMeans[d] /= size;
		m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
	}

	// Initialize each Gaussian.
	for (int i = 0; i < m_mixNum; i++)
	{
		m_priors[i] = 1.0 * counts[i] / size;

		if (m_priors[i] > 0)
		{
			for (int d = 0; d < m_dimNum; d++)
			{
				m_vars[i][d] = m_vars[i][d] / counts[i];

				// A minimum variance for each dimension is required.
				if (m_vars[i][d] < m_minVars[d])
				{
					m_vars[i][d] = m_minVars[d];
				}
			}
		}
		else
		{
			memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
			cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
		}
	}

	delete kmeans;
	delete[] x;
	delete[] counts;
	delete[] overMeans;

	sampleFile.close();
	labelFile.close();
}
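The minimum-variance floor above uses the one-pass identity Var(x) = E[x^2] - E[x]^2, scaled by 0.01 and clamped below by MIN_VAR. A compact standalone sketch of the same computation, with illustrative names:

#include <algorithm>

//One-pass overall variance per dimension, floored exactly as above:
//minVars[d] = max(MIN_VAR, 0.01 * (E[x_d^2] - E[x_d]^2))
void ComputeMinVars(const double* data, int size, int dimNum,
                    double* minVars, double MIN_VAR = 1E-10)
{
	for (int d = 0; d < dimNum; ++d)
	{
		double sum = 0, sumSq = 0;
		for (int i = 0; i < size; ++i)
		{
			double v = data[i * dimNum + d];
			sum += v;
			sumSq += v * v;
		}
		double mean = sum / size;
		minVars[d] = std::max(MIN_VAR, 0.01 * (sumSq / size - mean * mean));
	}
}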
Example #14
void GMM::Init(double *data, int N)
{
	const double MIN_VAR = 1E-10;

	KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
	kmeans->SetInitMode(KMeans::InitUniform);
	int* Label = new int[N];
	kmeans->Cluster(data, N, Label);

	int* counts = new int[m_mixNum];
	double* overMeans = new double[m_dimNum];	// Overall mean of training data
	for (int i = 0; i < m_mixNum; i++)
	{
		counts[i] = 0;
		m_priors[i] = 0;
		memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
		memset(m_vars[i], 0, sizeof(double) * m_dimNum);
	}
	memset(overMeans, 0, sizeof(double) * m_dimNum);
	memset(m_minVars, 0, sizeof(double) * m_dimNum);

	int size = N;

	double* x = new double[m_dimNum];
	int label = -1;

	for (int i = 0; i < size; i++)
	{
		for(int j=0;j<m_dimNum;j++)
			x[j]=data[i*m_dimNum+j];
		label=Label[i];

		// Count each Gaussian
		counts[label]++;
		double* m = kmeans->GetMean(label);
		for (int d = 0; d < m_dimNum; d++)
		{
			m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
		}

		// Count the overall mean and variance.
		for (int d = 0; d < m_dimNum; d++)
		{
			overMeans[d] += x[d];
			m_minVars[d] += x[d] * x[d];
		}
	}

	// Compute the overall variance (* 0.01) as the minimum variance.
	for (int d = 0; d < m_dimNum; d++)
	{
		overMeans[d] /= size;
		m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
	}

	// Initialize each Gaussian.
	for (int i = 0; i < m_mixNum; i++)
	{
		m_priors[i] = 1.0 * counts[i] / size;

		if (m_priors[i] > 0)
		{
			for (int d = 0; d < m_dimNum; d++)
			{
				m_vars[i][d] = m_vars[i][d] / counts[i];

				// A minimum variance for each dimension is required.
				if (m_vars[i][d] < m_minVars[d])
				{
					m_vars[i][d] = m_minVars[d];
				}
			}
		}
		else
		{
			memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
			cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
		}
	}
	delete kmeans;
	delete[] x;
	delete[] counts;
	delete[] overMeans;
	delete[] Label;

}
Example #15
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);
    static int index = 0;
    ///Laser Data
    LMS1xx LaserSensor;
    scanCfg cfg;
    scanData data;
    scanDataCfg dataCfg;
    status_t status;

    std::ofstream file;

    double start_angle=0;
    double stop_angle=0;
    double resolution=0;
    double frequency=0;

    ///KMeans related variables
    KMeans<> K;
    mat dataset;
    size_t cluster;
    Col<size_t> assignments;
    mat centroid;


    ///Connect to the Lasersensor
    LaserSensor.connect(host);

    if(LaserSensor.isConnected())
    {
        std::cout << "\nConnected !!!\n";

        LaserSensor.login();

        ///Get Laser Configurations
        cfg = LaserSensor.getScanCfg();
        //cfg.angleResolution = 0.25*10000.0;
        //cfg.scaningFrequency = 25*100;

        //LaserSensor.setScanCfg(cfg);
        //LaserSensor.saveConfig();
       // sleep(3);
        cfg = LaserSensor.getScanCfg();
        start_angle = cfg.startAngle/10000.0; //* DEG2RAD - M_PI/2;
        stop_angle = cfg.stopAngle/10000.0; //* DEG2RAD - M_PI/2;
        resolution = cfg.angleResolution/10000.0;
        frequency = cfg.scaningFrequency/100;

        std::cout << "Start Angle: " << start_angle;
        std::cout << "\tStop Angle: " << stop_angle;
        std::cout << "\tResolution: " << resolution;
        std::cout << "\tFrequency: " << frequency;
        std::cout << std::endl;

        dataCfg.outputChannel = 1;
        dataCfg.remission = true;
        dataCfg.resolution = 1;
        dataCfg.encoder = 0;
        dataCfg.position = false;
        dataCfg.deviceName = false;
        dataCfg.outputInterval = 1;

        LaserSensor.setScanDataCfg(dataCfg); ///Set Data Configuration of the laser data

        LaserSensor.startMeas();    ///Start Measurement

        do
        {
            status = LaserSensor.queryStatus();
            usleep(200);
        }
        while(status != ready_for_measurement);
        {   //note: this brace opens a plain scope block, not a loop body
            LaserSensor.startDevice();
            LaserSensor.scanContinous(1);

            while(LaserSensor.isConnected())
            {
                LaserSensor.getData(data);  ///Get the Laser Data

                //                u_int16_t range[data.dist_len1];
                //                u_int16_t intensity[data.rssi_len1];
                std::vector<int> range(data.dist_len1);     //VLAs are non-standard C++
                std::vector<int> intensity(data.rssi_len1);

                for(int i=0; i<data.dist_len1;i++)
                    range[i] = data.dist1[i];

                for(int i=0; i<data.rssi_len1;i++)
                    intensity[i] = data.rssi1[i];

                if (index == 0)
                {
                    index++;
                    std::cout << std::endl << "Data len = " << data.dist_len1 << std::endl;
                    std::cout << "Intensity len = " << data.rssi_len1 << std::endl;

                    ///distance assumed to be in mm
                    ///Start angle is -45 end is 225
                    float angle_scan = -45.0;
                    float x[1081], y[1081];   ///The resolution is 0.25 degrees so 1081 values
                    int index_range = 0;
                    double slope;
                    cluster = 2;
                    //centroid.zeros();
                    dataset.resize(2,1081);
                    dataset.zeros();

                    file.open("LaserData.txt");

                    while(1)
                    {
                        x[index_range] = range[index_range]*cos(angle_scan*DEG2RAD)/1000.0;
                        y[index_range] = range[index_range] * sin(angle_scan*DEG2RAD)/1000.0;
                        //std::cout << "range: " << range[index_range] << " angle: " << angle_scan;
                        //std::cout << " x: " << x[index_range] << " y : " << y[index_range] << std::endl;
                        angle_scan += 0.25;

                        //if(intensity[index_range] >=850)
                        {
                            file << x[index_range] << "," << y[index_range] << "," << intensity[index_range] << std::endl;
                        }

                        if (angle_scan > 225.0)
                        {
                            break;
                        }
                        index_range++;
                        usleep(100);
                    }
                    int index_tmp = 0;
                    for(int i=0; i<1081;i++)
                    {
                        if (intensity[i] >= 900)
                        {
                            dataset(0,index_tmp) = x[i];
                            dataset(1,index_tmp) = y[i];

                            std::cout << "\n" << dataset[0,index_tmp] << "\t" << dataset[1,index_tmp];
                            index_tmp++;
                        }
                    }
                    std::cout << "\nKMeans Calculations!!!" << std::endl;
                    dataset.resize(2,index_tmp);

                    ///Actual KMeans CLustering
                    K.Cluster(dataset, cluster, assignments, centroid);

/*************************************************************************************************************************
                    static double sum_x[2];
                    static double sum_y[2];
                    int number_dist1=0;
                    int number_dist2=0;

                    for(int i=0; i < assignments.size(); i++) {
                        switch(assignments[i])
                        {
                            case 0:
                                sum_x[0]+=dataset(0,i);
                                sum_y[0]+=dataset(1,i);
                                number_dist1++;
                                break;
                            case 1:
                                sum_x[1]+=dataset(0,i);
                                sum_y[1]+=dataset(1,i);
                                number_dist2++;
                                break;
                        };

                        std::cout << "\n" << assignments[i];
                    }

                    double center1_x, center1_y;
                    double center2_x, center2_y;

                    center1_x = sum_x[0]/number_dist1;
                    center1_y = sum_y[0]/number_dist1;
                    center2_x = sum_x[1]/number_dist2;
                    center2_y = sum_y[1]/number_dist2;

                    std::cout<<center1_x<<"," << center1_y<<"   " << center2_x<<","<<center2_y<<endl;

**************************************************************************************************************************/

//std::cout << "\n" << centroid(0,0) << "\t" << centroid(1,0) << "\t"<< centroid(0,1) << "\t"<< centroid(1,1)<<"\n";

                    slope = (centroid(1,1) - centroid(1,0)) / (centroid(0,1) - centroid(0,0));
                    slope = (atan(slope))*RAD2DEG;

                    std::cout << "\nclusters= " << cluster << std::endl;
                    std::cout << "\nOrientation= " << slope << std::endl;
                }
                usleep(200);
            }
            std::cout << "\n Sensor Disconnected \n";

            ///Disconnect the Laser
            LaserSensor.scanContinous(0);
            LaserSensor.stopMeas();
            LaserSensor.disconnect();
            file.close();
        }
    }
    else
    {
        std::cout <<"\nSensor Not Connected !!!\n";
    }

    return a.exec();
}
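The orientation computed above divides by (centroid(0,1) - centroid(0,0)), which blows up when the two centroids are vertically aligned. A small sketch using atan2, which handles that case and covers the full (-180, 180] range; the function name is illustrative.

#include <cmath>

//Orientation of the line through two centroids, in degrees
double OrientationDeg(double x0, double y0, double x1, double y1)
{
    return std::atan2(y1 - y0, x1 - x0) * 180.0 / M_PI;
}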
Example #16
bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){

    error = 0;
    threshold = 0;

    const UINT M = trainingData.getNumSamples();
    const UINT K = (UINT)classLabels.size();

    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    VectorFloat groupCounter(2,0);
    MatrixFloat classProbabilities(K,2);

    //Use this data to train a KMeans cluster with 2 clusters
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-5 );
    kmeans.setMinNumEpochs( 1 );
    kmeans.setMaxNumEpochs( 100 );

    //Disable the logging to clean things up
    kmeans.setTrainingLoggingEnabled( false );

    if( !kmeans.train_( data ) ){
        errorLog << __GRT_LOG__ << " Failed to train KMeans model for feature: " << featureIndex << std::endl;
        return false;
    }

    //Set the split threshold as the mid point between the two clusters
    const MatrixFloat &clusters = kmeans.getClusters();
    threshold = 0;
    for(UINT i=0; i<clusters.getNumRows(); i++){
        threshold += clusters[i][0];
    }
    threshold /= clusters.getNumRows();

    //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
    groupCounter[0] = groupCounter[1] = 0;
    classProbabilities.setAllValues(0);
    for(UINT i=0; i<M; i++){
        groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
        groupCounter[ groupIndex[i] ]++;
        classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
    }

    //Compute the class probabilities for the lhs group and rhs group
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
        classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
    }

    //Compute the Gini index for the lhs and rhs groups
    giniIndexL = giniIndexR = 0;
    for(UINT k=0; k<K; k++){
        giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
        giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
    }
    weightL = groupCounter[0]/M;
    weightR = groupCounter[1]/M;
    error = (giniIndexL*weightL) + (giniIndexR*weightR);

    return true;
}
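The split score above is the class-weighted Gini impurity: each side g contributes sum_k p_kg * (1 - p_kg), weighted by that side's share of the samples. A compact standalone sketch of the same measure, with illustrative names:

#include <vector>

//Weighted Gini impurity of a binary split. counts[k][g] is the number of
//samples of class k that fell into group g (0 = lhs, 1 = rhs).
double WeightedGini(const std::vector<std::vector<double>>& counts)
{
    double group[2] = {0, 0};
    for (const auto& c : counts) { group[0] += c[0]; group[1] += c[1]; }
    const double total = group[0] + group[1];
    double gini[2] = {0, 0};
    for (int g = 0; g < 2; ++g) {
        if (group[g] == 0) continue;
        for (const auto& c : counts) {
            const double p = c[g] / group[g];
            gini[g] += p * (1.0 - p);
        }
    }
    return (gini[0] * group[0] + gini[1] * group[1]) / total;
}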
Example #17
void RefinedStart::Cluster(const MatType& data,
                           const size_t clusters,
                           arma::Col<size_t>& assignments) const
{
  math::RandomSeed(std::time(NULL));

  // This will hold the sampled datasets.
  const size_t numPoints = size_t(percentage * data.n_cols);
  MatType sampledData(data.n_rows, numPoints);
  // vector<bool> is packed so each bool is 1 bit.
  std::vector<bool> pointsUsed(data.n_cols, false);
  arma::mat sampledCentroids(data.n_rows, samplings * clusters);

  // We will use these objects repeatedly for clustering.
  arma::Col<size_t> sampledAssignments;
  arma::mat centroids;
  KMeans<> kmeans;

  for (size_t i = 0; i < samplings; ++i)
  {
    // First, assemble the sampled dataset.
    size_t curSample = 0;
    while (curSample < numPoints)
    {
      // Pick a random point in [0, data.n_cols).
      size_t sample = (size_t) math::RandInt(data.n_cols);

      if (!pointsUsed[sample])
      {
        // This point isn't used yet.  So we'll put it in our sample.
        pointsUsed[sample] = true;
        sampledData.col(curSample) = data.col(sample);
        ++curSample;
      }
    }

    // Now, using the sampled dataset, run k-means.  In the case of an empty
    // cluster, we re-initialize that cluster as the point furthest away from
    // the cluster with maximum variance.  This is not *exactly* what the paper
    // implements, but it is quite similar, and we'll call it "good enough".
    kmeans.Cluster(sampledData, clusters, sampledAssignments, centroids);

    // Store the sampled centroids.
    sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;

    pointsUsed.assign(data.n_cols, false);
  }

  // Now, we run k-means on the sampled centroids to get our final clusters.
  kmeans.Cluster(sampledCentroids, clusters, sampledAssignments, centroids);

  // Turn the final centroids into assignments.
  assignments.set_size(data.n_cols);
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = clusters;

    for (size_t j = 0; j < clusters; ++j)
    {
      const double distance = kmeans.Metric().Evaluate(data.col(i),
          centroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    // Assign the point to its closest cluster.
    assignments[i] = closestCluster;
  }
}