Example #1
int main(){
    int N = 10;
    int D = 5;
    int K = 4;
    int rand_seed = 2;
    double* X = gen_matrix(N,D);
    // Build ball tree on data set
    VpTree<DataPoint, euclidean_distance>* tree = new VpTree<DataPoint, euclidean_distance>();
    vector<DataPoint> obj_X(N, DataPoint(D, -1, X));
    std::srand((unsigned int) rand_seed);
    for(int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D);
    std::random_shuffle(obj_X.begin(), obj_X.end());
    pd(obj_X);
    cout << "size:" << obj_X.size() << endl;
    // cout << "1:" << obj_X[1] << endl;
    tree->create(obj_X);

    vector<DataPoint> indices;
    vector<double> distances;
    for(int n = 0; n < N; n++) {

        // Find nearest neighbors
        indices.clear();
        distances.clear();
        tree->search(obj_X[n], K + 1, &indices, &distances);
        std::cout << n << std::endl;
        pv(indices);
        pv(distances);
    }
    free(X);
    delete tree;
}
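Example #1 relies on three helpers that are not shown: gen_matrix, pd, and pv. A minimal sketch of what they might look like, assuming gen_matrix returns a malloc'd N x D row-major matrix of random values and pd/pv simply print containers (the names match the calls above, but these bodies are assumptions, not the original definitions):

#include <cstdlib>
#include <iostream>
#include <vector>

// Assumed helper: allocate an N x D row-major matrix filled with random values in [0, 1).
double* gen_matrix(int N, int D) {
    double* X = (double*) malloc(N * D * sizeof(double));
    for(int i = 0; i < N * D; i++) X[i] = (double) rand() / RAND_MAX;
    return X;
}

// Assumed helpers: print the indices of a DataPoint vector and the contents of a double vector.
void pd(const std::vector<DataPoint>& v) {
    for(size_t i = 0; i < v.size(); i++) std::cout << v[i].index() << " ";
    std::cout << std::endl;
}

void pv(const std::vector<DataPoint>& v) { pd(v); }

void pv(const std::vector<double>& v) {
    for(size_t i = 0; i < v.size(); i++) std::cout << v[i] << " ";
    std::cout << std::endl;
}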
Example #2
void TSNE<NDims>::computeGaussianPerplexity(double* X, unsigned int N, int D, int K) {

    if(perplexity > K) Rprintf("Perplexity should be lower than K!\n");

    // Allocate the memory we need
    setupApproximateMemory(N, K);

    // Build ball tree on data set
    VpTree<DataPoint, T>* tree = new VpTree<DataPoint, T>();
    vector<DataPoint> obj_X(N, DataPoint(D, -1, X));
    for(unsigned int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D);
    tree->create(obj_X);

    // Loop over all points to find nearest neighbors
    if (verbose) Rprintf("Building tree...\n");

    int steps_completed = 0;
    #pragma omp parallel for schedule(guided) num_threads(num_threads)
    for(unsigned int n = 0; n < N; n++) {

        vector<DataPoint> indices;
        vector<double> distances;
        indices.reserve(K+1);
        distances.reserve(K+1);

        // Find nearest neighbors
        tree->search(obj_X[n], K + 1, &indices, &distances);

        double * cur_P = val_P.data() + row_P[n];
        computeProbabilities(perplexity, K, distances.data()+1, cur_P); // +1 to avoid self.

        unsigned int * cur_col_P = col_P.data() + row_P[n];
        for (int m=0; m<K; ++m) {
            cur_col_P[m] = indices[m+1].index(); // +1 to avoid self.
        }

        #pragma omp atomic
        ++steps_completed;

        if (verbose) {
            if(steps_completed % 10000 == 0) Rprintf(" - point %d of %d\n", steps_completed, N);
        }
    }

    // Clean up memory
    obj_X.clear();
    delete tree;
}
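Example #2 delegates the sparse-matrix allocation to setupApproximateMemory (presumably sizing row_P, col_P, and val_P for N rows of K entries each) and the bandwidth search to computeProbabilities, neither of which is shown. The inline binary search in Examples #6 and #8 suggests what the latter does; a hedged sketch of such a helper, with the signature taken from the call above and the body adapted from those examples:

#include <cfloat>
#include <cmath>

// Sketch of a computeProbabilities-style helper: binary-search the Gaussian bandwidth (beta)
// until the row entropy matches log(perplexity), then write the row-normalized kernel values.
// 'distances' already excludes the self-distance (the caller passes distances.data() + 1).
static void computeProbabilities(double perplexity, int K, const double* distances, double* cur_P) {
    double beta = 1.0, min_beta = -DBL_MAX, max_beta = DBL_MAX, tol = 1e-5;
    double sum_P = DBL_MIN;
    for(int iter = 0; iter < 200; iter++) {
        // Gaussian kernel row and its entropy
        sum_P = DBL_MIN;
        double H = 0.0;
        for(int m = 0; m < K; m++) { cur_P[m] = exp(-beta * distances[m]); sum_P += cur_P[m]; }
        for(int m = 0; m < K; m++) H += beta * distances[m] * cur_P[m];
        H = (H / sum_P) + log(sum_P);

        double Hdiff = H - log(perplexity);
        if(fabs(Hdiff) < tol) break;
        if(Hdiff > 0) { min_beta = beta; beta = (max_beta ==  DBL_MAX) ? beta * 2.0 : (beta + max_beta) / 2.0; }
        else          { max_beta = beta; beta = (min_beta == -DBL_MAX) ? beta / 2.0 : (beta + min_beta) / 2.0; }
    }
    // Row-normalize
    for(int m = 0; m < K; m++) cur_P[m] /= sum_P;
}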
Example #3
void testVpTree2()
{
    VpTree<Point<int>, int, EuclideanDistance<Point<int> >::DistanceIncremental> tree;
    int N = 1500000;
    for(int i = 0; i < N; ++i)
    {
        tree.insert(Point<int>(GlobalRNG.next(), GlobalRNG.next()), i);
    }
    int M = 15000;
    for(int i = 0; i < M; ++i)
    {
        assert(tree.nearestNeighbor(Point<int>(GlobalRNG.next(), GlobalRNG.next())));
        int k = 100;
        assert(tree.kNN(Point<int>(GlobalRNG.next(), GlobalRNG.next()), k).getSize() == k);
    }
}
Example #4
void testVpTree()
{
    VpTree<Point<int>,int, EuclideanDistance<Point<int> >::DistanceIncremental> tree;
    int N = 5;
    for(int i = 0; i < N; ++i)
    {
        tree.insert(Point<int>(i, i), i);
    }
    for(int i = 0; i < N; ++i)
    {
        int* result = tree.find(Point<int>(i, i));
        if(result) DEBUG(*result);
        assert(result && *result == i);
    }

    Vector<VpTree<Point<int>,int, EuclideanDistance<Point<int> >::DistanceIncremental>::NodeType*> result2 = tree.distanceQuery(Point<int>(0, 4), sqrt(10));

    for(int i = 0; i < result2.getSize(); ++i)
    {
        DEBUG(result2[i]->key[0]);
        DEBUG(result2[i]->key[1]);
    }

}
Example #5
KNNGraph_FixedK_MST::KNNGraph_FixedK_MST(std::shared_ptr<gsl::Matrix> data, PropertyList property_list)
    : data_(data), property_list_(property_list) {

  if (property_list_[KNNGRAPH_FIXED_K_NUMBER] == 0.0) property_list_[KNNGRAPH_FIXED_K_NUMBER] = 30.0;
  if (property_list_[KNNGRAPH_FIXED_K_WITH_MST_EDGE_POOL_DEPTH] == 0.0)
    property_list_[KNNGRAPH_FIXED_K_WITH_MST_EDGE_POOL_DEPTH] = property_list_[KNNGRAPH_FIXED_K_NUMBER] + 100;

  if (property_list_[KNNGRAPH_GRAPH_BACKEND] == KNNGRAPH_GRAPH_BACKEND_ADJACENCYLIST) {
    knngraph_ = std::make_shared<GraphUtils::AdjacencyList>(data_->rows());
  } else if (property_list_[KNNGRAPH_GRAPH_BACKEND] == KNNGRAPH_GRAPH_BACKEND_BOOST) {
    knngraph_ = std::make_shared<GraphUtils::BoostAdjacencyList>(data_->rows());
  } else {
    throw std::invalid_argument("Invalid KNNGRAPH_GRAPH_BACKEND value.");
  }
  kIndex FIXED_K = static_cast<Index>(property_list_[KNNGRAPH_FIXED_K_NUMBER]);
  kIndex MST_EDGE_POOL_DEPTH = static_cast<Index>(property_list_[KNNGRAPH_FIXED_K_WITH_MST_EDGE_POOL_DEPTH]);

  LOGI("Constructing kNN graph with FixedK_MST.")

  VpTree<PixelView, SquaredDistance> vptree;
  LOGI("Create PixelView array.");
  auto pixel_views = CreatePixelViewsFromMatrix(*data_);
  LOGI("Creating VpTree for the current data matrix.")
  vptree.create(pixel_views);
  LOGI("VpTree created. Now creating kNN graph.")

  UnionFind uf(data_->rows());
  std::vector<UndirectedEdge> unused_edges;

  for (Index n = 0; n < data_->rows(); ++n) {
    PixelView current_pixel = pixel_views[n];
    std::vector<PixelView> results;
    std::vector<Scalar> distance_squares;
    vptree.search(current_pixel, MST_EDGE_POOL_DEPTH, &results, &distance_squares);

    for (Index j = 0; j < MST_EDGE_POOL_DEPTH; ++j) {
      if (j < FIXED_K) {
        knngraph_->Connect(current_pixel.index, results[j].index, std::sqrt(distance_squares[j]));
        uf.Connect(current_pixel.index, results[j].index);
      } else {
        unused_edges.push_back(UndirectedEdge(current_pixel.index, results[j].index, std::sqrt(distance_squares[j])));
      }
    }
  }

  LOGI("kNN graph without MST has " << uf.count() << " connected parts.")

  if (uf.count() != 1) {
    LOGI("Start augmenting MST.")

    std::sort(std::begin(unused_edges), std::end(unused_edges), [](UndirectedEdge a, UndirectedEdge b) {
      return a.weight < b.weight;
    });

    Index augment_count = 0;
    for (auto edge : unused_edges) {
      Index old_count = uf.count();
      uf.Connect(edge.index_a, edge.index_b);
      if (uf.count() < old_count) {
        knngraph_->Connect(edge.index_a, edge.index_b, edge.weight);
        augment_count++;
      }
      if (uf.count() == 1) break;
    }

    LOGI(augment_count << " edges augmented.")

    if (uf.count() != 1) {
      throw std::invalid_argument(
          "MST Augmentation failed -- need to increase KNNGRAPH_FIXED_K_WITH_MST_EDGE_POOL_DEPTH");
    }

  }
}
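The MST augmentation above is Kruskal-style: the leftover candidate edges are sorted by weight and an edge is kept only if it merges two components that the kNN edges left disconnected, which UnionFind tracks through Connect and count. The real class is not shown; a minimal disjoint-set sketch with that interface (an assumption about its internals, for illustration only):

#include <cstddef>
#include <numeric>
#include <vector>

// Minimal union-find: Connect(a, b) merges the components containing a and b,
// count() returns the number of remaining components.
class UnionFind {
 public:
  explicit UnionFind(std::size_t n) : parent_(n), count_(n) {
    std::iota(parent_.begin(), parent_.end(), std::size_t(0));
  }
  std::size_t Find(std::size_t x) {
    while(parent_[x] != x) { parent_[x] = parent_[parent_[x]]; x = parent_[x]; }  // path halving
    return x;
  }
  void Connect(std::size_t a, std::size_t b) {
    std::size_t ra = Find(a), rb = Find(b);
    if(ra != rb) { parent_[ra] = rb; --count_; }
  }
  std::size_t count() const { return count_; }
 private:
  std::vector<std::size_t> parent_;
  std::size_t count_;
};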
Example #6
// Compute input similarities with a fixed perplexity using ball trees (this function allocates memory that another function should free)
void TSNE::computeGaussianPerplexity(double* X, int N, int D, int** _row_P, int** _col_P, double** _val_P, double perplexity, int K) {
    
    if(perplexity > K) printf("Perplexity should be lower than K!\n");
    
    // Allocate the memory we need
    *_row_P = (int*)    malloc((N + 1) * sizeof(int));
    *_col_P = (int*)    calloc(N * K, sizeof(int));
    *_val_P = (double*) calloc(N * K, sizeof(double));
    if(*_row_P == NULL || *_col_P == NULL || *_val_P == NULL) { cout<<"Memory allocation failed!\n"; exit(1); }
    int* row_P = *_row_P;
    int* col_P = *_col_P;
    double* val_P = *_val_P;
    double* cur_P = (double*) malloc((N - 1) * sizeof(double));
    if(cur_P == NULL) { cout<<"Memory allocation failed!\n"; exit(1); }
    row_P[0] = 0;
    for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + K;    
    
    // Build ball tree on data set
    VpTree<DataPoint, euclidean_distance>* tree = new VpTree<DataPoint, euclidean_distance>();
    vector<DataPoint> obj_X(N, DataPoint(D, -1, X));
    for(int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D);
    tree->create(obj_X);
    
    // Loop over all points to find nearest neighbors
    printf("Building tree...\n");
    vector<DataPoint> indices;
    vector<double> distances;
    for(int n = 0; n < N; n++) {
        
        if(n % 10000 == 0) printf(" - point %d of %d\n", n, N);
        
        // Find nearest neighbors
        indices.clear();
        distances.clear();
        tree->search(obj_X[n], K + 1, &indices, &distances);
        
        // Initialize some variables for binary search
        bool found = false;
        double beta = 1.0;
        double min_beta = -DBL_MAX;
        double max_beta =  DBL_MAX;
        double tol = 1e-5;

        // Iterate until we find a good perplexity
        int iter = 0; double sum_P;
        while(!found && iter < 200) {

            // Compute Gaussian kernel row
            for(int m = 0; m < K; m++) cur_P[m] = exp(-beta * distances[m + 1]);

            // Compute entropy of current row
            sum_P = DBL_MIN;
            for(int m = 0; m < K; m++) sum_P += cur_P[m];
            double H = .0;
            for(int m = 0; m < K; m++) H += beta * (distances[m + 1] * cur_P[m]);
            H = (H / sum_P) + log(sum_P);

            // Evaluate whether the entropy is within the tolerance level
            double Hdiff = H - log(perplexity);
            if(Hdiff < tol && -Hdiff < tol) {
                found = true;
            }
            else {
                if(Hdiff > 0) {
                    min_beta = beta;
                    if(max_beta == DBL_MAX || max_beta == -DBL_MAX)
                        beta *= 2.0;
                    else
                        beta = (beta + max_beta) / 2.0;
                }
                else {
                    max_beta = beta;
                    if(min_beta == -DBL_MAX || min_beta == DBL_MAX)
                        beta /= 2.0;
                    else
                        beta = (beta + min_beta) / 2.0;
                }
            }

            // Update iteration counter
            iter++;
        }

        // Row-normalize current row of P and store in matrix
        for(int m = 0; m < K; m++) cur_P[m] /= sum_P;
        for(int m = 0; m < K; m++) {
            col_P[row_P[n] + m] = indices[m + 1].index();
            val_P[row_P[n] + m] = cur_P[m];
        }
    }
    
    // Clean up memory
    obj_X.clear();
    free(cur_P);
    delete tree;
}
Example #7
int _tmain(int argc, _TCHAR* argv[]) {
#else
int main(int argc, char **argv) {
#endif

  if(argc != 2 && argc != 3) {
    fprintf(stderr, "Usage: sc <img1>\n");
    return 1;
  }

  int spSize = 5;
  if(argc == 3) {
    sscanf(argv[2], "%d", &spSize);
  }

  ImageFile imgFile (argv[1]);
  if(!imgFile.Load()) {
    fprintf(stderr, "Error loading file: %s\n", argv[1]);
    return 1;
  }

  FasTC::Image<> *img = imgFile.GetImage();

  const int kWidth = img->GetWidth();
  const int kHeight = img->GetHeight();
  const int nPixels = kWidth * kHeight;
  const uint32 pixelBufSz = nPixels * sizeof(FasTC::Pixel);

  FasTC::Pixel *pixels = new FasTC::Pixel[nPixels];
  memcpy(pixels, img->GetPixels(), pixelBufSz);

  uint32 *rawPixels = new uint32[kWidth * kHeight];

  for(int i = 0; i < nPixels; i++) {
    // Pixels are stored as little endian ARGB, so we want ABGR
    pixels[i].Shuffle(0x6C); // 01 10 11 00
    rawPixels[i] = pixels[i].Pack();
  }

  int *labels = new int[nPixels];
  int numLabels;

  SLIC slic;
  slic.PerformSLICO_ForGivenStepSize(
    rawPixels,
    kWidth,
    kHeight,
    labels,
    numLabels,
    spSize, 1.0);

  std::unordered_map<uint32, Region> regions;
  CollectPixels(kWidth, kHeight, pixels, labels, regions);
  std::cout << "Num regions: " << regions.size() << std::endl;

  for(auto &r : regions) {
    r.second.Compress();
    r.second.Reconstruct();
  }

  for(int i = 0; i < nPixels; i++) {
    pixels[i] = regions[labels[i]].GetNextPixel();
    pixels[i].Shuffle(0x6C);
  }

  std::vector<Partition<4, 4> > partitions;
  EnumerateBPTC(partitions);
  std::cout << partitions.size() << " 4x4 BPTC partitions" << std::endl;

  VpTree<Partition<4, 4>, Partition<4, 4>::Distance> vptree;
  vptree.create(partitions);

  // Just to test, find the partition close to half 0 half 1..
  Partition<4, 4> test;
  for(uint32 i = 0; i < 16; i++) {
    if(i < 8) {
      test[i] = 0;
    } else {
      test[i] = 1;
    }
  }

  vector<Partition<4, 4> > closest;
  vptree.search(test, 1, &closest, NULL);
  std::cout << closest[0].GetIndex() << std::endl;

  BPTCC::CompressionSettings settings;
  settings.m_NumSimulatedAnnealingSteps = 0;
  settings.m_ShapeSelectionFn = ChosePresegmentedShape<4, 4>;

  SelectionInfo info(vptree, labels, kWidth, kHeight);
  settings.m_ShapeSelectionUserData = &info;

  uint8 *outBuf = new uint8[kWidth * kHeight];
  FasTC::CompressionJob cj(
     FasTC::eCompressionFormat_BPTC,
     reinterpret_cast<const uint8 *>(pixels),
     outBuf,
     static_cast<uint32>(kWidth),
     static_cast<uint32>(kHeight));

  StopWatch sw;
  sw.Start();
  BPTCC::Compress(cj, settings);
  sw.Stop();
  std::cout << "Compression time: " << sw.TimeInMilliseconds() << "ms" << std::endl;

  CompressedImage ci(kWidth, kHeight, FasTC::eCompressionFormat_BPTC, outBuf);
  FasTC::Image<> outImg(kWidth, kHeight, pixels);

  std::cout << "PSNR: " << outImg.ComputePSNR(&ci) << "db" << std::endl;

  ImageFile outImgFile("out.png", eFileFormat_PNG, outImg);
  outImgFile.Write();

  delete [] labels;
  delete [] rawPixels;
  delete [] pixels;
  return 0;
}
Example #8
// Compute input similarities with a fixed perplexity using ball trees (this function allocates memory that another function should free)
void TSNE::computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P,
	double perplexity, int K, float gpu_mem, int verbose) {
	float start;
	float end;

	if (perplexity > K) printf("Perplexity should be lower than K!\n");

	// Allocate the memory we need
	*_row_P = (unsigned int*)malloc((N + 1) * sizeof(unsigned int));
	*_col_P = (unsigned int*)calloc(N * K, sizeof(unsigned int));
	*_val_P = (double*)calloc(N * K, sizeof(double));
	if (*_row_P == NULL || *_col_P == NULL || *_val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); }
	unsigned int* row_P = *_row_P;
	unsigned int* col_P = *_col_P;
	double* val_P = *_val_P;
	double* cur_P = (double*)malloc((N - 1) * sizeof(double));
	if (cur_P == NULL) { printf("Memory allocation failed!\n"); exit(1); }
	row_P[0] = 0;
	for (int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + (unsigned int)K;

	// Build ball tree on data set
	VpTree<DataPoint, euclidean_distance>* tree = new VpTree<DataPoint, euclidean_distance>();
	vector<DataPoint> obj_X(N, DataPoint(D, -1, X));
	for (int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D);
	tree->create(obj_X);

	//If using gpu then calculate and save all euclidean distances and use those to search the tree
	EuclideanDistances* all_distances = new EuclideanDistances();
	if (gpu_mem > 0){
		if (gpu_mem > 1){
			printf("Requested GPU memory needs to be between 0 and 1 (of total available memory). Setting it to 0.8.");
			gpu_mem = 0.8;
		}
		start = clock();
		computeSquaredEuclideanDistanceOnGpu(X, X, all_distances, N, D, gpu_mem, verbose);
		tree->set_all_euclidean_distances(*all_distances);
		end = clock();
		if(verbose > 1) printf("Time spend in calculating all distances in GPU: %f\n", float(end - start) / CLOCKS_PER_SEC);
	}
	else{
		if (verbose > 0) printf("Using CPU to calculate distances during tree search\n");
	}

	// Loop over all points to find nearest neighbors
	if (verbose > 0) printf("Building tree...\n");
	vector<DataPoint> indices;
	vector<double> distances;

	start = clock();
	for (int n = 0; n < N; n++) {
		if (n % 10000 == 0 && verbose > 1) printf(" - Building tree and finding perplexities, point %d of %d\n", n, N);
		// Find nearest neighbors
		indices.clear();
		distances.clear();
		tree->search(obj_X[n], K + 1, &indices, &distances);

		// Initialize some variables for binary search
		bool found = false;
		double beta = 1.0;
		double min_beta = -DBL_MAX;
		double max_beta = DBL_MAX;
		double tol = 1e-5;

		// Iterate until we find a good perplexity
		int iter = 0; double sum_P;
		while (!found && iter < 200) {

			// Compute Gaussian kernel row
			for (int m = 0; m < K; m++) cur_P[m] = exp(-beta * distances[m + 1]);

			// Compute entropy of current row
			sum_P = DBL_MIN;
			for (int m = 0; m < K; m++) sum_P += cur_P[m];
			double H = .0;
			for (int m = 0; m < K; m++) H += beta * (distances[m + 1] * cur_P[m]);
			H = (H / sum_P) + log(sum_P);

			// Evaluate whether the entropy is within the tolerance level
			double Hdiff = H - log(perplexity);
			if (Hdiff < tol && -Hdiff < tol) {
				found = true;
			}
			else {
				if (Hdiff > 0) {
					min_beta = beta;
					if (max_beta == DBL_MAX || max_beta == -DBL_MAX)
						beta *= 2.0;
					else
						beta = (beta + max_beta) / 2.0;
				}
				else {
					max_beta = beta;
					if (min_beta == -DBL_MAX || min_beta == DBL_MAX)
						beta /= 2.0;
					else
						beta = (beta + min_beta) / 2.0;
				}
			}

			// Update iteration counter
			iter++;
		}

		// Row-normalize current row of P and store in matrix
		for (unsigned int m = 0; m < K; m++) cur_P[m] /= sum_P;
		for (unsigned int m = 0; m < K; m++) {
			col_P[row_P[n] + m] = (unsigned int)indices[m + 1].index();
			val_P[row_P[n] + m] = cur_P[m];
		}
		distances.clear();
		indices.clear();
	}
	end = clock();
	if (verbose > 1) printf("Time spend in building tree and finding perplexities: %f\n", float(end - start) / CLOCKS_PER_SEC);

	// Clean up memory
	obj_X.clear();
	free(cur_P);
	delete tree;
	delete all_distances;
}
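Example #8's GPU path precomputes every pairwise squared Euclidean distance (computeSquaredEuclideanDistanceOnGpu) and hands the result to the tree via set_all_euclidean_distances so that searches can look distances up instead of recomputing them. Neither that kernel nor the EuclideanDistances container is shown; as a point of reference, the same quantity computed on the CPU into a plain N*N buffer (illustrative only, not the project's API, and without the gpu_mem tiling the GPU version presumably needs):

#include <vector>

// CPU reference for the precomputation: dist2[i * N + j] = ||X_i - X_j||^2
// for a row-major X of shape N x D.
static std::vector<double> allSquaredEuclideanDistances(const double* X, int N, int D) {
    std::vector<double> dist2((size_t) N * N, 0.0);
    for(int i = 0; i < N; i++) {
        for(int j = i + 1; j < N; j++) {
            double d2 = 0.0;
            for(int d = 0; d < D; d++) {
                double diff = X[(size_t) i * D + d] - X[(size_t) j * D + d];
                d2 += diff * diff;
            }
            dist2[(size_t) i * N + j] = d2;
            dist2[(size_t) j * N + i] = d2;
        }
    }
    return dist2;
}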