void kmeans()
{
	const vl_size data_dim = 2;
	const vl_size num_data = 1000;

	float data[data_dim * num_data] = { 0.0f, };
	for (vl_size i = 0; i < data_dim * num_data; ++i)
		data[i] = (float)std::rand() / RAND_MAX;

	//
	std::cout << "start processing ..." << std::endl;

	const vl_size num_clusters = 3;
	const VlVectorComparisonType distance = VlDistanceL2;
	const vl_size num_max_iterations = 100;

	VlKMeans *kmeans = vl_kmeans_new(VL_TYPE_DOUBLE, distance);

	vl_kmeans_set_max_num_iterations(kmeans, num_max_iterations);

	if (true)
	{
		// initialization.
#if 1
		vl_kmeans_init_centers_with_rand_data(kmeans, (void const *)data, data_dim, num_data, num_clusters);
#elif 0
		vl_kmeans_init_centers_plus_plus(kmeans, (void const *)data, data_dim, num_data, num_clusters);
#else
		{
			float init_centers[data_dim * num_clusters] = { 0.0f, };
			for (vl_size i = 0; i < data_dim * num_clusters; ++i)
				init_centers[i] = (float)std::rand() / RAND_MAX;
			vl_kmeans_set_centers(kmeans, (void const *)init_centers, data_dim, num_clusters);
		}
#endif

		// clustering.
		const double energy = vl_kmeans_refine_centers(kmeans, data, num_data);
	}
	else
	{
		const VlKMeansAlgorithm algorithm = VlKMeansLloyd;  // VlKMeansLloyd, VlKMeansElkan, VlKMeansANN.
		const VlKMeansInitialization initialization = VlKMeansRandomSelection;  // VlKMeansRandomSelection, VlKMeansPlusPlus.
		const vl_size num_repetitions = 100;

		vl_kmeans_set_algorithm(kmeans, VlKMeansLloyd);
		vl_kmeans_set_initialization(kmeans, VlKMeansRandomSelection);
		vl_kmeans_set_num_repetitions(kmeans, num_repetitions);

		// clustering.
		const double energy = vl_kmeans_cluster(kmeans, (void const *)data, data_dim, num_data, num_clusters);
	}

	//
	const vl_size num_iterations = vl_kmeans_get_num_repetitions(kmeans);
	//const vl_type data_type = vl_kmeans_get_data_type(kmeans);

	//
	{
		const float *centers = (float *)vl_kmeans_get_centers(kmeans);
		for (int i = 0; i < num_clusters; ++i)
		{
			std::cout << '(';
			for (int j = 0; j < data_dim; ++j)
			{
				if (j) std::cout << ',';
				std::cout << centers[i * data_dim + j];
			}
			std::cout << ')' << std::endl;
		}
	}

	//
	{
		vl_uint32 assignments[num_data] = { 0, };
		double distances[num_data] = { 0, };
		vl_kmeans_quantize(kmeans, assignments, (void *)distances, (void const *)data, num_data);

		for (int i = 0; i < num_data; ++i)
		{
			std::cout << '(';
			for (int j = 0; j < data_dim; ++j)
			{
				if (j) std::cout << ',';
				std::cout << data[i * data_dim + j];  // TODO [check] >> is it correct?
			}
			std::cout << ") => " << assignments[i] << std::endl;
		}
	}

	std::cout << "end processing ..." << std::endl;

	//
	if (kmeans)
	{
		vl_kmeans_delete(kmeans);
		kmeans = NULL;
	}
}
示例#2
0
  bool VLFeat::CalculateCommon(int f, bool all, int l) {
    string msg = "VLFeat::CalculateCommon("+ToStr(f)+","+ToStr(all)+","+
      ToStr(l)+") : ";

    // if (!do_fisher && !do_vlad) {
    //   cerr << msg
    // 	   << "either encoding=fisher or encoding=vlad should be specified"
    // 	   << endl;
    //   return false;
    // }

    if (!gmm && !kmeans) {
      cerr << msg << "either gmm=xxx or kmeans=xxx option should be given"
	   << endl;
      return false;
    }

    cox::tictac::func tt(tics, "VLFeat::CalculateCommon");
    
    // obs! only some parameters here, should be in ProcessOptionsAndRemove()
    // too, also scales and geometry should be made specifiable...
    bool normalizeSift = false, renormalize = true, flat_window = true;
    size_t step = 3, binsize = 8;

    EnsureImage();

    int width = Width(true), height = Height(true);

    if (FrameVerbose())
      cout << msg+"wxh="
	   << width << "x" << height << "=" << width*height << endl;

    vector<float> rgbcoeff { 0.2989, 0.5870, 0.1140 };

    imagedata idata = CurrentFrame();
    idata.convert(imagedata::pixeldata_float);
    idata.force_one_channel(rgbcoeff);

    vector<float> dsift;
    size_t descr_size_orig = 0, descr_size_final = 0;
    vector<float> scales { 1.0000, 0.7071, 0.5000, 0.3536, 0.2500 };
    // vector<float> scales { 1.0000 };
    for (size_t i=0; i<scales.size(); i++) {
      if (KeyPointVerbose())
	cout << "Starting vl_dsift_process() in scale " << scales[i] << endl;
      
      imagedata simg = idata;
      if (scales[i]!=1) {
	scalinginfo si(simg.width(), simg.height(),
		       (int)floor(scales[i]*simg.width()+0.5),
		       (int)floor(scales[i]*simg.height()+0.5));
	simg.rescale(si, 1);
      }

      // VlDsiftFilter *sf = vl_dsift_new(simg.width(), simg.height());
      VlDsiftFilter *sf = vl_dsift_new_basic(simg.width(), simg.height(),
					     step, binsize);

      // opts.scales = logspace(log10(1), log10(.25), 5) ;
      // void vl_dsift_set_bounds	(	VlDsiftFilter * 	self,
      // 					int 	minX,
      // 					int 	minY,
      // 					int 	maxX,
      // 					int 	maxY 
      // 					);	
      
      // VlDsiftDescriptorGeometry geom = { 8, 4, 4, 0, 0 };
      // vl_dsift_set_geometry(sf, &geom);
      
      //vl_dsift_set_steps(sf, 3, 3);

      //vl_dsift_set_window_size(sf, 8);

      vl_dsift_set_flat_window(sf, flat_window); // aka fast in matlab

      vector<float> imgvec = simg.get_float();
      const float *img_fp = &imgvec[0];
      // cout << "IMAGE = " << img_fp[0] << " " << img_fp[1] << " "
      //      << img_fp[2] << " ... " << img_fp[41] << endl;

      vl_dsift_process(sf, img_fp);
      
      // if opts.rootSift // false
      // 		descrs{si} = sqrt(descrs{si}) ;
      // end
      // 	if opts.normalizeSift //true
      // 		  descrs{si} = snorm(descrs{si}) ;
      // end

      descr_size_orig = sf->descrSize;
      size_t nf = sf->numFrames;
      const VlDsiftKeypoint *k = sf->frames;
      float *d  = sf->descrs;
      
      if (KeyPointVerbose())
	cout << "  found " << sf->numFrames << " 'frames' in "
	     << simg.info() << endl
	     << "  descriptor dim " << descr_size_orig << endl;
      
      if (PixelVerbose())
	for (size_t i=0; i<nf; i++) {
	  cout << "  i=" << i << " x=" << k[i].x << " y=" << k[i].y
	       << " s=" << k[i].s << " norm=" << k[i].norm;
	  if (FullVerbose()) {
	    cout << " RAW";
	    for (size_t j=0; j<descr_size_orig; j++)
	      cout << " " << d[i*descr_size_orig+j];
	  }
	  cout << endl;
	}

      if (normalizeSift) {
	for (size_t i=0; i<nf; i++) {
	  if (PixelVerbose())
	    cout << "  i=" << i << " x=" << k[i].x << " y=" << k[i].y
		 << " s=" << k[i].s << " norm=" << k[i].norm;
	  double mul = 0.0;
	  for (size_t j=0; j<descr_size_orig; j++)
	    mul += d[i*descr_size_orig+j]*d[i*descr_size_orig+j];
	  if (mul)
	    mul = 1.0/sqrt(mul);
	  if (FullVerbose())
	    cout << " NORM";
	  for (size_t j=0; j<descr_size_orig; j++) {
	    d[i*descr_size_orig+j] *= mul;
	    if (FullVerbose())
	      cout << " " << d[i*descr_size_orig+j];
	  }
	  if (PixelVerbose())
	    cout << endl;
	}
      }

      if (!pca.vector_length()) {
	dsift.insert(dsift.end(), d, d+nf*descr_size_orig);
	descr_size_final = descr_size_orig;

      } else {
	for (size_t i=0; i<nf; i++) {
	  vector<float> vin(d+i*descr_size_orig, d+(i+1)*descr_size_orig);
	  vector<float> vout = pca.projection_coeff(vin);
	  dsift.insert(dsift.end(), vout.begin(), vout.end());
	}
	descr_size_final = pca.base_size();
      }
	  
      vl_dsift_delete(sf);
    }

    size_t numdata = dsift.size()/descr_size_final;
    const float *datain = &dsift[0];

    vector<float> enc((do_fisher?2:1)*descriptor_dim()*nclusters());
    float *dataout = &enc[0];
      
    if (do_fisher) {
      if (FrameVerbose())
	cout << msg << "fisher encoding " << numdata
	     << " descriptors of size " << descr_size_orig
	     << " => " << descr_size_final
	     << " with gmm dimensionality " << descriptor_dim()
	     << endl;
      
      if (descr_size_final!=descriptor_dim()) {
	cerr << msg << "dimensionality mismatch descr_size_final="
	     << descr_size_final << " descriptor_dim()=" << descriptor_dim() 
	     << endl;
	return false;
      }

      vl_fisher_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(),
		       nclusters(), covariances(), priors(),
		       datain, numdata, VL_FISHER_FLAG_IMPROVED) ;
    }

    if (do_vlad) {
      //obs! correct use of pca?

      if (FrameVerbose())
	cout << msg << "vlad encoding " << numdata
	     << " descriptors of size " << descr_size_final << endl;

      vector<vl_uint32> indexes(numdata);
      vector<float> distances(numdata);

      if (kdtree)
	vl_kdforest_query_with_array(kdtree, &indexes[0], 1, numdata, &distances[0], datain);
      else
	vl_kmeans_quantize(kmeans, &indexes[0], &distances[0], datain, numdata);

      vector<float> assignments(numdata*nclusters());
      for (size_t i=0; i<numdata; i++)
	assignments[i * nclusters() + indexes[i]] = 1;
      
      int vlad_flags = VL_VLAD_FLAG_SQUARE_ROOT|VL_VLAD_FLAG_NORMALIZE_COMPONENTS;

      vl_vlad_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(),
		     nclusters(), datain, numdata, &assignments[0],
		     vlad_flags);
    }

    if (renormalize) {
      if (PixelVerbose())
	cout << " RENORM:";
      double mul = 0.0;
      for (size_t j=0; j<enc.size(); j++)
	mul += enc[j]*enc[j];
      if (mul)
	mul = 1.0/sqrt(mul);
      for (size_t j=0; j<enc.size(); j++) {
	if (PixelVerbose())
	  cout << " " << enc[j];
	enc[j] *= mul;
	if (PixelVerbose())
	  cout << "->" << enc[j];
      }
      if (PixelVerbose())
	cout << endl;
    }

    ((VectorData*)GetData(0))->setVector(enc);
    
    return true;
  }