Esempio n. 1
0
Mat PyramidsVl::extractFeatures(const Mat &im, vector<KeyPoint> &keypoints, int step)
{
	Q_UNUSED(step);
	Mat features(0, 128, CV_32F);
	double magnif = 6;
	QList<int> sizes;
	sizes << 4;
	sizes << 6;
	sizes << 8;
	sizes << 10;

	/* convert to float array */
	assert(im.type() == CV_8U);
	float *imdata = new float[im.rows * im.cols];
	for (int i = 0; i < im.rows; i++)
		for (int j = 0; j < im.cols; j++)
			imdata[i * im.cols + j] = im.row(i).data[j];
	float *smoothed = new float[im.rows * im.cols];

	for (int i = 0; i < sizes.size(); i++) {
		int step = sizes[i];

		/* smoothing step */
		double sigma = step / magnif;
		vl_imsmooth_f(smoothed, im.cols, imdata, im.cols, im.rows, im.cols, sigma, sigma);
		memcpy(smoothed, imdata, im.rows * im.cols * 4);

		/* denset sift */
		VlDsiftFilter *dsift = vl_dsift_new_basic(im.cols, im.rows, step, 8);
		vl_dsift_process(dsift, smoothed);
		int cnt = vl_dsift_get_keypoint_num(dsift);
		const float *descs = vl_dsift_get_descriptors(dsift);
		const VlDsiftKeypoint *kpts = vl_dsift_get_keypoints(dsift);
		for (int i = 0; i < cnt; i++) {
			Mat ft(1, 128, CV_32F);
			for (int j = 0; j < 128; j++)
				ft.at<float>(0, j) = qMin(descs[i * 128 + j] * 512, float(255));
			features.push_back(ft);
			KeyPoint kpt;
			kpt.pt.x = kpts[i].x;
			kpt.pt.y = kpts[i].y;
			keypoints.push_back(kpt);
		}
		vl_dsift_delete(dsift);
	}

	delete [] imdata;
	delete [] smoothed;

	return features;
}
Esempio n. 2
0
  bool VLFeat::CalculateCommon(int f, bool all, int l) {
    string msg = "VLFeat::CalculateCommon("+ToStr(f)+","+ToStr(all)+","+
      ToStr(l)+") : ";

    // if (!do_fisher && !do_vlad) {
    //   cerr << msg
    // 	   << "either encoding=fisher or encoding=vlad should be specified"
    // 	   << endl;
    //   return false;
    // }

    if (!gmm && !kmeans) {
      cerr << msg << "either gmm=xxx or kmeans=xxx option should be given"
	   << endl;
      return false;
    }

    cox::tictac::func tt(tics, "VLFeat::CalculateCommon");
    
    // obs! only some parameters here, should be in ProcessOptionsAndRemove()
    // too, also scales and geometry should be made specifiable...
    bool normalizeSift = false, renormalize = true, flat_window = true;
    size_t step = 3, binsize = 8;

    EnsureImage();

    int width = Width(true), height = Height(true);

    if (FrameVerbose())
      cout << msg+"wxh="
	   << width << "x" << height << "=" << width*height << endl;

    vector<float> rgbcoeff { 0.2989, 0.5870, 0.1140 };

    imagedata idata = CurrentFrame();
    idata.convert(imagedata::pixeldata_float);
    idata.force_one_channel(rgbcoeff);

    vector<float> dsift;
    size_t descr_size_orig = 0, descr_size_final = 0;
    vector<float> scales { 1.0000, 0.7071, 0.5000, 0.3536, 0.2500 };
    // vector<float> scales { 1.0000 };
    for (size_t i=0; i<scales.size(); i++) {
      if (KeyPointVerbose())
	cout << "Starting vl_dsift_process() in scale " << scales[i] << endl;
      
      imagedata simg = idata;
      if (scales[i]!=1) {
	scalinginfo si(simg.width(), simg.height(),
		       (int)floor(scales[i]*simg.width()+0.5),
		       (int)floor(scales[i]*simg.height()+0.5));
	simg.rescale(si, 1);
      }

      // VlDsiftFilter *sf = vl_dsift_new(simg.width(), simg.height());
      VlDsiftFilter *sf = vl_dsift_new_basic(simg.width(), simg.height(),
					     step, binsize);

      // opts.scales = logspace(log10(1), log10(.25), 5) ;
      // void vl_dsift_set_bounds	(	VlDsiftFilter * 	self,
      // 					int 	minX,
      // 					int 	minY,
      // 					int 	maxX,
      // 					int 	maxY 
      // 					);	
      
      // VlDsiftDescriptorGeometry geom = { 8, 4, 4, 0, 0 };
      // vl_dsift_set_geometry(sf, &geom);
      
      //vl_dsift_set_steps(sf, 3, 3);

      //vl_dsift_set_window_size(sf, 8);

      vl_dsift_set_flat_window(sf, flat_window); // aka fast in matlab

      vector<float> imgvec = simg.get_float();
      const float *img_fp = &imgvec[0];
      // cout << "IMAGE = " << img_fp[0] << " " << img_fp[1] << " "
      //      << img_fp[2] << " ... " << img_fp[41] << endl;

      vl_dsift_process(sf, img_fp);
      
      // if opts.rootSift // false
      // 		descrs{si} = sqrt(descrs{si}) ;
      // end
      // 	if opts.normalizeSift //true
      // 		  descrs{si} = snorm(descrs{si}) ;
      // end

      descr_size_orig = sf->descrSize;
      size_t nf = sf->numFrames;
      const VlDsiftKeypoint *k = sf->frames;
      float *d  = sf->descrs;
      
      if (KeyPointVerbose())
	cout << "  found " << sf->numFrames << " 'frames' in "
	     << simg.info() << endl
	     << "  descriptor dim " << descr_size_orig << endl;
      
      if (PixelVerbose())
	for (size_t i=0; i<nf; i++) {
	  cout << "  i=" << i << " x=" << k[i].x << " y=" << k[i].y
	       << " s=" << k[i].s << " norm=" << k[i].norm;
	  if (FullVerbose()) {
	    cout << " RAW";
	    for (size_t j=0; j<descr_size_orig; j++)
	      cout << " " << d[i*descr_size_orig+j];
	  }
	  cout << endl;
	}

      if (normalizeSift) {
	for (size_t i=0; i<nf; i++) {
	  if (PixelVerbose())
	    cout << "  i=" << i << " x=" << k[i].x << " y=" << k[i].y
		 << " s=" << k[i].s << " norm=" << k[i].norm;
	  double mul = 0.0;
	  for (size_t j=0; j<descr_size_orig; j++)
	    mul += d[i*descr_size_orig+j]*d[i*descr_size_orig+j];
	  if (mul)
	    mul = 1.0/sqrt(mul);
	  if (FullVerbose())
	    cout << " NORM";
	  for (size_t j=0; j<descr_size_orig; j++) {
	    d[i*descr_size_orig+j] *= mul;
	    if (FullVerbose())
	      cout << " " << d[i*descr_size_orig+j];
	  }
	  if (PixelVerbose())
	    cout << endl;
	}
      }

      if (!pca.vector_length()) {
	dsift.insert(dsift.end(), d, d+nf*descr_size_orig);
	descr_size_final = descr_size_orig;

      } else {
	for (size_t i=0; i<nf; i++) {
	  vector<float> vin(d+i*descr_size_orig, d+(i+1)*descr_size_orig);
	  vector<float> vout = pca.projection_coeff(vin);
	  dsift.insert(dsift.end(), vout.begin(), vout.end());
	}
	descr_size_final = pca.base_size();
      }
	  
      vl_dsift_delete(sf);
    }

    size_t numdata = dsift.size()/descr_size_final;
    const float *datain = &dsift[0];

    vector<float> enc((do_fisher?2:1)*descriptor_dim()*nclusters());
    float *dataout = &enc[0];
      
    if (do_fisher) {
      if (FrameVerbose())
	cout << msg << "fisher encoding " << numdata
	     << " descriptors of size " << descr_size_orig
	     << " => " << descr_size_final
	     << " with gmm dimensionality " << descriptor_dim()
	     << endl;
      
      if (descr_size_final!=descriptor_dim()) {
	cerr << msg << "dimensionality mismatch descr_size_final="
	     << descr_size_final << " descriptor_dim()=" << descriptor_dim() 
	     << endl;
	return false;
      }

      vl_fisher_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(),
		       nclusters(), covariances(), priors(),
		       datain, numdata, VL_FISHER_FLAG_IMPROVED) ;
    }

    if (do_vlad) {
      //obs! correct use of pca?

      if (FrameVerbose())
	cout << msg << "vlad encoding " << numdata
	     << " descriptors of size " << descr_size_final << endl;

      vector<vl_uint32> indexes(numdata);
      vector<float> distances(numdata);

      if (kdtree)
	vl_kdforest_query_with_array(kdtree, &indexes[0], 1, numdata, &distances[0], datain);
      else
	vl_kmeans_quantize(kmeans, &indexes[0], &distances[0], datain, numdata);

      vector<float> assignments(numdata*nclusters());
      for (size_t i=0; i<numdata; i++)
	assignments[i * nclusters() + indexes[i]] = 1;
      
      int vlad_flags = VL_VLAD_FLAG_SQUARE_ROOT|VL_VLAD_FLAG_NORMALIZE_COMPONENTS;

      vl_vlad_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(),
		     nclusters(), datain, numdata, &assignments[0],
		     vlad_flags);
    }

    if (renormalize) {
      if (PixelVerbose())
	cout << " RENORM:";
      double mul = 0.0;
      for (size_t j=0; j<enc.size(); j++)
	mul += enc[j]*enc[j];
      if (mul)
	mul = 1.0/sqrt(mul);
      for (size_t j=0; j<enc.size(); j++) {
	if (PixelVerbose())
	  cout << " " << enc[j];
	enc[j] *= mul;
	if (PixelVerbose())
	  cout << "->" << enc[j];
      }
      if (PixelVerbose())
	cout << endl;
    }

    ((VectorData*)GetData(0))->setVector(enc);
    
    return true;
  }
Esempio n. 3
0
/** ------------------------------------------------------------------
 ** @brief Python entry point
 **/
PyObject * vl_dsift_python(
		PyArrayObject & pyArray,
		int opt_step,
		PyArrayObject & opt_bounds,
		int opt_size,
		bool opt_fast,
		bool opt_verbose,
		bool opt_norm)
{
	// check data type
	assert(pyArray.descr->type_num == PyArray_FLOAT);
	assert(pyArray.flags & NPY_FORTRAN);
	assert(opt_bounds.descr->type_num == PyArray_FLOAT);

	int verbose = 0;
	int opt;
	float const *data;
	int M, N;

	int step = 1;
	int size = 3;
	vl_bool norm = 0;

	vl_bool useFlatWindow = VL_FALSE;

	double *bounds = NULL;
	double boundBuffer[4];

	/* -----------------------------------------------------------------
	 *                                               Check the arguments
	 * -------------------------------------------------------------- */
	data = (float*) pyArray.data;
	M = pyArray.dimensions[0];
	N = pyArray.dimensions[1];

	if (opt_verbose)
		++verbose;
	if (opt_fast)
		useFlatWindow = 1;
	if (opt_norm)
		norm = 1;
	if (opt_bounds.nd == 1 && opt_bounds.dimensions[0] == 4) {
		double * tmp = (double *) opt_bounds.data;
		bounds = boundBuffer;
		for (int i = 0; i < 4; i++)
			bounds[i] = tmp[i];
	}
	if (opt_size >= 0)
		size = opt_size;
	if (opt_step >= 0)
		step = opt_step;


	// create PyTuple for outputs
	PyObject * tuple = PyTuple_New(2);

	/* -----------------------------------------------------------------
	 *                                                            Do job
	 * -------------------------------------------------------------- */
	{
		int numFrames;
		int descrSize;
		VlDsiftKeypoint const *frames;
		VlDsiftDescriptorGeometry const *geom;
		float const *descrs;
		int k, i;

		VlDsiftFilter *dsift;
		dsift = vl_dsift_new_basic(M, N, step, size);
		if (bounds) {
			vl_dsift_set_bounds(dsift, VL_MAX(bounds[0], 0), VL_MAX(
				bounds[1], 0), VL_MIN(bounds[2], M - 1), VL_MIN(bounds[3], N
					- 1));
		}
		vl_dsift_set_flat_window(dsift, useFlatWindow);

	    numFrames = vl_dsift_get_keypoint_num (dsift) ;
	    descrSize = vl_dsift_get_descriptor_size (dsift) ;
	    geom = vl_dsift_get_geometry(dsift);

		if (verbose) {
			int stepX;
			int stepY;
			int minX;
			int minY;
			int maxX;
			int maxY;
			vl_bool useFlatWindow;

			vl_dsift_get_steps(dsift, &stepX, &stepY);
			vl_dsift_get_bounds(dsift, &minX, &minY, &maxX, &maxY);
			useFlatWindow = vl_dsift_get_flat_window(dsift);

			printf("dsift: image size:        %d x %d\n", N, M);
			printf(
				"      bounds:            [%d, %d, %d, %d]\n", minY, minX,
				maxY, maxX);
			printf("      subsampling steps: %d, %d\n", stepY, stepX);
			printf(
				"      num bins:          [%d, %d, %d]\n", geom->numBinT,
				geom->numBinX, geom->numBinY);
			printf("      descriptor size:   %d\n", descrSize);
			printf(
				"      bin sizes:         [%d, %d]\n", geom->binSizeX,
				geom->binSizeY);
			printf("      flat window:       %s\n", VL_YESNO(useFlatWindow));
			printf("      number of frames:  %d\n", numFrames);
		}

		vl_dsift_process(dsift, data);

		frames = vl_dsift_get_keypoints(dsift);
		descrs = vl_dsift_get_descriptors(dsift);

		/* ---------------------------------------------------------------
		 *                                            Create output arrays
		 * ------------------------------------------------------------ */
		npy_intp dims[2];

		dims[0] = descrSize;
		dims[1] = numFrames;

		// allocate PyArray objects
		PyArrayObject * _descriptors = (PyArrayObject *) PyArray_NewFromDescr(
			&PyArray_Type, PyArray_DescrFromType(PyArray_UINT8),
			2, dims, NULL, NULL, NPY_F_CONTIGUOUS, NULL);

		if (norm)
			dims[0] = 3;
		else
			dims[0] = 2;

		PyArrayObject * _frames = (PyArrayObject*) PyArray_NewFromDescr(
			&PyArray_Type, PyArray_DescrFromType(PyArray_DOUBLE),
			2, dims, NULL, NULL, NPY_F_CONTIGUOUS, NULL);

		// put PyArray objects in PyTuple
		PyTuple_SetItem(tuple, 0, PyArray_Return(_frames));
		PyTuple_SetItem(tuple, 1, PyArray_Return(_descriptors));

		/* ---------------------------------------------------------------
		 *                                                       Copy back
		 * ------------------------------------------------------------ */
		{
			float *tmpDescr = (float*) vl_malloc(sizeof(float) * descrSize);

			double *outFrameIter = (double*) _frames->data;
			vl_uint8 *outDescrIter = (vl_uint8 *) _descriptors->data;
			for (k = 0; k < numFrames; ++k) {
				*outFrameIter++ = frames[k].y;
				*outFrameIter++ = frames[k].x;

				/* We have an implied / 2 in the norm, because of the clipping
				 below */
				if (norm)
					*outFrameIter++ = frames[k].norm;

				vl_dsift_transpose_descriptor(
					tmpDescr, descrs + descrSize * k, geom->numBinT,
					geom->numBinX, geom->numBinY);

				for (i = 0; i < descrSize; ++i) {
					*outDescrIter++ = (vl_uint8) (VL_MIN(
						512.0F * tmpDescr[i], 255.0F));
				}
			}
			vl_free(tmpDescr);
		}
		vl_dsift_delete(dsift);
	}

	return tuple;
}