Exemple #1
0
/* driver */
void
mexFunction (int nout VL_UNUSED, mxArray * out[], int nin, const mxArray * in[])
{
  enum {IN_DATA = 0, IN_MEANS, IN_COVARIANCES, IN_PRIORS, IN_END} ;
  enum {OUT_ENC} ;

  int opt ;
  int next = IN_END ;
  mxArray const  *optarg ;

  vl_size numClusters = 10;
  vl_size dimension ;
  vl_size numData ;
  int flags = 0 ;

  void * covariances = NULL;
  void * means = NULL;
  void * priors = NULL;
  void * data = NULL ;

  int verbosity = 0 ;

  vl_type dataType ;
  mxClassID classID ;

  VL_USE_MATLAB_ENV ;

  /* -----------------------------------------------------------------
   *                                               Check the arguments
   * -------------------------------------------------------------- */

  if (nin < 4) {
    vlmxError (vlmxErrInvalidArgument,
               "At least four arguments required.");
  }
  if (nout > 1) {
    vlmxError (vlmxErrInvalidArgument,
               "At most one output argument.");
  }

  classID = mxGetClassID (IN(DATA)) ;
  switch (classID) {
    case mxSINGLE_CLASS: dataType = VL_TYPE_FLOAT ; break ;
    case mxDOUBLE_CLASS: dataType = VL_TYPE_DOUBLE ; break ;
    default:
      vlmxError (vlmxErrInvalidArgument,
                 "DATA is neither of class SINGLE or DOUBLE.") ;
  }

  if (mxGetClassID (IN(MEANS)) != classID) {
    vlmxError(vlmxErrInvalidArgument, "MEANS is not of the same class as DATA.") ;
  }
  if (mxGetClassID (IN(COVARIANCES)) != classID) {
    vlmxError(vlmxErrInvalidArgument, "COVARIANCES is not of the same class as DATA.") ;
  }
  if (mxGetClassID (IN(PRIORS)) != classID) {
    vlmxError(vlmxErrInvalidArgument, "PRIORS is not of the same class as DATA.") ;
  }

  dimension = mxGetM (IN(DATA)) ;
  numData = mxGetN (IN(DATA)) ;
  numClusters = mxGetN (IN(MEANS)) ;

  if (dimension == 0) {
    vlmxError (vlmxErrInvalidArgument, "SIZE(DATA,1) is zero.") ;
  }
  if (!vlmxIsMatrix(IN(MEANS), dimension, numClusters)) {
    vlmxError (vlmxErrInvalidArgument, "MEANS is not a matrix or does not have the right size.") ;
  }
  if (!vlmxIsMatrix(IN(COVARIANCES), dimension, numClusters)) {
    vlmxError (vlmxErrInvalidArgument, "COVARIANCES is not a matrix or does not have the right size.") ;
  }
  if (!vlmxIsVector(IN(PRIORS), numClusters)) {
    vlmxError (vlmxErrInvalidArgument, "PRIORS is not a vector or does not have the right size.") ;
  }
  if (!vlmxIsMatrix(IN(DATA), dimension, numData)) {
    vlmxError (vlmxErrInvalidArgument, "DATA is not a matrix or does not have the right size.") ;
  }

  while ((opt = vlmxNextOption (in, nin, options, &next, &optarg)) >= 0) {
    switch (opt) {
      case opt_verbose : ++ verbosity ; break ;
      case opt_normalized: flags |= VL_FISHER_FLAG_NORMALIZED ; break ;
      case opt_square_root: flags |= VL_FISHER_FLAG_SQUARE_ROOT ; break ;
      case opt_improved: flags |= VL_FISHER_FLAG_IMPROVED ; break ;
      case opt_fast: flags |= VL_FISHER_FLAG_FAST ; break ;
      default : abort() ;
    }
  }

  /* -----------------------------------------------------------------
   *                                                        Do the job
   * -------------------------------------------------------------- */

  data = mxGetPr(IN(DATA)) ;
  means = mxGetPr(IN(MEANS)) ;
  covariances = mxGetPr(IN(COVARIANCES)) ;
  priors = mxGetPr(IN(PRIORS)) ;

  if (verbosity) {
    mexPrintf("vl_fisher: num data: %d\n", numData) ;
    mexPrintf("vl_fisher: num clusters: %d\n", numClusters) ;
    mexPrintf("vl_fisher: data dimension: %d\n", dimension) ;
    mexPrintf("vl_fisher: code dimension: %d\n", numClusters * dimension) ;
    mexPrintf("vl_fisher: normalized: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_NORMALIZED)) ;
    mexPrintf("vl_fisher: square root: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_SQUARE_ROOT)) ;
    mexPrintf("vl_fisher: normalized: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_NORMALIZED)) ;
    mexPrintf("vl_fisher: fast: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_FAST)) ;
  }

  /* -------------------------------------------------------------- */
  /*                                                       Encoding */
  /* -------------------------------------------------------------- */

  OUT(ENC) = mxCreateNumericMatrix (dimension * numClusters * 2, 1, classID, mxREAL) ;

  vl_fisher_encode (mxGetData(OUT(ENC)), dataType,
                    means, dimension, numClusters,
                    covariances,
                    priors,
                    data, numData,
                    flags) ;
}
Exemple #2
0
  bool VLFeat::CalculateCommon(int f, bool all, int l) {
    string msg = "VLFeat::CalculateCommon("+ToStr(f)+","+ToStr(all)+","+
      ToStr(l)+") : ";

    // if (!do_fisher && !do_vlad) {
    //   cerr << msg
    // 	   << "either encoding=fisher or encoding=vlad should be specified"
    // 	   << endl;
    //   return false;
    // }

    if (!gmm && !kmeans) {
      cerr << msg << "either gmm=xxx or kmeans=xxx option should be given"
	   << endl;
      return false;
    }

    cox::tictac::func tt(tics, "VLFeat::CalculateCommon");
    
    // obs! only some parameters here, should be in ProcessOptionsAndRemove()
    // too, also scales and geometry should be made specifiable...
    bool normalizeSift = false, renormalize = true, flat_window = true;
    size_t step = 3, binsize = 8;

    EnsureImage();

    int width = Width(true), height = Height(true);

    if (FrameVerbose())
      cout << msg+"wxh="
	   << width << "x" << height << "=" << width*height << endl;

    vector<float> rgbcoeff { 0.2989, 0.5870, 0.1140 };

    imagedata idata = CurrentFrame();
    idata.convert(imagedata::pixeldata_float);
    idata.force_one_channel(rgbcoeff);

    vector<float> dsift;
    size_t descr_size_orig = 0, descr_size_final = 0;
    vector<float> scales { 1.0000, 0.7071, 0.5000, 0.3536, 0.2500 };
    // vector<float> scales { 1.0000 };
    for (size_t i=0; i<scales.size(); i++) {
      if (KeyPointVerbose())
	cout << "Starting vl_dsift_process() in scale " << scales[i] << endl;
      
      imagedata simg = idata;
      if (scales[i]!=1) {
	scalinginfo si(simg.width(), simg.height(),
		       (int)floor(scales[i]*simg.width()+0.5),
		       (int)floor(scales[i]*simg.height()+0.5));
	simg.rescale(si, 1);
      }

      // VlDsiftFilter *sf = vl_dsift_new(simg.width(), simg.height());
      VlDsiftFilter *sf = vl_dsift_new_basic(simg.width(), simg.height(),
					     step, binsize);

      // opts.scales = logspace(log10(1), log10(.25), 5) ;
      // void vl_dsift_set_bounds	(	VlDsiftFilter * 	self,
      // 					int 	minX,
      // 					int 	minY,
      // 					int 	maxX,
      // 					int 	maxY 
      // 					);	
      
      // VlDsiftDescriptorGeometry geom = { 8, 4, 4, 0, 0 };
      // vl_dsift_set_geometry(sf, &geom);
      
      //vl_dsift_set_steps(sf, 3, 3);

      //vl_dsift_set_window_size(sf, 8);

      vl_dsift_set_flat_window(sf, flat_window); // aka fast in matlab

      vector<float> imgvec = simg.get_float();
      const float *img_fp = &imgvec[0];
      // cout << "IMAGE = " << img_fp[0] << " " << img_fp[1] << " "
      //      << img_fp[2] << " ... " << img_fp[41] << endl;

      vl_dsift_process(sf, img_fp);
      
      // if opts.rootSift // false
      // 		descrs{si} = sqrt(descrs{si}) ;
      // end
      // 	if opts.normalizeSift //true
      // 		  descrs{si} = snorm(descrs{si}) ;
      // end

      descr_size_orig = sf->descrSize;
      size_t nf = sf->numFrames;
      const VlDsiftKeypoint *k = sf->frames;
      float *d  = sf->descrs;
      
      if (KeyPointVerbose())
	cout << "  found " << sf->numFrames << " 'frames' in "
	     << simg.info() << endl
	     << "  descriptor dim " << descr_size_orig << endl;
      
      if (PixelVerbose())
	for (size_t i=0; i<nf; i++) {
	  cout << "  i=" << i << " x=" << k[i].x << " y=" << k[i].y
	       << " s=" << k[i].s << " norm=" << k[i].norm;
	  if (FullVerbose()) {
	    cout << " RAW";
	    for (size_t j=0; j<descr_size_orig; j++)
	      cout << " " << d[i*descr_size_orig+j];
	  }
	  cout << endl;
	}

      if (normalizeSift) {
	for (size_t i=0; i<nf; i++) {
	  if (PixelVerbose())
	    cout << "  i=" << i << " x=" << k[i].x << " y=" << k[i].y
		 << " s=" << k[i].s << " norm=" << k[i].norm;
	  double mul = 0.0;
	  for (size_t j=0; j<descr_size_orig; j++)
	    mul += d[i*descr_size_orig+j]*d[i*descr_size_orig+j];
	  if (mul)
	    mul = 1.0/sqrt(mul);
	  if (FullVerbose())
	    cout << " NORM";
	  for (size_t j=0; j<descr_size_orig; j++) {
	    d[i*descr_size_orig+j] *= mul;
	    if (FullVerbose())
	      cout << " " << d[i*descr_size_orig+j];
	  }
	  if (PixelVerbose())
	    cout << endl;
	}
      }

      if (!pca.vector_length()) {
	dsift.insert(dsift.end(), d, d+nf*descr_size_orig);
	descr_size_final = descr_size_orig;

      } else {
	for (size_t i=0; i<nf; i++) {
	  vector<float> vin(d+i*descr_size_orig, d+(i+1)*descr_size_orig);
	  vector<float> vout = pca.projection_coeff(vin);
	  dsift.insert(dsift.end(), vout.begin(), vout.end());
	}
	descr_size_final = pca.base_size();
      }
	  
      vl_dsift_delete(sf);
    }

    size_t numdata = dsift.size()/descr_size_final;
    const float *datain = &dsift[0];

    vector<float> enc((do_fisher?2:1)*descriptor_dim()*nclusters());
    float *dataout = &enc[0];
      
    if (do_fisher) {
      if (FrameVerbose())
	cout << msg << "fisher encoding " << numdata
	     << " descriptors of size " << descr_size_orig
	     << " => " << descr_size_final
	     << " with gmm dimensionality " << descriptor_dim()
	     << endl;
      
      if (descr_size_final!=descriptor_dim()) {
	cerr << msg << "dimensionality mismatch descr_size_final="
	     << descr_size_final << " descriptor_dim()=" << descriptor_dim() 
	     << endl;
	return false;
      }

      vl_fisher_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(),
		       nclusters(), covariances(), priors(),
		       datain, numdata, VL_FISHER_FLAG_IMPROVED) ;
    }

    if (do_vlad) {
      //obs! correct use of pca?

      if (FrameVerbose())
	cout << msg << "vlad encoding " << numdata
	     << " descriptors of size " << descr_size_final << endl;

      vector<vl_uint32> indexes(numdata);
      vector<float> distances(numdata);

      if (kdtree)
	vl_kdforest_query_with_array(kdtree, &indexes[0], 1, numdata, &distances[0], datain);
      else
	vl_kmeans_quantize(kmeans, &indexes[0], &distances[0], datain, numdata);

      vector<float> assignments(numdata*nclusters());
      for (size_t i=0; i<numdata; i++)
	assignments[i * nclusters() + indexes[i]] = 1;
      
      int vlad_flags = VL_VLAD_FLAG_SQUARE_ROOT|VL_VLAD_FLAG_NORMALIZE_COMPONENTS;

      vl_vlad_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(),
		     nclusters(), datain, numdata, &assignments[0],
		     vlad_flags);
    }

    if (renormalize) {
      if (PixelVerbose())
	cout << " RENORM:";
      double mul = 0.0;
      for (size_t j=0; j<enc.size(); j++)
	mul += enc[j]*enc[j];
      if (mul)
	mul = 1.0/sqrt(mul);
      for (size_t j=0; j<enc.size(); j++) {
	if (PixelVerbose())
	  cout << " " << enc[j];
	enc[j] *= mul;
	if (PixelVerbose())
	  cout << "->" << enc[j];
      }
      if (PixelVerbose())
	cout << endl;
    }

    ((VectorData*)GetData(0))->setVector(enc);
    
    return true;
  }