/* driver */ void mexFunction (int nout VL_UNUSED, mxArray * out[], int nin, const mxArray * in[]) { enum {IN_DATA = 0, IN_MEANS, IN_COVARIANCES, IN_PRIORS, IN_END} ; enum {OUT_ENC} ; int opt ; int next = IN_END ; mxArray const *optarg ; vl_size numClusters = 10; vl_size dimension ; vl_size numData ; int flags = 0 ; void * covariances = NULL; void * means = NULL; void * priors = NULL; void * data = NULL ; int verbosity = 0 ; vl_type dataType ; mxClassID classID ; VL_USE_MATLAB_ENV ; /* ----------------------------------------------------------------- * Check the arguments * -------------------------------------------------------------- */ if (nin < 4) { vlmxError (vlmxErrInvalidArgument, "At least four arguments required."); } if (nout > 1) { vlmxError (vlmxErrInvalidArgument, "At most one output argument."); } classID = mxGetClassID (IN(DATA)) ; switch (classID) { case mxSINGLE_CLASS: dataType = VL_TYPE_FLOAT ; break ; case mxDOUBLE_CLASS: dataType = VL_TYPE_DOUBLE ; break ; default: vlmxError (vlmxErrInvalidArgument, "DATA is neither of class SINGLE or DOUBLE.") ; } if (mxGetClassID (IN(MEANS)) != classID) { vlmxError(vlmxErrInvalidArgument, "MEANS is not of the same class as DATA.") ; } if (mxGetClassID (IN(COVARIANCES)) != classID) { vlmxError(vlmxErrInvalidArgument, "COVARIANCES is not of the same class as DATA.") ; } if (mxGetClassID (IN(PRIORS)) != classID) { vlmxError(vlmxErrInvalidArgument, "PRIORS is not of the same class as DATA.") ; } dimension = mxGetM (IN(DATA)) ; numData = mxGetN (IN(DATA)) ; numClusters = mxGetN (IN(MEANS)) ; if (dimension == 0) { vlmxError (vlmxErrInvalidArgument, "SIZE(DATA,1) is zero.") ; } if (!vlmxIsMatrix(IN(MEANS), dimension, numClusters)) { vlmxError (vlmxErrInvalidArgument, "MEANS is not a matrix or does not have the right size.") ; } if (!vlmxIsMatrix(IN(COVARIANCES), dimension, numClusters)) { vlmxError (vlmxErrInvalidArgument, "COVARIANCES is not a matrix or does not have the right size.") ; } if (!vlmxIsVector(IN(PRIORS), numClusters)) { vlmxError (vlmxErrInvalidArgument, "PRIORS is not a vector or does not have the right size.") ; } if (!vlmxIsMatrix(IN(DATA), dimension, numData)) { vlmxError (vlmxErrInvalidArgument, "DATA is not a matrix or does not have the right size.") ; } while ((opt = vlmxNextOption (in, nin, options, &next, &optarg)) >= 0) { switch (opt) { case opt_verbose : ++ verbosity ; break ; case opt_normalized: flags |= VL_FISHER_FLAG_NORMALIZED ; break ; case opt_square_root: flags |= VL_FISHER_FLAG_SQUARE_ROOT ; break ; case opt_improved: flags |= VL_FISHER_FLAG_IMPROVED ; break ; case opt_fast: flags |= VL_FISHER_FLAG_FAST ; break ; default : abort() ; } } /* ----------------------------------------------------------------- * Do the job * -------------------------------------------------------------- */ data = mxGetPr(IN(DATA)) ; means = mxGetPr(IN(MEANS)) ; covariances = mxGetPr(IN(COVARIANCES)) ; priors = mxGetPr(IN(PRIORS)) ; if (verbosity) { mexPrintf("vl_fisher: num data: %d\n", numData) ; mexPrintf("vl_fisher: num clusters: %d\n", numClusters) ; mexPrintf("vl_fisher: data dimension: %d\n", dimension) ; mexPrintf("vl_fisher: code dimension: %d\n", numClusters * dimension) ; mexPrintf("vl_fisher: normalized: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_NORMALIZED)) ; mexPrintf("vl_fisher: square root: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_SQUARE_ROOT)) ; mexPrintf("vl_fisher: normalized: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_NORMALIZED)) ; mexPrintf("vl_fisher: fast: %s\n", VL_YESNO(flags & VL_FISHER_FLAG_FAST)) ; } /* -------------------------------------------------------------- */ /* Encoding */ /* -------------------------------------------------------------- */ OUT(ENC) = mxCreateNumericMatrix (dimension * numClusters * 2, 1, classID, mxREAL) ; vl_fisher_encode (mxGetData(OUT(ENC)), dataType, means, dimension, numClusters, covariances, priors, data, numData, flags) ; }
bool VLFeat::CalculateCommon(int f, bool all, int l) { string msg = "VLFeat::CalculateCommon("+ToStr(f)+","+ToStr(all)+","+ ToStr(l)+") : "; // if (!do_fisher && !do_vlad) { // cerr << msg // << "either encoding=fisher or encoding=vlad should be specified" // << endl; // return false; // } if (!gmm && !kmeans) { cerr << msg << "either gmm=xxx or kmeans=xxx option should be given" << endl; return false; } cox::tictac::func tt(tics, "VLFeat::CalculateCommon"); // obs! only some parameters here, should be in ProcessOptionsAndRemove() // too, also scales and geometry should be made specifiable... bool normalizeSift = false, renormalize = true, flat_window = true; size_t step = 3, binsize = 8; EnsureImage(); int width = Width(true), height = Height(true); if (FrameVerbose()) cout << msg+"wxh=" << width << "x" << height << "=" << width*height << endl; vector<float> rgbcoeff { 0.2989, 0.5870, 0.1140 }; imagedata idata = CurrentFrame(); idata.convert(imagedata::pixeldata_float); idata.force_one_channel(rgbcoeff); vector<float> dsift; size_t descr_size_orig = 0, descr_size_final = 0; vector<float> scales { 1.0000, 0.7071, 0.5000, 0.3536, 0.2500 }; // vector<float> scales { 1.0000 }; for (size_t i=0; i<scales.size(); i++) { if (KeyPointVerbose()) cout << "Starting vl_dsift_process() in scale " << scales[i] << endl; imagedata simg = idata; if (scales[i]!=1) { scalinginfo si(simg.width(), simg.height(), (int)floor(scales[i]*simg.width()+0.5), (int)floor(scales[i]*simg.height()+0.5)); simg.rescale(si, 1); } // VlDsiftFilter *sf = vl_dsift_new(simg.width(), simg.height()); VlDsiftFilter *sf = vl_dsift_new_basic(simg.width(), simg.height(), step, binsize); // opts.scales = logspace(log10(1), log10(.25), 5) ; // void vl_dsift_set_bounds ( VlDsiftFilter * self, // int minX, // int minY, // int maxX, // int maxY // ); // VlDsiftDescriptorGeometry geom = { 8, 4, 4, 0, 0 }; // vl_dsift_set_geometry(sf, &geom); //vl_dsift_set_steps(sf, 3, 3); //vl_dsift_set_window_size(sf, 8); vl_dsift_set_flat_window(sf, flat_window); // aka fast in matlab vector<float> imgvec = simg.get_float(); const float *img_fp = &imgvec[0]; // cout << "IMAGE = " << img_fp[0] << " " << img_fp[1] << " " // << img_fp[2] << " ... " << img_fp[41] << endl; vl_dsift_process(sf, img_fp); // if opts.rootSift // false // descrs{si} = sqrt(descrs{si}) ; // end // if opts.normalizeSift //true // descrs{si} = snorm(descrs{si}) ; // end descr_size_orig = sf->descrSize; size_t nf = sf->numFrames; const VlDsiftKeypoint *k = sf->frames; float *d = sf->descrs; if (KeyPointVerbose()) cout << " found " << sf->numFrames << " 'frames' in " << simg.info() << endl << " descriptor dim " << descr_size_orig << endl; if (PixelVerbose()) for (size_t i=0; i<nf; i++) { cout << " i=" << i << " x=" << k[i].x << " y=" << k[i].y << " s=" << k[i].s << " norm=" << k[i].norm; if (FullVerbose()) { cout << " RAW"; for (size_t j=0; j<descr_size_orig; j++) cout << " " << d[i*descr_size_orig+j]; } cout << endl; } if (normalizeSift) { for (size_t i=0; i<nf; i++) { if (PixelVerbose()) cout << " i=" << i << " x=" << k[i].x << " y=" << k[i].y << " s=" << k[i].s << " norm=" << k[i].norm; double mul = 0.0; for (size_t j=0; j<descr_size_orig; j++) mul += d[i*descr_size_orig+j]*d[i*descr_size_orig+j]; if (mul) mul = 1.0/sqrt(mul); if (FullVerbose()) cout << " NORM"; for (size_t j=0; j<descr_size_orig; j++) { d[i*descr_size_orig+j] *= mul; if (FullVerbose()) cout << " " << d[i*descr_size_orig+j]; } if (PixelVerbose()) cout << endl; } } if (!pca.vector_length()) { dsift.insert(dsift.end(), d, d+nf*descr_size_orig); descr_size_final = descr_size_orig; } else { for (size_t i=0; i<nf; i++) { vector<float> vin(d+i*descr_size_orig, d+(i+1)*descr_size_orig); vector<float> vout = pca.projection_coeff(vin); dsift.insert(dsift.end(), vout.begin(), vout.end()); } descr_size_final = pca.base_size(); } vl_dsift_delete(sf); } size_t numdata = dsift.size()/descr_size_final; const float *datain = &dsift[0]; vector<float> enc((do_fisher?2:1)*descriptor_dim()*nclusters()); float *dataout = &enc[0]; if (do_fisher) { if (FrameVerbose()) cout << msg << "fisher encoding " << numdata << " descriptors of size " << descr_size_orig << " => " << descr_size_final << " with gmm dimensionality " << descriptor_dim() << endl; if (descr_size_final!=descriptor_dim()) { cerr << msg << "dimensionality mismatch descr_size_final=" << descr_size_final << " descriptor_dim()=" << descriptor_dim() << endl; return false; } vl_fisher_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(), nclusters(), covariances(), priors(), datain, numdata, VL_FISHER_FLAG_IMPROVED) ; } if (do_vlad) { //obs! correct use of pca? if (FrameVerbose()) cout << msg << "vlad encoding " << numdata << " descriptors of size " << descr_size_final << endl; vector<vl_uint32> indexes(numdata); vector<float> distances(numdata); if (kdtree) vl_kdforest_query_with_array(kdtree, &indexes[0], 1, numdata, &distances[0], datain); else vl_kmeans_quantize(kmeans, &indexes[0], &distances[0], datain, numdata); vector<float> assignments(numdata*nclusters()); for (size_t i=0; i<numdata; i++) assignments[i * nclusters() + indexes[i]] = 1; int vlad_flags = VL_VLAD_FLAG_SQUARE_ROOT|VL_VLAD_FLAG_NORMALIZE_COMPONENTS; vl_vlad_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(), nclusters(), datain, numdata, &assignments[0], vlad_flags); } if (renormalize) { if (PixelVerbose()) cout << " RENORM:"; double mul = 0.0; for (size_t j=0; j<enc.size(); j++) mul += enc[j]*enc[j]; if (mul) mul = 1.0/sqrt(mul); for (size_t j=0; j<enc.size(); j++) { if (PixelVerbose()) cout << " " << enc[j]; enc[j] *= mul; if (PixelVerbose()) cout << "->" << enc[j]; } if (PixelVerbose()) cout << endl; } ((VectorData*)GetData(0))->setVector(enc); return true; }