/**
 * Extracts dense SIFT descriptors from an 8-bit single-channel image with
 * VLFeat, once for every entry of a fixed size list (4, 6, 8, 10).
 *
 * For each entry a dense SIFT filter is created with that value as the
 * sampling step and a bin size of 8. Every descriptor component is scaled by
 * 512 and clipped to 255 before being stored.
 *
 * @param im        Input image; asserted to be of type CV_8U.
 * @param keypoints Receives one KeyPoint per descriptor (only pt.x / pt.y are
 *                  set), appended in the same order as the returned rows.
 * @param step      Unused.
 * @return          An N x 128 CV_32F matrix with one descriptor per row; an
 *                  empty (0 x 128) matrix when the image is empty.
 */
Mat PyramidsVl::extractFeatures(const Mat &im, vector<KeyPoint> &keypoints, int step)
{
    Q_UNUSED(step);

    const int descriptorDim = 128;
    Mat features(0, descriptorDim, CV_32F);
    const double magnif = 6;
    QList<int> sizes;
    sizes << 4;
    sizes << 6;
    sizes << 8;
    sizes << 10;

    assert(im.type() == CV_8U);
    /* nothing to describe; also keeps the buffer accesses below valid */
    if (im.empty())
        return features;

    /* convert to float array (vector-owned so nothing leaks on exceptions) */
    const size_t pixelCount = static_cast<size_t>(im.rows) * im.cols;
    vector<float> imdata(pixelCount);
    for (int r = 0; r < im.rows; r++) {
        const unsigned char *src = im.ptr<unsigned char>(r);
        float *dst = &imdata[static_cast<size_t>(r) * im.cols];
        for (int c = 0; c < im.cols; c++)
            dst[c] = src[c];
    }
    vector<float> smoothed(pixelCount);

    /* reused for every descriptor; Mat::push_back copies the row data */
    Mat ft(1, descriptorDim, CV_32F);
    float *ftRow = ft.ptr<float>(0);

    for (int s = 0; s < sizes.size(); s++) {
        const int sampleStep = sizes[s];

        /* smoothing step */
        const double sigma = sampleStep / magnif;
        vl_imsmooth_f(&smoothed[0], im.cols, &imdata[0], im.cols, im.rows, im.cols, sigma, sigma);
        /*
         * NOTE(review): this copy overwrites the smoothed image with the raw
         * one, so the descriptors below are computed on unsmoothed data. It is
         * kept to preserve the descriptor values existing callers get; confirm
         * whether discarding the smoothing is intentional.
         */
        memcpy(&smoothed[0], &imdata[0], pixelCount * sizeof(float));

        /* dense sift */
        VlDsiftFilter *dsift = vl_dsift_new_basic(im.cols, im.rows, sampleStep, 8);
        vl_dsift_process(dsift, &smoothed[0]);
        const int cnt = vl_dsift_get_keypoint_num(dsift);
        const float *descs = vl_dsift_get_descriptors(dsift);
        const VlDsiftKeypoint *kpts = vl_dsift_get_keypoints(dsift);
        for (int k = 0; k < cnt; k++) {
            const float *desc = descs + static_cast<size_t>(k) * descriptorDim;
            for (int j = 0; j < descriptorDim; j++)
                ftRow[j] = qMin(desc[j] * 512, float(255));
            features.push_back(ft);

            KeyPoint kpt;
            kpt.pt.x = kpts[k].x;
            kpt.pt.y = kpts[k].y;
            keypoints.push_back(kpt);
        }
        vl_dsift_delete(dsift);
    }

    return features;
}
bool VLFeat::CalculateCommon(int f, bool all, int l) { string msg = "VLFeat::CalculateCommon("+ToStr(f)+","+ToStr(all)+","+ ToStr(l)+") : "; // if (!do_fisher && !do_vlad) { // cerr << msg // << "either encoding=fisher or encoding=vlad should be specified" // << endl; // return false; // } if (!gmm && !kmeans) { cerr << msg << "either gmm=xxx or kmeans=xxx option should be given" << endl; return false; } cox::tictac::func tt(tics, "VLFeat::CalculateCommon"); // obs! only some parameters here, should be in ProcessOptionsAndRemove() // too, also scales and geometry should be made specifiable... bool normalizeSift = false, renormalize = true, flat_window = true; size_t step = 3, binsize = 8; EnsureImage(); int width = Width(true), height = Height(true); if (FrameVerbose()) cout << msg+"wxh=" << width << "x" << height << "=" << width*height << endl; vector<float> rgbcoeff { 0.2989, 0.5870, 0.1140 }; imagedata idata = CurrentFrame(); idata.convert(imagedata::pixeldata_float); idata.force_one_channel(rgbcoeff); vector<float> dsift; size_t descr_size_orig = 0, descr_size_final = 0; vector<float> scales { 1.0000, 0.7071, 0.5000, 0.3536, 0.2500 }; // vector<float> scales { 1.0000 }; for (size_t i=0; i<scales.size(); i++) { if (KeyPointVerbose()) cout << "Starting vl_dsift_process() in scale " << scales[i] << endl; imagedata simg = idata; if (scales[i]!=1) { scalinginfo si(simg.width(), simg.height(), (int)floor(scales[i]*simg.width()+0.5), (int)floor(scales[i]*simg.height()+0.5)); simg.rescale(si, 1); } // VlDsiftFilter *sf = vl_dsift_new(simg.width(), simg.height()); VlDsiftFilter *sf = vl_dsift_new_basic(simg.width(), simg.height(), step, binsize); // opts.scales = logspace(log10(1), log10(.25), 5) ; // void vl_dsift_set_bounds ( VlDsiftFilter * self, // int minX, // int minY, // int maxX, // int maxY // ); // VlDsiftDescriptorGeometry geom = { 8, 4, 4, 0, 0 }; // vl_dsift_set_geometry(sf, &geom); //vl_dsift_set_steps(sf, 3, 3); //vl_dsift_set_window_size(sf, 8); 
vl_dsift_set_flat_window(sf, flat_window); // aka fast in matlab vector<float> imgvec = simg.get_float(); const float *img_fp = &imgvec[0]; // cout << "IMAGE = " << img_fp[0] << " " << img_fp[1] << " " // << img_fp[2] << " ... " << img_fp[41] << endl; vl_dsift_process(sf, img_fp); // if opts.rootSift // false // descrs{si} = sqrt(descrs{si}) ; // end // if opts.normalizeSift //true // descrs{si} = snorm(descrs{si}) ; // end descr_size_orig = sf->descrSize; size_t nf = sf->numFrames; const VlDsiftKeypoint *k = sf->frames; float *d = sf->descrs; if (KeyPointVerbose()) cout << " found " << sf->numFrames << " 'frames' in " << simg.info() << endl << " descriptor dim " << descr_size_orig << endl; if (PixelVerbose()) for (size_t i=0; i<nf; i++) { cout << " i=" << i << " x=" << k[i].x << " y=" << k[i].y << " s=" << k[i].s << " norm=" << k[i].norm; if (FullVerbose()) { cout << " RAW"; for (size_t j=0; j<descr_size_orig; j++) cout << " " << d[i*descr_size_orig+j]; } cout << endl; } if (normalizeSift) { for (size_t i=0; i<nf; i++) { if (PixelVerbose()) cout << " i=" << i << " x=" << k[i].x << " y=" << k[i].y << " s=" << k[i].s << " norm=" << k[i].norm; double mul = 0.0; for (size_t j=0; j<descr_size_orig; j++) mul += d[i*descr_size_orig+j]*d[i*descr_size_orig+j]; if (mul) mul = 1.0/sqrt(mul); if (FullVerbose()) cout << " NORM"; for (size_t j=0; j<descr_size_orig; j++) { d[i*descr_size_orig+j] *= mul; if (FullVerbose()) cout << " " << d[i*descr_size_orig+j]; } if (PixelVerbose()) cout << endl; } } if (!pca.vector_length()) { dsift.insert(dsift.end(), d, d+nf*descr_size_orig); descr_size_final = descr_size_orig; } else { for (size_t i=0; i<nf; i++) { vector<float> vin(d+i*descr_size_orig, d+(i+1)*descr_size_orig); vector<float> vout = pca.projection_coeff(vin); dsift.insert(dsift.end(), vout.begin(), vout.end()); } descr_size_final = pca.base_size(); } vl_dsift_delete(sf); } size_t numdata = dsift.size()/descr_size_final; const float *datain = &dsift[0]; vector<float> 
enc((do_fisher?2:1)*descriptor_dim()*nclusters()); float *dataout = &enc[0]; if (do_fisher) { if (FrameVerbose()) cout << msg << "fisher encoding " << numdata << " descriptors of size " << descr_size_orig << " => " << descr_size_final << " with gmm dimensionality " << descriptor_dim() << endl; if (descr_size_final!=descriptor_dim()) { cerr << msg << "dimensionality mismatch descr_size_final=" << descr_size_final << " descriptor_dim()=" << descriptor_dim() << endl; return false; } vl_fisher_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(), nclusters(), covariances(), priors(), datain, numdata, VL_FISHER_FLAG_IMPROVED) ; } if (do_vlad) { //obs! correct use of pca? if (FrameVerbose()) cout << msg << "vlad encoding " << numdata << " descriptors of size " << descr_size_final << endl; vector<vl_uint32> indexes(numdata); vector<float> distances(numdata); if (kdtree) vl_kdforest_query_with_array(kdtree, &indexes[0], 1, numdata, &distances[0], datain); else vl_kmeans_quantize(kmeans, &indexes[0], &distances[0], datain, numdata); vector<float> assignments(numdata*nclusters()); for (size_t i=0; i<numdata; i++) assignments[i * nclusters() + indexes[i]] = 1; int vlad_flags = VL_VLAD_FLAG_SQUARE_ROOT|VL_VLAD_FLAG_NORMALIZE_COMPONENTS; vl_vlad_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(), nclusters(), datain, numdata, &assignments[0], vlad_flags); } if (renormalize) { if (PixelVerbose()) cout << " RENORM:"; double mul = 0.0; for (size_t j=0; j<enc.size(); j++) mul += enc[j]*enc[j]; if (mul) mul = 1.0/sqrt(mul); for (size_t j=0; j<enc.size(); j++) { if (PixelVerbose()) cout << " " << enc[j]; enc[j] *= mul; if (PixelVerbose()) cout << "->" << enc[j]; } if (PixelVerbose()) cout << endl; } ((VectorData*)GetData(0))->setVector(enc); return true; }
/** ------------------------------------------------------------------
 ** @brief Python entry point
 **
 ** Runs VLFeat dense SIFT on a 2-D float array and returns a 2-tuple
 ** (frames, descriptors).
 **
 ** @param pyArray     Input image; asserted to be PyArray_FLOAT and
 **                    Fortran-ordered. dimensions[0] / dimensions[1] are
 **                    passed to vl_dsift_new_basic() as its first / second
 **                    size argument.
 ** @param opt_step    Sampling step; values < 0 select the default (1).
 ** @param opt_bounds  Optional bounds; only used when it is 1-D with exactly
 **                    4 elements. Element type may be PyArray_FLOAT or
 **                    PyArray_DOUBLE.
 ** @param opt_size    Bin size; values < 0 select the default (3).
 ** @param opt_fast    Use the flat window.
 ** @param opt_verbose Print the filter configuration with printf.
 ** @param opt_norm    Add the descriptor norm as a third frame row.
 ** @return New tuple: item 0 is a (2 or 3) x numFrames double array holding
 **         y, x (and norm); item 1 is a descrSize x numFrames uint8 array of
 **         descriptors scaled by 512 and clipped to 255.
 **/
PyObject * vl_dsift_python(
    PyArrayObject & pyArray,
    int opt_step,
    PyArrayObject & opt_bounds,
    int opt_size,
    bool opt_fast,
    bool opt_verbose,
    bool opt_norm)
{
  // check data type
  assert(pyArray.descr->type_num == PyArray_FLOAT);
  assert(pyArray.flags & NPY_FORTRAN);
  assert(opt_bounds.descr->type_num == PyArray_FLOAT ||
         opt_bounds.descr->type_num == PyArray_DOUBLE);

  int verbose = 0;

  float const *data;
  int M, N;

  int step = 1;
  int size = 3;
  vl_bool norm = 0;

  vl_bool useFlatWindow = VL_FALSE;

  double *bounds = NULL;
  double boundBuffer[4];

  /* -----------------------------------------------------------------
   *                                               Check the arguments
   * -------------------------------------------------------------- */

  data = reinterpret_cast<float const *>(pyArray.data);
  M = pyArray.dimensions[0];
  N = pyArray.dimensions[1];

  if (opt_verbose)
    ++verbose;
  if (opt_fast)
    useFlatWindow = 1;
  if (opt_norm)
    norm = 1;

  if (opt_bounds.nd == 1 && opt_bounds.dimensions[0] == 4) {
    bounds = boundBuffer;
    // Read the elements with the array's real element type. Reading a
    // float32 array through a double pointer yields garbage bounds and
    // reads past the end of its 16-byte buffer.
    if (opt_bounds.descr->type_num == PyArray_DOUBLE) {
      double const *src = reinterpret_cast<double const *>(opt_bounds.data);
      for (int b = 0; b < 4; b++)
        bounds[b] = src[b];
    } else {
      float const *src = reinterpret_cast<float const *>(opt_bounds.data);
      for (int b = 0; b < 4; b++)
        bounds[b] = src[b];
    }
  }

  if (opt_size >= 0)
    size = opt_size;
  if (opt_step >= 0)
    step = opt_step;

  // create PyTuple for outputs
  // NOTE(review): the results of PyTuple_New / PyArray_NewFromDescr /
  // vl_malloc below are used without a NULL check.
  PyObject * tuple = PyTuple_New(2);

  /* -----------------------------------------------------------------
   *                                                            Do job
   * -------------------------------------------------------------- */
  {
    int numFrames;
    int descrSize;
    VlDsiftKeypoint const *frames;
    VlDsiftDescriptorGeometry const *geom;
    float const *descrs;

    VlDsiftFilter *dsift;
    dsift = vl_dsift_new_basic(M, N, step, size);
    if (bounds) {
      // Clamp the requested bounds to the image.
      vl_dsift_set_bounds(
        dsift,
        VL_MAX(bounds[0], 0),
        VL_MAX(bounds[1], 0),
        VL_MIN(bounds[2], M - 1),
        VL_MIN(bounds[3], N - 1));
    }
    vl_dsift_set_flat_window(dsift, useFlatWindow);

    numFrames = vl_dsift_get_keypoint_num (dsift) ;
    descrSize = vl_dsift_get_descriptor_size (dsift) ;
    geom = vl_dsift_get_geometry(dsift);

    if (verbose) {
      int stepX;
      int stepY;
      int minX;
      int minY;
      int maxX;
      int maxY;
      vl_bool flatWindow;

      vl_dsift_get_steps(dsift, &stepX, &stepY);
      vl_dsift_get_bounds(dsift, &minX, &minY, &maxX, &maxY);
      flatWindow = vl_dsift_get_flat_window(dsift);

      printf("dsift: image size: %d x %d\n", N, M);
      printf(
        " bounds: [%d, %d, %d, %d]\n", minY, minX, maxY, maxX);
      printf(" subsampling steps: %d, %d\n", stepY, stepX);
      printf(
        " num bins: [%d, %d, %d]\n", geom->numBinT, geom->numBinX,
        geom->numBinY);
      printf(" descriptor size: %d\n", descrSize);
      printf(
        " bin sizes: [%d, %d]\n", geom->binSizeX, geom->binSizeY);
      printf(" flat window: %s\n", VL_YESNO(flatWindow));
      printf(" number of frames: %d\n", numFrames);
    }

    vl_dsift_process(dsift, data);

    frames = vl_dsift_get_keypoints(dsift);
    descrs = vl_dsift_get_descriptors(dsift);

    /* ---------------------------------------------------------------
     *                                            Create output arrays
     * ------------------------------------------------------------ */
    npy_intp dims[2];

    dims[0] = descrSize;
    dims[1] = numFrames;

    // allocate PyArray objects
    PyArrayObject * _descriptors = (PyArrayObject *) PyArray_NewFromDescr(
      &PyArray_Type, PyArray_DescrFromType(PyArray_UINT8),
      2, dims, NULL, NULL, NPY_F_CONTIGUOUS, NULL);

    // frames carry (y, x) and, when requested, the norm as a third row
    if (norm)
      dims[0] = 3;
    else
      dims[0] = 2;

    PyArrayObject * _frames = (PyArrayObject*) PyArray_NewFromDescr(
      &PyArray_Type, PyArray_DescrFromType(PyArray_DOUBLE),
      2, dims, NULL, NULL, NPY_F_CONTIGUOUS, NULL);

    // put PyArray objects in PyTuple
    PyTuple_SetItem(tuple, 0, PyArray_Return(_frames));
    PyTuple_SetItem(tuple, 1, PyArray_Return(_descriptors));

    /* ---------------------------------------------------------------
     *                                                       Copy back
     * ------------------------------------------------------------ */
    {
      float *tmpDescr = (float*) vl_malloc(sizeof(float) * descrSize);

      double *outFrameIter = (double*) _frames->data;
      vl_uint8 *outDescrIter = (vl_uint8 *) _descriptors->data;
      for (int k = 0; k < numFrames; ++k) {
        *outFrameIter++ = frames[k].y;
        *outFrameIter++ = frames[k].x;

        /* We have an implied / 2 in the norm, because of the clipping
           below */
        if (norm)
          *outFrameIter++ = frames[k].norm;

        vl_dsift_transpose_descriptor(
          tmpDescr,
          descrs + descrSize * k,
          geom->numBinT,
          geom->numBinX,
          geom->numBinY);

        for (int i = 0; i < descrSize; ++i) {
          *outDescrIter++ = (vl_uint8) (VL_MIN(
            512.0F * tmpDescr[i], 255.0F));
        }
      }
      vl_free(tmpDescr);
    }
    vl_dsift_delete(dsift);
  }

  return tuple;
}