void kmeans() { const vl_size data_dim = 2; const vl_size num_data = 1000; float data[data_dim * num_data] = { 0.0f, }; for (vl_size i = 0; i < data_dim * num_data; ++i) data[i] = (float)std::rand() / RAND_MAX; // std::cout << "start processing ..." << std::endl; const vl_size num_clusters = 3; const VlVectorComparisonType distance = VlDistanceL2; const vl_size num_max_iterations = 100; VlKMeans *kmeans = vl_kmeans_new(VL_TYPE_DOUBLE, distance); vl_kmeans_set_max_num_iterations(kmeans, num_max_iterations); if (true) { // initialization. #if 1 vl_kmeans_init_centers_with_rand_data(kmeans, (void const *)data, data_dim, num_data, num_clusters); #elif 0 vl_kmeans_init_centers_plus_plus(kmeans, (void const *)data, data_dim, num_data, num_clusters); #else { float init_centers[data_dim * num_clusters] = { 0.0f, }; for (vl_size i = 0; i < data_dim * num_clusters; ++i) init_centers[i] = (float)std::rand() / RAND_MAX; vl_kmeans_set_centers(kmeans, (void const *)init_centers, data_dim, num_clusters); } #endif // clustering. const double energy = vl_kmeans_refine_centers(kmeans, data, num_data); } else { const VlKMeansAlgorithm algorithm = VlKMeansLloyd; // VlKMeansLloyd, VlKMeansElkan, VlKMeansANN. const VlKMeansInitialization initialization = VlKMeansRandomSelection; // VlKMeansRandomSelection, VlKMeansPlusPlus. const vl_size num_repetitions = 100; vl_kmeans_set_algorithm(kmeans, VlKMeansLloyd); vl_kmeans_set_initialization(kmeans, VlKMeansRandomSelection); vl_kmeans_set_num_repetitions(kmeans, num_repetitions); // clustering. const double energy = vl_kmeans_cluster(kmeans, (void const *)data, data_dim, num_data, num_clusters); } // const vl_size num_iterations = vl_kmeans_get_num_repetitions(kmeans); //const vl_type data_type = vl_kmeans_get_data_type(kmeans); // { const float *centers = (float *)vl_kmeans_get_centers(kmeans); for (int i = 0; i < num_clusters; ++i) { std::cout << '('; for (int j = 0; j < data_dim; ++j) { if (j) std::cout << ','; std::cout << centers[i * data_dim + j]; } std::cout << ')' << std::endl; } } // { vl_uint32 assignments[num_data] = { 0, }; double distances[num_data] = { 0, }; vl_kmeans_quantize(kmeans, assignments, (void *)distances, (void const *)data, num_data); for (int i = 0; i < num_data; ++i) { std::cout << '('; for (int j = 0; j < data_dim; ++j) { if (j) std::cout << ','; std::cout << data[i * data_dim + j]; // TODO [check] >> is it correct? } std::cout << ") => " << assignments[i] << std::endl; } } std::cout << "end processing ..." << std::endl; // if (kmeans) { vl_kmeans_delete(kmeans); kmeans = NULL; } }
bool VLFeat::CalculateCommon(int f, bool all, int l) { string msg = "VLFeat::CalculateCommon("+ToStr(f)+","+ToStr(all)+","+ ToStr(l)+") : "; // if (!do_fisher && !do_vlad) { // cerr << msg // << "either encoding=fisher or encoding=vlad should be specified" // << endl; // return false; // } if (!gmm && !kmeans) { cerr << msg << "either gmm=xxx or kmeans=xxx option should be given" << endl; return false; } cox::tictac::func tt(tics, "VLFeat::CalculateCommon"); // obs! only some parameters here, should be in ProcessOptionsAndRemove() // too, also scales and geometry should be made specifiable... bool normalizeSift = false, renormalize = true, flat_window = true; size_t step = 3, binsize = 8; EnsureImage(); int width = Width(true), height = Height(true); if (FrameVerbose()) cout << msg+"wxh=" << width << "x" << height << "=" << width*height << endl; vector<float> rgbcoeff { 0.2989, 0.5870, 0.1140 }; imagedata idata = CurrentFrame(); idata.convert(imagedata::pixeldata_float); idata.force_one_channel(rgbcoeff); vector<float> dsift; size_t descr_size_orig = 0, descr_size_final = 0; vector<float> scales { 1.0000, 0.7071, 0.5000, 0.3536, 0.2500 }; // vector<float> scales { 1.0000 }; for (size_t i=0; i<scales.size(); i++) { if (KeyPointVerbose()) cout << "Starting vl_dsift_process() in scale " << scales[i] << endl; imagedata simg = idata; if (scales[i]!=1) { scalinginfo si(simg.width(), simg.height(), (int)floor(scales[i]*simg.width()+0.5), (int)floor(scales[i]*simg.height()+0.5)); simg.rescale(si, 1); } // VlDsiftFilter *sf = vl_dsift_new(simg.width(), simg.height()); VlDsiftFilter *sf = vl_dsift_new_basic(simg.width(), simg.height(), step, binsize); // opts.scales = logspace(log10(1), log10(.25), 5) ; // void vl_dsift_set_bounds ( VlDsiftFilter * self, // int minX, // int minY, // int maxX, // int maxY // ); // VlDsiftDescriptorGeometry geom = { 8, 4, 4, 0, 0 }; // vl_dsift_set_geometry(sf, &geom); //vl_dsift_set_steps(sf, 3, 3); //vl_dsift_set_window_size(sf, 8); vl_dsift_set_flat_window(sf, flat_window); // aka fast in matlab vector<float> imgvec = simg.get_float(); const float *img_fp = &imgvec[0]; // cout << "IMAGE = " << img_fp[0] << " " << img_fp[1] << " " // << img_fp[2] << " ... " << img_fp[41] << endl; vl_dsift_process(sf, img_fp); // if opts.rootSift // false // descrs{si} = sqrt(descrs{si}) ; // end // if opts.normalizeSift //true // descrs{si} = snorm(descrs{si}) ; // end descr_size_orig = sf->descrSize; size_t nf = sf->numFrames; const VlDsiftKeypoint *k = sf->frames; float *d = sf->descrs; if (KeyPointVerbose()) cout << " found " << sf->numFrames << " 'frames' in " << simg.info() << endl << " descriptor dim " << descr_size_orig << endl; if (PixelVerbose()) for (size_t i=0; i<nf; i++) { cout << " i=" << i << " x=" << k[i].x << " y=" << k[i].y << " s=" << k[i].s << " norm=" << k[i].norm; if (FullVerbose()) { cout << " RAW"; for (size_t j=0; j<descr_size_orig; j++) cout << " " << d[i*descr_size_orig+j]; } cout << endl; } if (normalizeSift) { for (size_t i=0; i<nf; i++) { if (PixelVerbose()) cout << " i=" << i << " x=" << k[i].x << " y=" << k[i].y << " s=" << k[i].s << " norm=" << k[i].norm; double mul = 0.0; for (size_t j=0; j<descr_size_orig; j++) mul += d[i*descr_size_orig+j]*d[i*descr_size_orig+j]; if (mul) mul = 1.0/sqrt(mul); if (FullVerbose()) cout << " NORM"; for (size_t j=0; j<descr_size_orig; j++) { d[i*descr_size_orig+j] *= mul; if (FullVerbose()) cout << " " << d[i*descr_size_orig+j]; } if (PixelVerbose()) cout << endl; } } if (!pca.vector_length()) { dsift.insert(dsift.end(), d, d+nf*descr_size_orig); descr_size_final = descr_size_orig; } else { for (size_t i=0; i<nf; i++) { vector<float> vin(d+i*descr_size_orig, d+(i+1)*descr_size_orig); vector<float> vout = pca.projection_coeff(vin); dsift.insert(dsift.end(), vout.begin(), vout.end()); } descr_size_final = pca.base_size(); } vl_dsift_delete(sf); } size_t numdata = dsift.size()/descr_size_final; const float *datain = &dsift[0]; vector<float> enc((do_fisher?2:1)*descriptor_dim()*nclusters()); float *dataout = &enc[0]; if (do_fisher) { if (FrameVerbose()) cout << msg << "fisher encoding " << numdata << " descriptors of size " << descr_size_orig << " => " << descr_size_final << " with gmm dimensionality " << descriptor_dim() << endl; if (descr_size_final!=descriptor_dim()) { cerr << msg << "dimensionality mismatch descr_size_final=" << descr_size_final << " descriptor_dim()=" << descriptor_dim() << endl; return false; } vl_fisher_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(), nclusters(), covariances(), priors(), datain, numdata, VL_FISHER_FLAG_IMPROVED) ; } if (do_vlad) { //obs! correct use of pca? if (FrameVerbose()) cout << msg << "vlad encoding " << numdata << " descriptors of size " << descr_size_final << endl; vector<vl_uint32> indexes(numdata); vector<float> distances(numdata); if (kdtree) vl_kdforest_query_with_array(kdtree, &indexes[0], 1, numdata, &distances[0], datain); else vl_kmeans_quantize(kmeans, &indexes[0], &distances[0], datain, numdata); vector<float> assignments(numdata*nclusters()); for (size_t i=0; i<numdata; i++) assignments[i * nclusters() + indexes[i]] = 1; int vlad_flags = VL_VLAD_FLAG_SQUARE_ROOT|VL_VLAD_FLAG_NORMALIZE_COMPONENTS; vl_vlad_encode(dataout, VL_TYPE_FLOAT, means(), descriptor_dim(), nclusters(), datain, numdata, &assignments[0], vlad_flags); } if (renormalize) { if (PixelVerbose()) cout << " RENORM:"; double mul = 0.0; for (size_t j=0; j<enc.size(); j++) mul += enc[j]*enc[j]; if (mul) mul = 1.0/sqrt(mul); for (size_t j=0; j<enc.size(); j++) { if (PixelVerbose()) cout << " " << enc[j]; enc[j] *= mul; if (PixelVerbose()) cout << "->" << enc[j]; } if (PixelVerbose()) cout << endl; } ((VectorData*)GetData(0))->setVector(enc); return true; }