int main (int argc, char *argv[]) { string data_file; string benchmark; string index_file; float W, R, desired_recall = 1.0; unsigned M, L, H; unsigned Q, K, T; bool do_recall = false; bool do_benchmark = true; bool use_index = false; // load the index from a file boost::timer timer; po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message.") (",W", po::value<float>(&W)->default_value(1.0), "") (",M", po::value<unsigned>(&M)->default_value(1), "") (",T", po::value<unsigned>(&T)->default_value(1), "# probes") (",L", po::value<unsigned>(&L)->default_value(1), "# hash tables") (",Q", po::value<unsigned>(&Q)->default_value(100), "# queries") (",K", po::value<unsigned>(&K)->default_value(0), "# nearest neighbor to retrieve") ("radius,R", po::value<float>(&R)->default_value(numeric_limits<float>::max()), "R-NN distance range (L2)") ("recall", po::value<float>(&desired_recall), "desired recall") ("data,D", po::value<string>(&data_file), "data file") ("benchmark,B", po::value<string>(&benchmark), "benchmark file") ("index", po::value<string>(&index_file), "index file") (",H", po::value<unsigned>(&H)->default_value(1017881), "hash table size, use the default value.") ; po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); if (vm.count("help") || (vm.count("data") < 1)) { cout << desc; return 0; } if (vm.count("radius") >= 1) { R *= R; // we use L2sqr in the program. } if (vm.count("recall") >= 1) { do_recall = true; if (K == 0) { cerr << "Automatic probing does not support R-NN query." << endl; } } if ((Q == 0) || (vm.count("benchmark") == 0)) { do_benchmark = false; } if (vm.count("index") == 1) { use_index = true; } cout << "LOADING DATA..." << endl; timer.restart(); FloatMatrix data(data_file); cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl; typedef MultiProbeLshIndex<unsigned> Index; FloatMatrix::Accessor accessor(data); Index index; // try loading index bool index_loaded = false; if (use_index) { ifstream is(index_file.c_str(), ios_base::binary); if (is) { is.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit); cout << "LOADING INDEX..." << endl; timer.restart(); index.load(is); BOOST_VERIFY(is); cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl; index_loaded = true; } } if (!index_loaded) { // We define a short name for the MPLSH index. Index::Parameter param; // Setup the parameters. Note that L is not provided here. param.W = W; param.range = H; // See H in the program parameters. You can just use the default value. param.repeat = M; param.dim = data.getDim(); DefaultRng rng; index.init(param, rng, L); // The accessor. // Initialize the index structure. Note L is passed here. cout << "CONSTRUCTING INDEX..." << endl; timer.restart(); { boost::progress_display progress(data.getSize()); for (int i = 0; i < data.getSize(); ++i) { // Insert an item to the hash table. // Note that only the key is passed in here. // MPLSH will get the feature from the accessor. index.insert(i, data[i]); ++progress; } } cout << boost::format("CONSTRUCTION TIME: %1%s.") % timer.elapsed() << endl; if (use_index) { timer.restart(); cout << "SAVING INDEX..." << endl; { ofstream os(index_file.c_str(), ios_base::binary); os.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit); index.save(os); } cout << boost::format("SAVING TIME: %1%s") % timer.elapsed() << endl; } } if (do_benchmark) { Benchmark<> bench; cout << "LOADING BENCHMARK..." << endl; bench.load(benchmark); bench.resize(Q, K); cout << "DONE." << endl; for (unsigned i = 0; i < Q; ++i) { for (unsigned j = 0; j < K; ++j) { assert(bench.getAnswer(i)[j].key < data.getSize()); } } cout << "RUNNING QUERIES..." << endl; Stat recall; Stat cost; metric::l2sqr<float> l2sqr(data.getDim()); TopkScanner<FloatMatrix::Accessor, metric::l2sqr<float> > query(accessor, l2sqr, K, R); vector<Topk<unsigned> > topks(Q); timer.restart(); if (do_recall) // Specify the required recall // and let MPLSH to guess how many bins to probe. { boost::progress_display progress(Q); for (unsigned i = 0; i < Q; ++i) { // Query for one point. query.reset(data[bench.getQuery(i)]); index.query_recall(data[bench.getQuery(i)], desired_recall, query); cost << double(query.cnt())/double(data.getSize()); topks[i].swap(query.topk()); ++progress; } } else // specify how many bins to probe. { boost::progress_display progress(Q); for (unsigned i = 0; i < Q; ++i) { query.reset(data[bench.getQuery(i)]); index.query(data[bench.getQuery(i)], T, query); cost << double(query.cnt())/double(data.getSize()); topks[i].swap(query.topk()); ++progress; } } for (unsigned i = 0; i < Q; ++i) { recall << bench.getAnswer(i).recall(topks[i]); } cout << boost::format("QUERY TIME: %1%s.") % timer.elapsed() << endl; cout << "[RECALL] " << recall.getAvg() << " +/- " << recall.getStd() << endl; cout << "[COST] " << cost.getAvg() << " +/- " << cost.getStd() << endl; } return 0; }
int main (int argc, char *argv[]) { string data_file; string benchmark; float W, R = 1.0; unsigned M, L, H; unsigned Q, K, T; bool do_recall = false; Timer timer; po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message.") (",W", po::value<float>(&W)->default_value(1.0), "") (",M", po::value<unsigned>(&M)->default_value(1), "") (",L", po::value<unsigned>(&L)->default_value(1), "") (",H", po::value<unsigned>(&H)->default_value(1017881), "") (",Q", po::value<unsigned>(&Q)->default_value(1), "") (",K", po::value<unsigned>(&K)->default_value(1), "") (",T", po::value<unsigned>(&T)->default_value(1), "") ("recall,R", po::value<float>(&R), "") ("data,D", po::value<string>(&data_file), "") ("benchmark,B", po::value<string>(&benchmark), "") ; po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); if (vm.count("help") || (vm.count("data") < 1) || (vm.count("benchmark") < 1)) { cout << desc; return 0; } if (vm.count("recall") >= 1) { do_recall = true; } cout << "Loading data..."; Matrix<float> data(data_file); cout << "done." << endl; Benchmark<> bench(K, Q); cout << "Loading benchmark..."; bench.load(benchmark); cout << "done." << endl; for (unsigned i = 0; i < Q; ++i) { for (unsigned j = 0; j < K; ++j) { assert(bench.getAnswer(i)[j].key < data.getSize()); } } cout << "Initializing index..." << endl; typedef MultiProbeLshIndex<MatrixAccessor> Index; Index::Parameter param; param.W = W; param.H = H; param.M = M; param.dim = data.getDim(); DefaultRng rng; MatrixAccessor accessor(data); Index index(param, rng, accessor, L); cout << "done." << endl; cout << "Populating index..." << endl; timer.tick(); { boost::progress_display progress(data.getSize()); for (unsigned i = 0; i < data.getSize(); ++i) { index.insert(i); ++progress; } } timer.tuck("CREATE"); cout << "Running queries..." << endl; Stat recall; Stat cost; Topk<unsigned> topk; timer.tick(); if (do_recall) { boost::progress_display progress(Q); for (unsigned i = 0; i < Q; ++i) { unsigned cnt; topk.reset(K); index.query(data[bench.getQuery(i)], topk, R, &cnt); recall << bench.getAnswer(i).recall(topk); cost << double(cnt)/double(data.getSize()); ++progress; } } else { boost::progress_display progress(Q); for (unsigned i = 0; i < Q; ++i) { unsigned cnt; topk.reset(K); index.query(data[bench.getQuery(i)], topk, T, &cnt); recall << bench.getAnswer(i).recall(topk); cost << double(cnt)/double(data.getSize()); ++progress; } } timer.tuck("QUERY"); cout << "[RECALL] " << recall.getAvg() << " ± " << recall.getStd() << endl; cout << "[COST] " << cost.getAvg() << " ± " << cost.getStd() << endl; return 0; }
int main (int argc, char *argv[]) { string data_file; string benchmark; float R, W; unsigned c, L, H; unsigned Q, K; bool do_benchmark = true; // bool use_index = false; // load the index from a file boost::timer timer; po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message.") (",c", po::value<unsigned>(&c)->default_value(20), "# points to scan from each tree") (",L", po::value<unsigned>(&L)->default_value(1), "number of trees") (",H", po::value<unsigned>(&H)->default_value(10), "maximal depth of tree") (",W", po::value<float>(&W)->default_value(1.0), "hash function window size") (",Q", po::value<unsigned>(&Q)->default_value(100), "number of queries to use") (",K", po::value<unsigned>(&K)->default_value(50), "number of nearest neighbors to retrieve") (",R", po::value<float>(&R)->default_value(numeric_limits<float>::max()), "R-NN distance range") ("data,D", po::value<string>(&data_file), "dataset path") ("benchmark,B", po::value<string>(&benchmark), "benchmark path") // ("index", po::value<string>(&index_file), "index file") ; po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); if (vm.count("help") || (vm.count("data") < 1)) { cout << desc; return 0; } if ((Q == 0) || (vm.count("benchmark") == 0)) { do_benchmark = false; } /* if (vm.count("index") == 1) { use_index = true; } */ cout << "LOADING DATA..." << endl; timer.restart(); FloatMatrix data(data_file); cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl; //typedef Tail<RepeatHash<CauchyLsh> > MyLsh; typedef LSB<GaussianLsh> MyLsh; typedef ForestIndex<MyLsh, unsigned> Index; FloatMatrix::Accessor accessor(data); metric::l2<float> l2(data.getDim()); Index index; // bool index_loaded = false; /* if (use_index) { ifstream is(index_file.c_str(), ios_base::binary); if (is) { is.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit); cout << "LOADING INDEX..." << endl; timer.restart(); index.load(is); verify(is); cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl; index_loaded = true; } } if (!index_loaded) { // We define a short name for the MPLSH index. float min = numeric_limits<float>::max(); float max = -numeric_limits<float>::max(); for (unsigned i = 0; i < data.getSize(); ++i) { for (unsigned j = 0; j < data.getDim(); ++j) { if (data[i][j] > max) max = data[i][j]; if (data[i][j] < min) min = data[i][j]; } } */ Index::Parameter param; // Setup the parameters. Note that L is not provided here. param.W = W; param.dim = data.getDim(); DefaultRng rng; index.init(param, rng, L, H); // The accessor. // Initialize the index structure. Note L is passed here. cout << "CONSTRUCTING INDEX..." << endl; timer.restart(); { boost::progress_display progress(data.getSize()); for (unsigned i = 0; i < data.getSize(); ++i) { // Insert an item to the hash table. // Note that only the key is passed in here. // MPLSH will get the feature from the accessor. index.insert(i, accessor); ++progress; } } cout << boost::format("CONSTRUCTION TIME: %1%s.") % timer.elapsed() << endl; /* if (use_index) { timer.restart(); cout << "SAVING INDEX..." << endl; { ofstream os(index_file.c_str(), ios_base::binary); os.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit); index.save(os); verify(os); } cout << boost::format("SAVING TIME: %1%s") % timer.elapsed() << endl; } } */ if (do_benchmark) { Benchmark<> bench; cout << "LOADING BENCHMARK..." << endl; bench.load(benchmark); bench.resize(Q, K); cout << "DONE." << endl; for (unsigned i = 0; i < Q; ++i) { for (unsigned j = 0; j < K; ++j) { assert(bench.getAnswer(i)[j].key < data.getSize()); } } cout << "RUNNING QUERIES..." << endl; Stat recall; Stat cost; timer.restart(); { TopkScanner<FloatMatrix::Accessor, metric::l2<float> > query(accessor, l2, K, R); boost::progress_display progress(Q); for (unsigned i = 0; i < Q; ++i) { query.reset(data[bench.getQuery(i)]); index.query(data[bench.getQuery(i)], c * L, query); recall << bench.getAnswer(i).recall(query.topk()); cost << double(query.cnt())/double(data.getSize()); ++progress; } } cout << boost::format("QUERY TIME: %1%s.") % timer.elapsed() << endl; cout << "[RECALL] " << recall.getAvg() << " +/- " << recall.getStd() << endl; cout << "[COST] " << cost.getAvg() << " +/- " << cost.getStd() << endl; } return 0; }