Ejemplo n.º 1
0
int main (int argc, char *argv[])
{
    string data_file;
    string benchmark;
    string index_file;

    float W, R, desired_recall = 1.0;
    unsigned M, L, H;
    unsigned Q, K, T;
    bool do_recall = false;
    bool do_benchmark = true;
    bool use_index = false; // load the index from a file

    boost::timer timer;

    po::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "produce help message.")
        (",W", po::value<float>(&W)->default_value(1.0), "")
        (",M", po::value<unsigned>(&M)->default_value(1), "")
        (",T", po::value<unsigned>(&T)->default_value(1), "# probes")
        (",L", po::value<unsigned>(&L)->default_value(1), "# hash tables")
        (",Q", po::value<unsigned>(&Q)->default_value(100), "# queries")
        (",K", po::value<unsigned>(&K)->default_value(0), "# nearest neighbor to retrieve")
        ("radius,R", po::value<float>(&R)->default_value(numeric_limits<float>::max()), "R-NN distance range (L2)")
        ("recall", po::value<float>(&desired_recall), "desired recall")
        ("data,D", po::value<string>(&data_file), "data file")
        ("benchmark,B", po::value<string>(&benchmark), "benchmark file")
        ("index", po::value<string>(&index_file), "index file")
        (",H", po::value<unsigned>(&H)->default_value(1017881), "hash table size, use the default value.")
        ;

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm); 

    if (vm.count("help") || (vm.count("data") < 1))
    {
        cout << desc;
        return 0;
    }

    if (vm.count("radius") >= 1) {
        R *= R; // we use L2sqr in the program.
    }

    if (vm.count("recall") >= 1)
    {
        do_recall = true;
        if (K == 0) {
            cerr << "Automatic probing does not support R-NN query." << endl;
        }
    }

    if ((Q == 0) || (vm.count("benchmark") == 0)) {
        do_benchmark = false;
    }

    if (vm.count("index") == 1) {
        use_index = true;
    }

    cout << "LOADING DATA..." << endl;
    timer.restart();
    FloatMatrix data(data_file);
    cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;

    typedef MultiProbeLshIndex<unsigned> Index;

    FloatMatrix::Accessor accessor(data);
    Index index;

    // try loading index
    bool index_loaded = false;

    if (use_index) {
        ifstream is(index_file.c_str(), ios_base::binary);
        if (is) {
            is.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
            cout << "LOADING INDEX..." << endl;
            timer.restart();
            index.load(is);
            BOOST_VERIFY(is);
            cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;
            index_loaded = true;
        }
    }

    if (!index_loaded) {
        // We define a short name for the MPLSH index.
        Index::Parameter param;

        // Setup the parameters.  Note that L is not provided here.
        param.W = W;
        param.range = H; // See H in the program parameters.  You can just use the default value.
        param.repeat = M;
        param.dim = data.getDim();
        DefaultRng rng;

        index.init(param, rng, L);
        // The accessor.

        // Initialize the index structure.  Note L is passed here.
        cout << "CONSTRUCTING INDEX..." << endl;

        timer.restart();
        {
            boost::progress_display progress(data.getSize());
            for (int i = 0; i < data.getSize(); ++i)
            {
                // Insert an item to the hash table.
                // Note that only the key is passed in here.
                // MPLSH will get the feature from the accessor.
                index.insert(i, data[i]);
                ++progress;
            }
        }
        cout << boost::format("CONSTRUCTION TIME: %1%s.") % timer.elapsed() << endl;

        if (use_index) {
            timer.restart();
            cout << "SAVING INDEX..." << endl;
            {
                ofstream os(index_file.c_str(), ios_base::binary);
                os.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
                index.save(os);
            }
            cout << boost::format("SAVING TIME: %1%s") % timer.elapsed() << endl;
        }
    }

    if (do_benchmark) {

        Benchmark<> bench;
        cout << "LOADING BENCHMARK..." << endl;
        bench.load(benchmark);
        bench.resize(Q, K);
        cout << "DONE." << endl;

        for (unsigned i = 0; i < Q; ++i)
        {
            for (unsigned j = 0; j < K; ++j)
            {
                assert(bench.getAnswer(i)[j].key < data.getSize());
            }
        }

        cout << "RUNNING QUERIES..." << endl;

        Stat recall;
        Stat cost;
        metric::l2sqr<float> l2sqr(data.getDim());
        TopkScanner<FloatMatrix::Accessor, metric::l2sqr<float> > query(accessor, l2sqr, K, R);
        vector<Topk<unsigned> > topks(Q);

        timer.restart();
        if (do_recall)
            // Specify the required recall
            // and let MPLSH to guess how many bins to probe.
        {
            boost::progress_display progress(Q);
            for (unsigned i = 0; i < Q; ++i)
            {
                // Query for one point.
                query.reset(data[bench.getQuery(i)]);
                index.query_recall(data[bench.getQuery(i)], desired_recall, query);
                cost << double(query.cnt())/double(data.getSize());
                topks[i].swap(query.topk());
                ++progress;
            }
        }
        else
            // specify how many bins to probe.
        {
            boost::progress_display progress(Q);
            for (unsigned i = 0; i < Q; ++i)
            {
                query.reset(data[bench.getQuery(i)]);
                index.query(data[bench.getQuery(i)], T, query);
                cost << double(query.cnt())/double(data.getSize());
                topks[i].swap(query.topk());
                ++progress;
            }
        }

        for (unsigned i = 0; i < Q; ++i) {
            recall << bench.getAnswer(i).recall(topks[i]);
        }

        cout << boost::format("QUERY TIME: %1%s.") % timer.elapsed() << endl;

        cout << "[RECALL] " << recall.getAvg() << " +/- " << recall.getStd() << endl;
        cout << "[COST] " << cost.getAvg() << " +/- " << cost.getStd() << endl;

    }

    return 0;
}
Ejemplo n.º 2
0
int main (int argc, char *argv[])
{
    string data_file;
    string benchmark;

    float W, R = 1.0;
    unsigned M, L, H;
    unsigned Q, K, T;
    bool do_recall = false;

    Timer timer;

	po::options_description desc("Allowed options");
	desc.add_options()
		("help,h", "produce help message.")
		(",W", po::value<float>(&W)->default_value(1.0), "")
		(",M", po::value<unsigned>(&M)->default_value(1), "")
		(",L", po::value<unsigned>(&L)->default_value(1), "")
		(",H", po::value<unsigned>(&H)->default_value(1017881), "")
		(",Q", po::value<unsigned>(&Q)->default_value(1), "")
		(",K", po::value<unsigned>(&K)->default_value(1), "")
		(",T", po::value<unsigned>(&T)->default_value(1), "")
		("recall,R", po::value<float>(&R), "")
		("data,D", po::value<string>(&data_file), "")
		("benchmark,B", po::value<string>(&benchmark), "")
		;

	po::variables_map vm;
	po::store(po::parse_command_line(argc, argv, desc), vm);
	po::notify(vm);	

	if (vm.count("help") || (vm.count("data") < 1) || (vm.count("benchmark") < 1))
	{
		cout << desc;
		return 0;
	}

    if (vm.count("recall") >= 1)
    {
        do_recall = true;
    }

    cout << "Loading data...";
    Matrix<float> data(data_file);
    cout << "done." << endl;

    Benchmark<> bench(K, Q);
    cout << "Loading benchmark...";
    bench.load(benchmark);
    cout << "done." << endl;

    for (unsigned i = 0; i < Q; ++i)
    {
        for (unsigned j = 0; j < K; ++j)
        {
            assert(bench.getAnswer(i)[j].key < data.getSize());
				}
    }

    cout << "Initializing index..." << endl;

    typedef MultiProbeLshIndex<MatrixAccessor> Index;
        
    Index::Parameter param;

    param.W = W;
    param.H = H;
    param.M = M;
    param.dim = data.getDim();
    DefaultRng rng;

    MatrixAccessor accessor(data);

    Index index(param, rng, accessor, L);

    cout << "done." << endl;

    cout << "Populating index..." << endl;

    timer.tick();

    {
        boost::progress_display progress(data.getSize());
        for (unsigned i = 0; i < data.getSize(); ++i)
        {
            index.insert(i);
            ++progress;
        }
    }
    timer.tuck("CREATE");

    cout << "Running queries..." << endl;

    Stat recall;
    Stat cost;
    Topk<unsigned> topk;

    timer.tick();
    if (do_recall)
    {
        boost::progress_display progress(Q);
        for (unsigned i = 0; i < Q; ++i)
        {
            unsigned cnt;
            topk.reset(K);
            index.query(data[bench.getQuery(i)], topk, R, &cnt);
            recall << bench.getAnswer(i).recall(topk);
            cost << double(cnt)/double(data.getSize());
            ++progress;
        }
    }
    else
    {
        boost::progress_display progress(Q);
        for (unsigned i = 0; i < Q; ++i)
        {
            unsigned cnt;
            topk.reset(K);
            index.query(data[bench.getQuery(i)], topk, T, &cnt);
            recall << bench.getAnswer(i).recall(topk);
            cost << double(cnt)/double(data.getSize());
            ++progress;
        }
    }
    timer.tuck("QUERY");

    cout << "[RECALL] " << recall.getAvg() << " ± " << recall.getStd() << endl;
    cout << "[COST] " << cost.getAvg() << " ± " << cost.getStd() << endl;

    return 0;
}
Ejemplo n.º 3
0
int main (int argc, char *argv[])
{
    string data_file;
    string benchmark;

    float R, W;
    unsigned c, L, H;
    unsigned Q, K;
    bool do_benchmark = true;
    // bool use_index = false; // load the index from a file

    boost::timer timer;

    po::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "produce help message.")
        (",c", po::value<unsigned>(&c)->default_value(20), "# points to scan from each tree")
        (",L", po::value<unsigned>(&L)->default_value(1), "number of trees")
        (",H", po::value<unsigned>(&H)->default_value(10), "maximal depth of tree")
        (",W", po::value<float>(&W)->default_value(1.0), "hash function window size")
        (",Q", po::value<unsigned>(&Q)->default_value(100), "number of queries to use")
        (",K", po::value<unsigned>(&K)->default_value(50), "number of nearest neighbors to retrieve")
        (",R", po::value<float>(&R)->default_value(numeric_limits<float>::max()), "R-NN distance range")
        ("data,D", po::value<string>(&data_file), "dataset path")
        ("benchmark,B", po::value<string>(&benchmark), "benchmark path")
        // ("index", po::value<string>(&index_file), "index file")
        ;

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm); 

    if (vm.count("help") || (vm.count("data") < 1))
    {
        cout << desc;
        return 0;
    }

    if ((Q == 0) || (vm.count("benchmark") == 0)) {
        do_benchmark = false;
    }

    /*
    if (vm.count("index") == 1) {
        use_index = true;
    }
    */

    cout << "LOADING DATA..." << endl;
    timer.restart();
    FloatMatrix data(data_file);
    cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;

    //typedef Tail<RepeatHash<CauchyLsh> > MyLsh;

    typedef LSB<GaussianLsh> MyLsh;
    typedef ForestIndex<MyLsh, unsigned> Index;

    FloatMatrix::Accessor accessor(data);
    metric::l2<float> l2(data.getDim());
    Index index;

    // bool index_loaded = false;

    /*
    if (use_index) {
        ifstream is(index_file.c_str(), ios_base::binary);
        if (is) {
            is.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
            cout << "LOADING INDEX..." << endl;
            timer.restart();
            index.load(is);
            verify(is);
            cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;
            index_loaded = true;
        }
    }

    if (!index_loaded) {
        // We define a short name for the MPLSH index.
        float min = numeric_limits<float>::max();
        float max = -numeric_limits<float>::max();

        for (unsigned i = 0; i < data.getSize(); ++i) {
            for (unsigned j = 0; j < data.getDim(); ++j) {
                if (data[i][j] > max) max = data[i][j];
                if (data[i][j] < min) min = data[i][j];
            }
        }

        */

        Index::Parameter param;

        // Setup the parameters.  Note that L is not provided here.
        param.W = W;
        param.dim = data.getDim();
        DefaultRng rng;

        index.init(param, rng, L, H);
        // The accessor.

        // Initialize the index structure.  Note L is passed here.
        cout << "CONSTRUCTING INDEX..." << endl;

        timer.restart();
        {
            boost::progress_display progress(data.getSize());
            for (unsigned i = 0; i < data.getSize(); ++i)
            {
                // Insert an item to the hash table.
                // Note that only the key is passed in here.
                // MPLSH will get the feature from the accessor.
                index.insert(i, accessor);
                ++progress;
            }
        }
        cout << boost::format("CONSTRUCTION TIME: %1%s.") % timer.elapsed() << endl;

        /*
        if (use_index) {
            timer.restart();
            cout << "SAVING INDEX..." << endl;
            {
                ofstream os(index_file.c_str(), ios_base::binary);
                os.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
                index.save(os);
                verify(os);
            }
            cout << boost::format("SAVING TIME: %1%s") % timer.elapsed() << endl;
        }
    }
    */

    if (do_benchmark) {

        Benchmark<> bench;
        cout << "LOADING BENCHMARK..." << endl;
        bench.load(benchmark);
        bench.resize(Q, K);
        cout << "DONE." << endl;

        for (unsigned i = 0; i < Q; ++i)
        {
            for (unsigned j = 0; j < K; ++j)
            {
                assert(bench.getAnswer(i)[j].key < data.getSize());
            }
        }

        cout << "RUNNING QUERIES..." << endl;

        Stat recall;
        Stat cost;

        timer.restart();
        {
            TopkScanner<FloatMatrix::Accessor, metric::l2<float> > query(accessor, l2, K, R);
            boost::progress_display progress(Q);
            for (unsigned i = 0; i < Q; ++i)
            {
                query.reset(data[bench.getQuery(i)]);
                index.query(data[bench.getQuery(i)], c * L, query);
                recall << bench.getAnswer(i).recall(query.topk());
                cost << double(query.cnt())/double(data.getSize());
                ++progress;
            }
        }
        cout << boost::format("QUERY TIME: %1%s.") % timer.elapsed() << endl;

        cout << "[RECALL] " << recall.getAvg() << " +/- " << recall.getStd() << endl;
        cout << "[COST] " << cost.getAvg() << " +/- " << cost.getStd() << endl;

    }

    return 0;
}