예제 #1
0
int main (int argc, char *argv[])
{
    string prefix;
    unsigned K;
    float R;

    po::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "produce help message.")
        (",K", po::value<unsigned>(&K)->default_value(100), "number of nearest neighbors.")
        ("prefix,D", po::value<string>(&prefix), "")
        ("threshold,R", po::value<float>(&R)->default_value(std::numeric_limits<float>::max()), "radius of search.") 
        ;

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm); 

    if (vm.count("help") || (vm.count("prefix") < 1))
    {
        cout << help << endl;
        cout << desc;
        return 0;
    }

    Matrix<float> train(prefix + ".train");
    Matrix<float> test(prefix + ".test");

    Benchmark<unsigned> bench;
    bench.init(test.getSize(), test.getSize() * 100);
    boost::timer timer;

    timer.restart();

    metric::l2<float> l2(train.getDim());

    boost::progress_display progress(test.getSize());
    int totalSize = 0;
    for (unsigned i = 0; i < test.getSize(); ++i)
    {
        Topk<unsigned> &topk = bench.getAnswer(i);
       
        
        topk.reset(K,R);

        for (unsigned j = 0; j < train.getSize(); ++j)
        {
            topk << Topk<unsigned>::Element(j, l2(test[i],
                                    train[j]));
        }
   
        ++progress;
        totalSize += topk.numElements();
    }



    cout << boost::format("QUERY TIME: %1%s.") % timer.elapsed() << endl;
    cout << "Average List Size: " << float(totalSize)/test.getSize() << endl;

    if (R < std::numeric_limits<float>::max()) 
        bench.save(prefix + ".bench-eps");
    else
        bench.save(prefix + ".bench");

    return 0;
}
예제 #2
0
// Multi-probe LSH driver.
//
// Loads a float matrix from the --data file, then either loads a
// MultiProbeLshIndex from the --index file or builds one from the data (and,
// when --index was given, saves it to that file).  If a --benchmark file is
// given and Q > 0, runs Q benchmark queries and prints the recall and cost
// statistics.
//
// Returns 0 on success or when only the usage text is printed, and 1 when
// --recall is combined with K == 0 for a run that would execute queries.
int main (int argc, char *argv[])
{
    string data_file;
    string benchmark;
    string index_file;

    float W, R, desired_recall = 1.0;
    unsigned M, L, H;
    unsigned Q, K, T;
    bool do_recall = false;
    bool do_benchmark = true;
    bool use_index = false; // load the index from a file

    boost::timer timer;

    po::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "produce help message.")
        (",W", po::value<float>(&W)->default_value(1.0), "")
        (",M", po::value<unsigned>(&M)->default_value(1), "")
        (",T", po::value<unsigned>(&T)->default_value(1), "# probes")
        (",L", po::value<unsigned>(&L)->default_value(1), "# hash tables")
        (",Q", po::value<unsigned>(&Q)->default_value(100), "# queries")
        (",K", po::value<unsigned>(&K)->default_value(0), "# nearest neighbor to retrieve")
        ("radius,R", po::value<float>(&R)->default_value(numeric_limits<float>::max()), "R-NN distance range (L2)")
        ("recall", po::value<float>(&desired_recall), "desired recall")
        ("data,D", po::value<string>(&data_file), "data file")
        ("benchmark,B", po::value<string>(&benchmark), "benchmark file")
        ("index", po::value<string>(&index_file), "index file")
        (",H", po::value<unsigned>(&H)->default_value(1017881), "hash table size, use the default value.")
        ;

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm); 

    if (vm.count("help") || (vm.count("data") < 1))
    {
        cout << desc;
        return 0;
    }

    // An option declared with default_value() is always present in vm, so
    // vm.count("radius") cannot tell whether the user passed -R.  Square only
    // an explicitly supplied radius; squaring the float-max default would
    // overflow the float range.
    if (!vm["radius"].defaulted()) {
        R *= R; // we use L2sqr in the program.
    }

    // Queries are only run when a benchmark file is given and Q is non-zero.
    if ((Q == 0) || (vm.count("benchmark") == 0)) {
        do_benchmark = false;
    }

    if (vm.count("recall") >= 1)
    {
        do_recall = true;
        if (K == 0) {
            cerr << "Automatic probing does not support R-NN query." << endl;
            // Fail before the expensive load/build steps rather than running
            // the unsupported combination.  An index-only run (no queries)
            // is unaffected and proceeds as before.
            if (do_benchmark) {
                return 1;
            }
        }
    }

    if (vm.count("index") == 1) {
        use_index = true;
    }

    cout << "LOADING DATA..." << endl;
    timer.restart();
    FloatMatrix data(data_file);
    cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;

    // Short name for the MPLSH index type.
    typedef MultiProbeLshIndex<unsigned> Index;

    FloatMatrix::Accessor accessor(data);
    Index index;

    // try loading index
    bool index_loaded = false;

    if (use_index) {
        ifstream is(index_file.c_str(), ios_base::binary);
        // If the file cannot be opened, fall through and build the index.
        if (is) {
            // Make any read problem throw instead of being silently ignored.
            is.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
            cout << "LOADING INDEX..." << endl;
            timer.restart();
            index.load(is);
            BOOST_VERIFY(is);
            cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;
            index_loaded = true;
        }
    }

    if (!index_loaded) {
        Index::Parameter param;

        // Setup the parameters.  Note that L is not provided here.
        param.W = W;
        param.range = H; // See H in the program parameters.  You can just use the default value.
        param.repeat = M;
        param.dim = data.getDim();
        DefaultRng rng;

        // Initialize the index structure.  Note L is passed here.
        index.init(param, rng, L);

        cout << "CONSTRUCTING INDEX..." << endl;

        timer.restart();
        {
            boost::progress_display progress(data.getSize());
            for (int i = 0; i < data.getSize(); ++i)
            {
                // Insert item i into the hash tables, passing its key
                // together with its feature vector data[i].
                index.insert(i, data[i]);
                ++progress;
            }
        }
        cout << boost::format("CONSTRUCTION TIME: %1%s.") % timer.elapsed() << endl;

        if (use_index) {
            // --index was given but nothing was loaded: persist the freshly
            // built index to that file.
            timer.restart();
            cout << "SAVING INDEX..." << endl;
            {
                ofstream os(index_file.c_str(), ios_base::binary);
                os.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
                index.save(os);
            }
            cout << boost::format("SAVING TIME: %1%s") % timer.elapsed() << endl;
        }
    }

    if (do_benchmark) {

        Benchmark<> bench;
        cout << "LOADING BENCHMARK..." << endl;
        bench.load(benchmark);
        bench.resize(Q, K);
        cout << "DONE." << endl;

        // Debug-build sanity check: every answer key must be a valid row id.
        for (unsigned i = 0; i < Q; ++i)
        {
            for (unsigned j = 0; j < K; ++j)
            {
                assert(bench.getAnswer(i)[j].key < data.getSize());
            }
        }

        cout << "RUNNING QUERIES..." << endl;

        Stat recall;
        Stat cost;
        metric::l2sqr<float> l2sqr(data.getDim());
        TopkScanner<FloatMatrix::Accessor, metric::l2sqr<float> > query(accessor, l2sqr, K, R);
        // Results are collected here so that recall is computed after the
        // timed query loop.
        vector<Topk<unsigned> > topks(Q);

        timer.restart();
        if (do_recall)
            // Specify the required recall
            // and let MPLSH to guess how many bins to probe.
        {
            boost::progress_display progress(Q);
            for (unsigned i = 0; i < Q; ++i)
            {
                // Query for one point.
                query.reset(data[bench.getQuery(i)]);
                index.query_recall(data[bench.getQuery(i)], desired_recall, query);
                // Cost is the scanner's count relative to the dataset size.
                cost << double(query.cnt())/double(data.getSize());
                topks[i].swap(query.topk());
                ++progress;
            }
        }
        else
            // specify how many bins to probe.
        {
            boost::progress_display progress(Q);
            for (unsigned i = 0; i < Q; ++i)
            {
                query.reset(data[bench.getQuery(i)]);
                index.query(data[bench.getQuery(i)], T, query);
                cost << double(query.cnt())/double(data.getSize());
                topks[i].swap(query.topk());
                ++progress;
            }
        }

        for (unsigned i = 0; i < Q; ++i) {
            recall << bench.getAnswer(i).recall(topks[i]);
        }

        cout << boost::format("QUERY TIME: %1%s.") % timer.elapsed() << endl;

        cout << "[RECALL] " << recall.getAvg() << " +/- " << recall.getStd() << endl;
        cout << "[COST] " << cost.getAvg() << " +/- " << cost.getStd() << endl;

    }

    return 0;
}
예제 #3
0
int main (int argc, char *argv[])
{
    string data_file;
    string benchmark;

    float R, W;
    unsigned c, L, H;
    unsigned Q, K;
    bool do_benchmark = true;
    // bool use_index = false; // load the index from a file

    boost::timer timer;

    po::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "produce help message.")
        (",c", po::value<unsigned>(&c)->default_value(20), "# points to scan from each tree")
        (",L", po::value<unsigned>(&L)->default_value(1), "number of trees")
        (",H", po::value<unsigned>(&H)->default_value(10), "maximal depth of tree")
        (",W", po::value<float>(&W)->default_value(1.0), "hash function window size")
        (",Q", po::value<unsigned>(&Q)->default_value(100), "number of queries to use")
        (",K", po::value<unsigned>(&K)->default_value(50), "number of nearest neighbors to retrieve")
        (",R", po::value<float>(&R)->default_value(numeric_limits<float>::max()), "R-NN distance range")
        ("data,D", po::value<string>(&data_file), "dataset path")
        ("benchmark,B", po::value<string>(&benchmark), "benchmark path")
        // ("index", po::value<string>(&index_file), "index file")
        ;

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm); 

    if (vm.count("help") || (vm.count("data") < 1))
    {
        cout << desc;
        return 0;
    }

    if ((Q == 0) || (vm.count("benchmark") == 0)) {
        do_benchmark = false;
    }

    /*
    if (vm.count("index") == 1) {
        use_index = true;
    }
    */

    cout << "LOADING DATA..." << endl;
    timer.restart();
    FloatMatrix data(data_file);
    cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;

    //typedef Tail<RepeatHash<CauchyLsh> > MyLsh;

    typedef LSB<GaussianLsh> MyLsh;
    typedef ForestIndex<MyLsh, unsigned> Index;

    FloatMatrix::Accessor accessor(data);
    metric::l2<float> l2(data.getDim());
    Index index;

    // bool index_loaded = false;

    /*
    if (use_index) {
        ifstream is(index_file.c_str(), ios_base::binary);
        if (is) {
            is.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
            cout << "LOADING INDEX..." << endl;
            timer.restart();
            index.load(is);
            verify(is);
            cout << boost::format("LOAD TIME: %1%s.") % timer.elapsed() << endl;
            index_loaded = true;
        }
    }

    if (!index_loaded) {
        // We define a short name for the MPLSH index.
        float min = numeric_limits<float>::max();
        float max = -numeric_limits<float>::max();

        for (unsigned i = 0; i < data.getSize(); ++i) {
            for (unsigned j = 0; j < data.getDim(); ++j) {
                if (data[i][j] > max) max = data[i][j];
                if (data[i][j] < min) min = data[i][j];
            }
        }

        */

        Index::Parameter param;

        // Setup the parameters.  Note that L is not provided here.
        param.W = W;
        param.dim = data.getDim();
        DefaultRng rng;

        index.init(param, rng, L, H);
        // The accessor.

        // Initialize the index structure.  Note L is passed here.
        cout << "CONSTRUCTING INDEX..." << endl;

        timer.restart();
        {
            boost::progress_display progress(data.getSize());
            for (unsigned i = 0; i < data.getSize(); ++i)
            {
                // Insert an item to the hash table.
                // Note that only the key is passed in here.
                // MPLSH will get the feature from the accessor.
                index.insert(i, accessor);
                ++progress;
            }
        }
        cout << boost::format("CONSTRUCTION TIME: %1%s.") % timer.elapsed() << endl;

        /*
        if (use_index) {
            timer.restart();
            cout << "SAVING INDEX..." << endl;
            {
                ofstream os(index_file.c_str(), ios_base::binary);
                os.exceptions(ios_base::eofbit | ios_base::failbit | ios_base::badbit);
                index.save(os);
                verify(os);
            }
            cout << boost::format("SAVING TIME: %1%s") % timer.elapsed() << endl;
        }
    }
    */

    if (do_benchmark) {

        Benchmark<> bench;
        cout << "LOADING BENCHMARK..." << endl;
        bench.load(benchmark);
        bench.resize(Q, K);
        cout << "DONE." << endl;

        for (unsigned i = 0; i < Q; ++i)
        {
            for (unsigned j = 0; j < K; ++j)
            {
                assert(bench.getAnswer(i)[j].key < data.getSize());
            }
        }

        cout << "RUNNING QUERIES..." << endl;

        Stat recall;
        Stat cost;

        timer.restart();
        {
            TopkScanner<FloatMatrix::Accessor, metric::l2<float> > query(accessor, l2, K, R);
            boost::progress_display progress(Q);
            for (unsigned i = 0; i < Q; ++i)
            {
                query.reset(data[bench.getQuery(i)]);
                index.query(data[bench.getQuery(i)], c * L, query);
                recall << bench.getAnswer(i).recall(query.topk());
                cost << double(query.cnt())/double(data.getSize());
                ++progress;
            }
        }
        cout << boost::format("QUERY TIME: %1%s.") % timer.elapsed() << endl;

        cout << "[RECALL] " << recall.getAvg() << " +/- " << recall.getStd() << endl;
        cout << "[COST] " << cost.getAvg() << " +/- " << cost.getStd() << endl;

    }

    return 0;
}