Exemple #1
0
/**
 * Cross-validates classifier `c` over the ids returned by `idx.docs()` and
 * asserts that the resulting accuracy lies between `min_accuracy` and 100.0
 * (checked with ASSERT_GREATER / ASSERT_LESS respectively).
 *
 * @param idx index supplying the document ids
 * @param c classifier whose `cross_validate` is exercised
 * @param min_accuracy lower bound the measured accuracy must exceed
 */
void check_cv(Index& idx, Classifier& c, double min_accuracy)
{
    // second argument handed to cross_validate; presumably the fold count
    constexpr int num_folds = 5;

    std::vector<doc_id> all_docs = idx.docs();
    classify::confusion_matrix mtx = c.cross_validate(all_docs, num_folds);

    // lower bound is caller-supplied, upper bound is the fixed value 100.0
    ASSERT_GREATER(mtx.accuracy(), min_accuracy);
    ASSERT_LESS(mtx.accuracy(), 100.0);
}
Exemple #2
0
/**
 * Cross-validates classifier `c` over the ids returned by `idx.docs()`,
 * reports how long the call took on std::cerr, prints the confusion matrix
 * and its statistics, and returns the matrix.
 *
 * @param idx index supplying the document ids
 * @param c classifier whose `cross_validate` is timed
 * @param even forwarded unchanged as the third argument of `cross_validate`
 * @return the confusion matrix produced by the cross-validation run
 */
classify::confusion_matrix cv(Index& idx, Classifier& c, bool even)
{
    std::vector<doc_id> all_docs = idx.docs();
    classify::confusion_matrix result;

    // the work being timed: writes its outcome into `result`
    auto run_cross_validation = [&]
    {
        result = c.cross_validate(all_docs, 5, even);
    };
    auto elapsed = common::time<std::chrono::seconds>(run_cross_validation);

    std::cerr << "time elapsed: " << elapsed.count() << "s" << std::endl;

    result.print();
    result.print_stats();
    return result;
}
Exemple #3
0
/**
 * Trains classifier `c` on one part of the index's documents, tests it on the
 * rest, and asserts that the test accuracy lies between `min_accuracy` and
 * 100.0 (checked with ASSERT_GREATER / ASSERT_LESS respectively).
 *
 * The ids are shuffled with a std::mt19937 seeded with the constant 47; the
 * first size/8 shuffled ids (integer division) become the test set and the
 * remaining ids the training set.
 *
 * @param idx index supplying the document ids
 * @param c classifier to train and test
 * @param min_accuracy lower bound the measured accuracy must exceed
 */
void check_split(Index& idx, Classifier& c, double min_accuracy)
{
    // shuffle with a fixed seed
    std::vector<doc_id> shuffled = idx.docs();
    std::mt19937 rng(47);
    std::shuffle(shuffled.begin(), shuffled.end(), rng);

    // [begin, boundary) is held out for testing, [boundary, end) is for training
    const size_t num_test = shuffled.size() / 8;
    auto boundary = shuffled.begin() + num_test;
    std::vector<doc_id> test_docs(shuffled.begin(), boundary);
    std::vector<doc_id> train_docs(boundary, shuffled.end());

    // fit on the training portion, then evaluate on the held-out portion
    c.train(train_docs);
    classify::confusion_matrix mtx = c.test(test_docs);

    ASSERT_GREATER(mtx.accuracy(), min_accuracy);
    ASSERT_LESS(mtx.accuracy(), 100.0);
}
/**
 * Uses every document in the index as a query and checks that the ranker
 * returns that same document at the top of the results.
 *
 * For each position `i` in `[0, idx.num_docs())` a query document is built
 * from the path of the i-th id in `idx.docs()`, given `encoding`, and scored
 * with `r.score(idx, query)`. The result must hold exactly 10 entries, and
 * entry 0 must be `i`. The one tolerated exception is when entry 1 is `i` and
 * entries 0 and 1 have approximately equal scores.
 *
 * @param r ranker under test
 * @param idx index that is both the query source and the search target
 * @param encoding encoding assigned to each query document
 */
void test_rank(Ranker& r, Index& idx, const std::string& encoding)
{
    // Fetch the id list once instead of on every iteration: only one element
    // per pass is needed, and the list does not depend on the loop variable.
    const auto docs = idx.docs();

    for (size_t i = 0; i < idx.num_docs(); ++i)
    {
        auto d_id = docs[i];
        // NOTE(review): the query id and the expected ids below use the loop
        // index `i`, not `d_id`; this presumes docs()[i] == i -- confirm.
        corpus::document query{idx.doc_path(d_id), doc_id{i}};
        query.encoding(encoding);

        auto ranking = r.score(idx, query);
        ASSERT_EQUAL(ranking.size(), 10); // default is 10 docs

        // Since we're searching for a document that is already in the index,
        // that document should be ranked first. The corpus contains a few
        // duplicate documents, though, so it may instead come second with a
        // score (approximately) tied with the first result.
        if (ranking[0].first != i)
        {
            ASSERT_EQUAL(ranking[1].first, i);
            ASSERT_APPROX_EQUAL(ranking[0].second, ranking[1].second);
        }
    }
}