Пример #1
0
// Round-trip test: a fitted NaiveBayes model is written to a Boost text
// archive in a temporary file, read back into a fresh instance, and the
// restored model must score the same example identically to the original.
TEST(naive_bayes, serialization) {
    NaiveBayes bayes;
    std::vector<std::string> words1{"hello", "world", "world"};
    std::vector<std::string> words2{"f**k", "world", "world"};
    std::vector<Example> examples = {
        Example(words1, "positive"),
        Example(words2, "negative")
    };
    bayes.fit(examples);
    auto scores = bayes.scores(examples[0]);

    TempFile tmp_file;
    {
        // Scope the archive together with its stream: locals are destroyed in
        // reverse order of declaration, so the archive is torn down first,
        // while the stream is still open, and the stream is flushed and
        // closed afterwards. This follows the usage pattern shown in the
        // Boost.Serialization tutorial and guarantees the file is complete
        // before it is reopened for reading below.
        std::ofstream ofs(tmp_file.filename);
        boost::archive::text_oarchive oa(ofs);
        oa << bayes;
    }

    NaiveBayes bayes_restore;
    {
        // Same lifetime rule for the input side: archive before stream.
        std::ifstream ifs(tmp_file.filename);
        boost::archive::text_iarchive ia(ifs);
        ia >> bayes_restore;
    }

    auto scores_restore = bayes_restore.scores(examples[0]);

    EXPECT_EQ(scores.get("positive"), scores_restore.get("positive"));
    EXPECT_EQ(scores.get("negative"), scores_restore.get("negative"));
}
Пример #2
0
// Smoke test: fits a model on two labelled examples ("positive" and
// "negative") and hands the scores of each training example to print_scores.
// It makes no assertions.
TEST(naive_bayes, example) {
    NaiveBayes classifier;

    std::vector<std::string> friendly_words{"hello", "world", "world"};
    std::vector<std::string> rude_words{"f**k", "world", "world"};
    std::vector<Example> training_set{
        Example(friendly_words, "positive"),
        Example(rude_words, "negative")
    };

    classifier.fit(training_set);

    // Score every training example in order, first to last.
    for (auto& sample : training_set) {
        print_scores(classifier.scores(sample));
    }
}
Пример #3
0
// Scoring an example whose words never occurred in the training data is
// expected to produce an empty score map (size 0, sum 0) instead of the
// smoothed class distribution.
TEST(naive_bayes, nomatching_words_example) {
    NaiveBayes classifier;

    std::vector<std::string> greeting_words{"hello", "world"};
    std::vector<std::string> morning_words{"good", "morning"};
    std::vector<Example> training_set{
        Example(greeting_words, "positive"),
        Example(morning_words, "negative")
    };
    classifier.fit(training_set);

    // Neither of these words appears in the training examples above.
    std::vector<std::string> unseen_words{"escape", "notfound"};
    Example unseen_example(unseen_words, "positive");
    auto unseen_scores = classifier.scores(unseen_example);

    EXPECT_EQ(0, unseen_scores.size());
    EXPECT_EQ(0, unseen_scores.sum());
}