Beispiel #1
0
std::string get_content(const corpus::document& doc)
{
    if (!doc.contains_content())
        throw analyzer_exception{
            "document content was not populated for analysis"};

    return utf::to_utf8(doc.content(), doc.encoding());
}
Beispiel #2
0
void check_analyzer_expected(Analyzer& ana, corpus::document doc,
                             uint64_t num_unique, uint64_t length)
{
    ana.tokenize(doc);
    ASSERT_EQUAL(doc.counts().size(), num_unique);
    ASSERT_EQUAL(doc.length(), length);
    ASSERT_EQUAL(doc.id(), 47ul);
    if (doc.contains_content())
    {
        ASSERT_EQUAL(doc.path(), "/home/person/filename.txt");
        ASSERT_EQUAL(doc.name(), "filename.txt");
    }
    else
    {
        ASSERT_EQUAL(doc.path(), "../data/sample-document.txt");
        ASSERT_EQUAL(doc.name(), "sample-document.txt");
    }
}