예제 #1
0
파일: gz_corpus.cpp 프로젝트: domarps/meta
/**
 * Produces the next document from the corpus streams.
 *
 * The label starts as "[none]" and is overwritten with one line read from
 * class_stream_ when that stream tests true. The document text is a single
 * line read from corpus_stream_. The document id is the current value of
 * cur_id_, which is incremented afterwards.
 *
 * @return the document, with its content set using encoding() and its
 * metadata set to the result of next_metadata()
 */
document gz_corpus::next()
{
    class_label current_label{"[none]"};
    if (class_stream_)
    {
        std::getline(class_stream_,
                     static_cast<std::string&>(current_label));
    }

    std::string text;
    std::getline(corpus_stream_, text);

    document result{cur_id_++, current_label};
    result.content(text, encoding());
    result.mdata(next_metadata());
    return result;
}
예제 #2
0
파일: gz_corpus.cpp 프로젝트: MGKhKhD/meta
/**
 * Produces the next document from the corpus streams.
 *
 * The label starts as "[none]" and is overwritten with one line read from
 * class_stream_ when that stream tests true. The document text is a single
 * line read from corpus_stream_. The document id is the current value of
 * cur_id_, which is incremented afterwards.
 *
 * When store_full_text() is true, a metadata field built from the
 * document's content is inserted at the front of the metadata returned by
 * next_metadata().
 *
 * @return the document with its content and metadata filled in
 */
document gz_corpus::next()
{
    class_label current_label{"[none]"};
    if (class_stream_)
    {
        std::getline(class_stream_,
                     static_cast<std::string&>(current_label));
    }

    std::string text;
    std::getline(corpus_stream_, text);

    document result{cur_id_++, current_label};
    result.content(text, encoding());

    auto fields = next_metadata();
    if (store_full_text())
    {
        // the full text, when requested, is always the first field
        fields.insert(fields.begin(), metadata::field{result.content()});
    }
    result.mdata(std::move(fields));

    return result;
}
예제 #3
0
/**
 * Produces the document for the entry at index cur_ and then advances cur_.
 *
 * The file at prefix_ + docs_[cur_].first is read in full and used as the
 * document content. The metadata from next_metadata() is extended at the
 * front: first with the content (only when store_full_text() is true), then
 * with the file path, so the path ends up as the first field.
 *
 * @return the document with its content and metadata filled in
 * @throws corpus_exception if filesystem::file_exists() reports that the
 * file is missing; cur_ is not advanced in that case
 */
document file_corpus::next()
{
    document result{doc_id{cur_}, docs_[cur_].second};

    // full location of the file backing this document
    const auto path = prefix_ + docs_[cur_].first;

    if (!filesystem::file_exists(path))
    {
        throw corpus_exception{"file \"" + docs_[cur_].first
                               + "\" does not exist"};
    }

    result.content(filesystem::file_text(path), encoding());

    auto fields = next_metadata();
    if (store_full_text())
    {
        fields.insert(fields.begin(), metadata::field{result.content()});
    }

    // the "path" field is added manually, ahead of everything else
    fields.insert(fields.begin(), metadata::field{path});
    result.mdata(std::move(fields));

    ++cur_;
    return result;
}