Пример #1
0
/**
 * Insert word @a s into the trie rooted at the start node of automata @a a.
 *
 * A start node is created if @a a does not have one yet.  The longest prefix
 * of @a s that can already be followed via find_next() is reused; one new
 * edge and node is appended for every remaining byte.  The node reached at
 * the end receives an Output constructed from the bytes of @a s, unless it
 * already has a first output, in which case a duplicate warning is written
 * to cerr and the node is left untouched.
 *
 * @param[in,out] a Automata to extend.
 * @param[in]     s Word to add.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = boost::make_shared<Node>();
    }

    node_p cursor = a.start_node();
    size_t pos = 0;

    // Phase 1: follow existing edges until one is missing.  Leaving via
    // break skips the increment, so pos stays on the first unmatched byte.
    for (; pos < s.length(); ++pos) {
        const uint8_t byte = s[pos];
        node_p child = find_next(cursor, byte);
        if (! child) {
            break;
        }
        cursor = child;
    }

    // Phase 2: append a fresh edge/node pair for every remaining byte.
    for (; pos < s.length(); ++pos) {
        const uint8_t byte = s[pos];

        cursor->edges().push_back(Edge());
        Edge& fresh_edge = cursor->edges().back();
        fresh_edge.target() = boost::make_shared<Node>();
        fresh_edge.add(byte);
        cursor = fresh_edge.target();
    }

    // A node that already carries an output means the word was seen before.
    if (cursor->first_output()) {
        cerr << "Warning: Duplicate word: " << s << endl;
        return;
    }

    byte_vector_t word_bytes;
    copy(s.begin(), s.end(), back_inserter(word_bytes));

    output_p word_output = boost::make_shared<Output>(word_bytes);
    cursor->first_output() = word_output;
}
Пример #2
0
/**
 * Insert word @a s into the trie rooted at the start node of automata @a a.
 *
 * A start node is created if @a a does not have one yet.  The longest prefix
 * of @a s that can already be followed via find_next() is reused; one new
 * edge and node is appended for every remaining byte.  The final node must
 * not have an output yet (checked with assert, so only when assertions are
 * enabled); it receives a new Output whose content is the buffer produced
 * by appending uint32_t(1) through a BufferAssembler.
 *
 * @param[in,out] a Automata to extend.
 * @param[in]     s Word to add.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = make_shared<Node>();
    }

    node_p cursor = a.start_node();
    size_t pos = 0;

    // Phase 1: follow existing edges until one is missing.  Leaving via
    // break skips the increment, so pos stays on the first unmatched byte.
    for (; pos < s.length(); ++pos) {
        const uint8_t byte = s[pos];
        node_p child = find_next(cursor, byte);
        if (! child) {
            break;
        }
        cursor = child;
    }

    // Phase 2: append a fresh edge/node pair for every remaining byte.
    for (; pos < s.length(); ++pos) {
        const uint8_t byte = s[pos];

        cursor->edges().push_back(Edge());
        Edge& fresh_edge = cursor->edges().back();
        fresh_edge.target() = make_shared<Node>();
        fresh_edge.add(byte);
        cursor = fresh_edge.target();
    }

    // Adding the same word twice is treated as a precondition violation.
    assert(! cursor->first_output());
    output_p word_output = make_shared<Output>();
    cursor->first_output() = word_output;

    // Assemble the output content: a single uint32_t with value 1.
    IronAutomata::buffer_t assembled;
    IronAutomata::BufferAssembler builder(assembled);
    builder.append_object(uint32_t(1));

    word_output->content().assign(assembled.begin(), assembled.end());
}
Пример #3
0
//! Main
int main(int argc, char** argv)
{
    if (argc < 1 || argc > 2) {
        cout << "Usage: trie_generator [<chunk_size>]" << endl;
        cout << "Word list on standard in; one word per line." << endl;
        cout << "Automata on standard out; intermediate format." << endl;
        return 1;
    }

    try {
        size_t chunk_size = 0;
        if (argc == 2) {
            try {
                chunk_size = boost::lexical_cast<size_t>(argv[1]);
            }
            catch (boost::bad_lexical_cast) {
                cerr << "Invalid chunk size: " << argv[1] << endl;
                return 1;
            }
        }

        Automata a;

        list<string> words;
        string s;
        while (cin) {
            getline(cin, s);
            if (! s.empty()) {
                add_word(a, s);
            }
        }

        if (! a.start_node()) {
            cerr << "No automata generator.  Empty input?" << endl;
            return 1;
        }

        breadth_first(a, optimize_edges);
        deduplicate_outputs(a);

        a.metadata()["Output-Type"] = "string";

        write_automata(a, cout, chunk_size);
    }
    catch (const exception& e) {
        cerr << "Error: " << e.what() << endl;
        return 1;
    }

    return 0;
}
Пример #4
0
//! Main
int main(int argc, char** argv)
{
    if (argc < 1 || argc > 2) {
        cout << "Usage: trie_generator [<chunk_size>]" << endl;
        return 1;
    }

    try {
        size_t chunk_size = 0;
        if (argc == 2) {
            chunk_size = boost::lexical_cast<size_t>(argv[1]);
        }

        Automata a;

        list<string> words;
        string s;
        while (cin) {
            getline(cin, s);
            if (! s.empty()) {
                add_word(a, s);
            }
        }

        assert(a.start_node());

        breadth_first(a, optimize_edges);
        deduplicate_outputs(a);

        a.metadata()["Output-Type"] = "string";

        write_automata(a, cout, chunk_size);
    }
    catch (const exception& e) {
        cerr << "Error: " << e.what() << endl;
        return 1;
    }

    return 0;
}
Пример #5
0
// Very basic round-trip test: build a tiny automata, write it to a string
// stream, read it back, and check that the structure survived.  More
// significant testing will be done by end to end tests.
TEST(TestIntermediate, Writer)
{
    using namespace IronAutomata::Intermediate;

    stringstream buffer;

    // Build and serialize.  The automata has a start node with two chained
    // outputs ("73" followed by "9") and a single edge on '5' to a second,
    // empty node.
    {
        Automata original;

        a_start:
        original.start_node() = boost::make_shared<Node>();
        node_p root = original.start_node();

        root->first_output() = boost::make_shared<Output>();
        output_p first = root->first_output();
        first->content().push_back('7');
        first->content().push_back('3');

        first->next_output() = boost::make_shared<Output>();
        output_p second = first->next_output();
        second->content().push_back('9');

        root->edges().push_back(Edge());
        Edge& outgoing = root->edges().back();
        outgoing.target() = boost::make_shared<Node>();
        outgoing.add('5');

        write_automata(original, buffer);
        // NOTE(review): this rewinds the put position; the reader below uses
        // the get position.  Confirm whether seekg(0) was intended.
        buffer.seekp(0);
    }

    // Read back.
    IronAutomata::ostream_logger logger(cout);
    AutomataReader reader(logger);

    bool read_ok = reader.read_from_istream(buffer);
    EXPECT_TRUE(read_ok);
    EXPECT_TRUE(reader.clean());
    EXPECT_TRUE(reader.success());

    Automata loaded = reader.automata();

    // Automata-level and start node checks.
    EXPECT_FALSE(loaded.no_advance_no_output());
    ASSERT_TRUE(bool(loaded.start_node()));
    node_p current = loaded.start_node();
    EXPECT_TRUE(current->advance_on_default());
    ASSERT_TRUE(bool(current->first_output()));
    ASSERT_EQ(1UL, current->edges().size());
    EXPECT_FALSE(current->default_target());

    // First output: "73".
    output_p out = current->first_output();
    ASSERT_EQ(2UL, out->content().size());
    EXPECT_EQ('7', out->content()[0]);
    EXPECT_EQ('3', out->content()[1]);

    // Second output: "9", end of chain.
    ASSERT_TRUE(bool(out->next_output()));
    out = out->next_output();
    ASSERT_EQ(1UL, out->content().size());
    EXPECT_EQ('9', out->content()[0]);
    EXPECT_FALSE(out->next_output());

    // The single edge on '5'.
    Edge& link = current->edges().front();
    EXPECT_TRUE(link.advance());
    ASSERT_TRUE(bool(link.target()));
    ASSERT_EQ(1UL, link.size());
    EXPECT_EQ('5', *link.begin());

    // Target node: no edges, no default target, no outputs.
    current = link.target();
    EXPECT_FALSE(current->default_target());
    EXPECT_TRUE(current->edges().empty());
    EXPECT_TRUE(current->advance_on_default());
    EXPECT_FALSE(current->first_output());
}