/**
 * Insert word @a s into the trie stored in automata @a a.
 *
 * The start node is created on first use.  The longest prefix of @a s that
 * is already present is followed via find_next(); one new node and edge is
 * then appended for every remaining byte.  The node reached at the end gets
 * an output holding the bytes of @a s, unless it already has an output, in
 * which case a duplicate warning is written to standard error and the
 * existing output is left untouched.
 *
 * @param[in,out] a Automata to add the word to.
 * @param[in]     s Word to add.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = boost::make_shared<Node>();
    }

    node_p node = a.start_node();
    size_t pos = 0;

    // Phase 1: walk along edges that already exist; stop at the first byte
    // with no matching edge (pos is left pointing at that byte).
    for (; pos < s.length(); ++pos) {
        uint8_t byte = s[pos];
        node_p child = find_next(node, byte);
        if (! child) {
            break;
        }
        node = child;
    }

    // Phase 2: extend the trie with a fresh node per remaining byte.
    for (; pos < s.length(); ++pos) {
        uint8_t byte = s[pos];
        node->edges().push_back(Edge());
        Edge& new_edge = node->edges().back();
        new_edge.target() = boost::make_shared<Node>();
        new_edge.add(byte);
        node = new_edge.target();
    }

    // A node that already carries an output means the word was seen before.
    if (node->first_output()) {
        cerr << "Warning: Duplicate word: " << s << endl;
        return;
    }

    byte_vector_t data;
    copy(s.begin(), s.end(), back_inserter(data));
    output_p word_output = boost::make_shared<Output>(data);
    node->first_output() = word_output;
}
/**
 * Add word @a s to automata @a a.
 *
 * The start node is created on first use.  Existing edges are followed for
 * as long as they match @a s (via find_next()); a new node and edge is then
 * appended for each remaining byte.  The final node receives an output
 * whose content is whatever IronAutomata::BufferAssembler::append_object()
 * writes for @c uint32_t(1).
 *
 * Adding the same word twice is a no-op: every word gets the same output
 * content, so a node that already has an output is simply left as it is.
 * (Previously this case tripped an assert, i.e. aborted debug builds on
 * repeated input and allocated a redundant output in NDEBUG builds.)
 *
 * @param[in,out] a Automata to add the word to.
 * @param[in]     s Word to add.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = make_shared<Node>();
    }

    node_p current_node = a.start_node();
    size_t j = 0;

    // Follow the part of the word that is already in the trie.
    while (j < s.length()) {
        uint8_t c = s[j];
        node_p next_node = find_next(current_node, c);
        if (! next_node) {
            break;
        }
        ++j;
        current_node = next_node;
    }

    // Append new nodes for the part of the word that is not yet present.
    while (j < s.length()) {
        uint8_t c = s[j];
        ++j;
        current_node->edges().push_back(Edge());
        Edge& edge = current_node->edges().back();
        edge.target() = make_shared<Node>();
        edge.add(c);
        current_node = edge.target();
    }

    // Duplicate word: the node is already marked with the (constant) output,
    // so there is nothing left to do.
    if (current_node->first_output()) {
        return;
    }

    output_p output = make_shared<Output>();
    current_node->first_output() = output;

    // Serialize the constant 1 through the assembler and use the resulting
    // bytes as the output content.
    IronAutomata::buffer_t content_buffer;
    IronAutomata::BufferAssembler assembler(content_buffer);
    assembler.append_object(uint32_t(1));
    output->content().assign(content_buffer.begin(), content_buffer.end());
}
//! Main int main(int argc, char** argv) { if (argc < 1 || argc > 2) { cout << "Usage: trie_generator [<chunk_size>]" << endl; cout << "Word list on standard in; one word per line." << endl; cout << "Automata on standard out; intermediate format." << endl; return 1; } try { size_t chunk_size = 0; if (argc == 2) { try { chunk_size = boost::lexical_cast<size_t>(argv[1]); } catch (boost::bad_lexical_cast) { cerr << "Invalid chunk size: " << argv[1] << endl; return 1; } } Automata a; list<string> words; string s; while (cin) { getline(cin, s); if (! s.empty()) { add_word(a, s); } } if (! a.start_node()) { cerr << "No automata generator. Empty input?" << endl; return 1; } breadth_first(a, optimize_edges); deduplicate_outputs(a); a.metadata()["Output-Type"] = "string"; write_automata(a, cout, chunk_size); } catch (const exception& e) { cerr << "Error: " << e.what() << endl; return 1; } return 0; }
//! Main int main(int argc, char** argv) { if (argc < 1 || argc > 2) { cout << "Usage: trie_generator [<chunk_size>]" << endl; return 1; } try { size_t chunk_size = 0; if (argc == 2) { chunk_size = boost::lexical_cast<size_t>(argv[1]); } Automata a; list<string> words; string s; while (cin) { getline(cin, s); if (! s.empty()) { add_word(a, s); } } assert(a.start_node()); breadth_first(a, optimize_edges); deduplicate_outputs(a); a.metadata()["Output-Type"] = "string"; write_automata(a, cout, chunk_size); } catch (const exception& e) { cerr << "Error: " << e.what() << endl; return 1; } return 0; }
// Very basic test; more significant testing will be done by end to end tests. TEST(TestIntermediate, Writer) { using namespace IronAutomata::Intermediate; stringstream s; { Automata a; node_p node = a.start_node() = boost::make_shared<Node>(); output_p output = node->first_output() = boost::make_shared<Output>(); output->content().push_back('7'); output->content().push_back('3'); output_p other_output = output->next_output() = boost::make_shared<Output>(); other_output->content().push_back('9'); node->edges().push_back(Edge()); Edge& edge = node->edges().back(); node_p other_node = edge.target() = boost::make_shared<Node>(); edge.add('5'); write_automata(a, s); s.seekp(0); } IronAutomata::ostream_logger logger(cout); AutomataReader reader(logger); bool success = reader.read_from_istream(s); EXPECT_TRUE(success); EXPECT_TRUE(reader.clean()); EXPECT_TRUE(reader.success()); Automata a = reader.automata(); EXPECT_FALSE(a.no_advance_no_output()); ASSERT_TRUE(bool(a.start_node())); node_p node = a.start_node(); EXPECT_TRUE(node->advance_on_default()); ASSERT_TRUE(bool(node->first_output())); ASSERT_EQ(1UL, node->edges().size()); EXPECT_FALSE(node->default_target()); output_p output = node->first_output(); ASSERT_EQ(2UL, output->content().size()); EXPECT_EQ('7', output->content()[0]); EXPECT_EQ('3', output->content()[1]); ASSERT_TRUE(bool(output->next_output())); output = output->next_output(); ASSERT_EQ(1UL, output->content().size()); EXPECT_EQ('9', output->content()[0]); EXPECT_FALSE(output->next_output()); Edge& edge = node->edges().front(); EXPECT_TRUE(edge.advance()); ASSERT_TRUE(bool(edge.target())); ASSERT_EQ(1UL, edge.size()); EXPECT_EQ('5', *edge.begin()); node = edge.target(); EXPECT_FALSE(node->default_target()); EXPECT_TRUE(node->edges().empty()); EXPECT_TRUE(node->advance_on_default()); EXPECT_FALSE(node->first_output()); }