/**
 * Emit this node's NFA fragment: the child's fragment, followed by a fresh
 * exit state that has an epsilon transition back to the fragment's entry.
 *
 * @param nfa automaton that receives the new states and transitions
 * @param s   out: entry state, as assigned by the child's conversion
 * @param e   out: newly generated exit state
 */
void convertToNFA(Automata& nfa, State& s, State& e) const
{
    // The exit state is allocated before the child is converted.
    e = nfa.generateState();

    State childExit;
    child->convertToNFA(nfa, s, childExit);

    // Leave the child into e ...
    nfa.addTransition(childExit, e, Transition::EPSILON);
    // ... and allow going round again from e to the entry.
    nfa.addTransition(e, s, Transition::EPSILON);
}
/**
 * Emit the NFA fragment for a two-way choice between `left` and `right`.
 *
 * Both operands are converted first; a new entry state then fans out to the
 * two operand entries and the two operand exits fan in to a new exit state,
 * all via epsilon transitions.
 *
 * @param nfa automaton that receives the new states and transitions
 * @param s   out: newly generated entry state
 * @param e   out: newly generated exit state
 */
void convertToNFA(Automata& nfa, State& s, State& e) const
{
    State leftEntry;
    State leftExit;
    State rightEntry;
    State rightExit;

    // Operands are converted before the surrounding states are generated.
    left->convertToNFA(nfa, leftEntry, leftExit);
    right->convertToNFA(nfa, rightEntry, rightExit);

    s = nfa.generateState();
    e = nfa.generateState();

    // Fan out from the shared entry.
    nfa.addTransition(s, leftEntry, Transition::EPSILON);
    nfa.addTransition(s, rightEntry, Transition::EPSILON);

    // Fan in to the shared exit.
    nfa.addTransition(leftExit, e, Transition::EPSILON);
    nfa.addTransition(rightExit, e, Transition::EPSILON);
}
//! Main int main(int argc, char** argv) { if (argc < 1 || argc > 2) { cout << "Usage: trie_generator [<chunk_size>]" << endl; cout << "Word list on standard in; one word per line." << endl; cout << "Automata on standard out; intermediate format." << endl; return 1; } try { size_t chunk_size = 0; if (argc == 2) { try { chunk_size = boost::lexical_cast<size_t>(argv[1]); } catch (boost::bad_lexical_cast) { cerr << "Invalid chunk size: " << argv[1] << endl; return 1; } } Automata a; list<string> words; string s; while (cin) { getline(cin, s); if (! s.empty()) { add_word(a, s); } } if (! a.start_node()) { cerr << "No automata generator. Empty input?" << endl; return 1; } breadth_first(a, optimize_edges); deduplicate_outputs(a); a.metadata()["Output-Type"] = "string"; write_automata(a, cout, chunk_size); } catch (const exception& e) { cerr << "Error: " << e.what() << endl; return 1; } return 0; }
/**
 * Emit the NFA fragment that chains every element of `siblings` in order.
 *
 * A new entry state is linked by an epsilon transition to the first
 * sibling's entry, each sibling's exit is linked to the next sibling's
 * entry, and the last exit is linked to a new exit state.  With no siblings
 * the entry is linked directly to the exit.
 *
 * @param nfa automaton that receives the new states and transitions
 * @param s   out: newly generated entry state
 * @param e   out: newly generated exit state
 */
void convertToNFA(Automata& nfa, State& s, State& e) const
{
    s = nfa.generateState();
    e = nfa.generateState();

    // Exit of the fragment built so far; the next sibling hangs off it.
    State tail = s;
    for (const auto& sibling : siblings) {
        State entry;
        State exit_state;
        sibling->convertToNFA(nfa, entry, exit_state);
        nfa.addTransition(tail, entry, Transition::EPSILON);
        tail = exit_state;
    }

    nfa.addTransition(tail, e, Transition::EPSILON);
}
/**
 * Parse the expression in `input` and build its NFA into `nfa`.
 *
 * `nfa` is cleared first.  If the reader's first peeked value is 0 the
 * constructor returns with `nfa` left cleared.  Otherwise the expression is
 * parsed, converted, and the resulting entry/exit states are installed as
 * the start and terminate states of `nfa`.
 *
 * @param input source handed to the reader
 * @param nfa   automaton to (re)build
 * @throws ParseError if parsing stops before the reader peeks 0
 */
Parser(typename Encode<E>::PointerType input, Automata& nfa)
    : reader(input)
{
    nfa.clear();

    // Nothing to parse.
    if (reader.peek() == 0) {
        return;
    }

    auto ast = parseRE();

    // Anything left over after the top-level parse is an error.
    if (reader.peek() != 0) {
        throw ParseError();
    }

    State startState;
    State finalState;
    ast->convertToNFA(nfa, startState, finalState);

    nfa.setStart(startState);
    nfa.setTerminate(finalState);
}
/**
 * Add word @a s to automata @a a.
 *
 * Creates the start node if needed, follows the edges that already spell a
 * prefix of @a s, appends one new node per remaining byte, and attaches an
 * output holding the bytes of @a s to the final node.  If that node already
 * has an output a warning is written to standard error and nothing is
 * attached.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = boost::make_shared<Node>();
    }

    node_p node = a.start_node();
    const size_t length = s.length();
    size_t pos = 0;

    // Walk the part of the word that is already present.
    for (; pos < length; ++pos) {
        uint8_t byte = s[pos];
        node_p next = find_next(node, byte);
        if (! next) {
            break;
        }
        node = next;
    }

    // Grow a fresh chain for whatever is left.
    for (; pos < length; ++pos) {
        uint8_t byte = s[pos];
        node->edges().push_back(Edge());
        Edge& fresh = node->edges().back();
        fresh.target() = boost::make_shared<Node>();
        fresh.add(byte);
        node = fresh.target();
    }

    if (node->first_output()) {
        cerr << "Warning: Duplicate word: " << s << endl;
        return;
    }

    byte_vector_t data;
    copy(s.begin(), s.end(), back_inserter(data));
    output_p word_output = boost::make_shared<Output>(data);
    node->first_output() = word_output;
}
/**
 * Add word @a s to automata @a a.
 *
 * Creates the start node if needed, follows the edges that already spell a
 * prefix of @a s and appends one new node per remaining byte.  The final
 * node must not have an output yet (asserted); it receives a new output
 * whose content is the buffer produced by appending uint32_t(1) through a
 * BufferAssembler.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = make_shared<Node>();
    }

    node_p cursor = a.start_node();
    const size_t word_length = s.length();
    size_t index = 0;

    // Consume the prefix that already exists in the automata.
    while (index < word_length) {
        uint8_t symbol = s[index];
        node_p child = find_next(cursor, symbol);
        if (! child) {
            break;
        }
        cursor = child;
        ++index;
    }

    // Append a new node for each byte that had no matching edge.
    for (; index < word_length; ++index) {
        uint8_t symbol = s[index];
        cursor->edges().push_back(Edge());
        Edge& appended = cursor->edges().back();
        appended.target() = make_shared<Node>();
        appended.add(symbol);
        cursor = appended.target();
    }

    assert(! cursor->first_output());

    output_p marker = make_shared<Output>();
    cursor->first_output() = marker;

    // Output content is the assembled encoding of the single value 1.
    IronAutomata::buffer_t content_buffer;
    IronAutomata::BufferAssembler assembler(content_buffer);
    assembler.append_object(uint32_t(1));
    marker->content().assign(content_buffer.begin(), content_buffer.end());
}
//! Main int main(int argc, char** argv) { if (argc < 1 || argc > 2) { cout << "Usage: trie_generator [<chunk_size>]" << endl; return 1; } try { size_t chunk_size = 0; if (argc == 2) { chunk_size = boost::lexical_cast<size_t>(argv[1]); } Automata a; list<string> words; string s; while (cin) { getline(cin, s); if (! s.empty()) { add_word(a, s); } } assert(a.start_node()); breadth_first(a, optimize_edges); deduplicate_outputs(a); a.metadata()["Output-Type"] = "string"; write_automata(a, cout, chunk_size); } catch (const exception& e) { cerr << "Error: " << e.what() << endl; return 1; } return 0; }
/**
 * Build a chain of `count` copies of the child's NFA fragment.
 *
 * A new entry and exit state are generated; each copy's entry is reached by
 * an epsilon transition from the previous copy's exit (or from the entry
 * state for the first copy), and the last exit is linked to the exit state.
 * When `addEps` is set, the state in front of every copy also gets an
 * epsilon transition straight to the exit state.
 *
 * @param nfa    automaton that receives the new states and transitions
 * @param count  number of copies; 0 builds nothing
 * @param addEps whether to add the direct transitions to the exit state
 * @param s      out: entry state (untouched when count is 0)
 * @param e      out: exit state (untouched when count is 0)
 * @return false when count is 0, true otherwise
 */
bool chainConcatenation(Automata& nfa, int count, bool addEps, State& s, State& e) const
{
    if (count == 0) {
        return false;
    }

    s = nfa.generateState();
    e = nfa.generateState();

    // Exit of the chain built so far.
    State tail = s;
    for (int remaining = count; remaining > 0; --remaining) {
        State copyEntry;
        State copyExit;
        child->convertToNFA(nfa, copyEntry, copyExit);

        nfa.addTransition(tail, copyEntry, Transition::EPSILON);
        if (addEps) {
            nfa.addTransition(tail, e, Transition::EPSILON);
        }

        tail = copyExit;
    }

    nfa.addTransition(tail, e, Transition::EPSILON);
    return true;
}
/**
 * Emit the child's NFA fragment and add an epsilon transition from its
 * entry directly to its exit, so the child can be bypassed.
 *
 * @param nfa automaton that receives the new states and transitions
 * @param s   out: entry state, as assigned by the child's conversion
 * @param e   out: exit state, as assigned by the child's conversion
 */
void convertToNFA(Automata& nfa, State& s, State& e) const
{
    // The child decides what s and e are; no extra states are created here.
    child->convertToNFA(nfa, s, e);

    // Bypass edge: entry -> exit without going through the child.
    nfa.addTransition(s, e, Transition::EPSILON);
}
/**
 * Emit a two-state NFA fragment joined by a single WILDCARD transition.
 *
 * @param nfa automaton that receives the new states and transition
 * @param s   out: newly generated entry state
 * @param e   out: newly generated exit state
 */
void convertToNFA(Automata& nfa, State& s, State& e) const
{
    // Entry is generated first, then exit.
    s = nfa.generateState();
    e = nfa.generateState();

    nfa.addTransition(s, e, Transition::WILDCARD);
}
/**
 * Emit a two-state NFA fragment joined by a single transition labelled with
 * this node's `rangeSet`.
 *
 * @param nfa automaton that receives the new states and transition
 * @param s   out: newly generated entry state
 * @param e   out: newly generated exit state
 */
void convertToNFA(Automata& nfa, State& s, State& e) const
{
    // Entry is generated first, then exit.
    s = nfa.generateState();
    e = nfa.generateState();

    nfa.addTransition(s, e, rangeSet);
}
/**
 * Emit the NFA fragment for a counted repetition of the child, driven by
 * `minCount` and `maxCount`.
 *
 * - maxCount == -1 ({count,} / {,}): `minCount` chained copies of the child
 *   followed by a KleenNode over the child.
 * - 0 <= minCount <= maxCount ({count}, {count1,count2}, {,count2}):
 *   `minCount` chained copies followed by `maxCount - minCount` further
 *   copies, each of which can be skipped straight to the exit.  If both
 *   parts are empty, a plain epsilon fragment is produced.
 *
 * @param nfa automaton that receives the new states and transitions
 * @param s   out: entry state of the fragment
 * @param e   out: exit state of the fragment
 * @throws ParseError for any other combination of minCount and maxCount
 */
void convertToNFA(Automata& nfa, State& s, State& e) const
{
    // special case : {count,} {,}
    if (maxCount == -1) {
        State prefixS;
        State prefixE;
        const bool hasPrefix = chainConcatenation(nfa, minCount, false, prefixS, prefixE);

        State starS;
        State starE;
        KleenNode(child).convertToNFA(nfa, starS, starE);

        if (hasPrefix) {
            // Mandatory copies first, then the unbounded tail.
            nfa.addTransition(prefixE, starS, Transition::EPSILON);
        }
        s = hasPrefix ? prefixS : starS;
        e = starE;
        return;
    }

    // Anything that is not a well-formed bounded range is rejected.
    if (minCount > maxCount || minCount < 0) {
        throw ParseError();
    }

    // other case : {count}, {count1, count2}, {,count2}
    State requiredS;
    State requiredE;
    State optionalS;
    State optionalE;
    const bool hasRequired = chainConcatenation(nfa, minCount, false, requiredS, requiredE);
    const bool hasOptional = chainConcatenation(nfa, maxCount - minCount, true, optionalS, optionalE);

    if (!hasRequired && !hasOptional) {
        // Neither part produced states: emit a bare epsilon fragment.
        s = nfa.generateState();
        e = nfa.generateState();
        nfa.addTransition(s, e, Transition::EPSILON);
        return;
    }

    s = hasRequired ? requiredS : optionalS;
    e = hasOptional ? optionalE : requiredE;

    if (hasRequired && hasOptional) {
        nfa.addTransition(requiredE, optionalS, Transition::EPSILON);
    }
}
// Very basic test; more significant testing will be done by end to end tests. TEST(TestIntermediate, Writer) { using namespace IronAutomata::Intermediate; stringstream s; { Automata a; node_p node = a.start_node() = boost::make_shared<Node>(); output_p output = node->first_output() = boost::make_shared<Output>(); output->content().push_back('7'); output->content().push_back('3'); output_p other_output = output->next_output() = boost::make_shared<Output>(); other_output->content().push_back('9'); node->edges().push_back(Edge()); Edge& edge = node->edges().back(); node_p other_node = edge.target() = boost::make_shared<Node>(); edge.add('5'); write_automata(a, s); s.seekp(0); } IronAutomata::ostream_logger logger(cout); AutomataReader reader(logger); bool success = reader.read_from_istream(s); EXPECT_TRUE(success); EXPECT_TRUE(reader.clean()); EXPECT_TRUE(reader.success()); Automata a = reader.automata(); EXPECT_FALSE(a.no_advance_no_output()); ASSERT_TRUE(bool(a.start_node())); node_p node = a.start_node(); EXPECT_TRUE(node->advance_on_default()); ASSERT_TRUE(bool(node->first_output())); ASSERT_EQ(1UL, node->edges().size()); EXPECT_FALSE(node->default_target()); output_p output = node->first_output(); ASSERT_EQ(2UL, output->content().size()); EXPECT_EQ('7', output->content()[0]); EXPECT_EQ('3', output->content()[1]); ASSERT_TRUE(bool(output->next_output())); output = output->next_output(); ASSERT_EQ(1UL, output->content().size()); EXPECT_EQ('9', output->content()[0]); EXPECT_FALSE(output->next_output()); Edge& edge = node->edges().front(); EXPECT_TRUE(edge.advance()); ASSERT_TRUE(bool(edge.target())); ASSERT_EQ(1UL, edge.size()); EXPECT_EQ('5', *edge.begin()); node = edge.target(); EXPECT_FALSE(node->default_target()); EXPECT_TRUE(node->edges().empty()); EXPECT_TRUE(node->advance_on_default()); EXPECT_FALSE(node->first_output()); }
int main(int argc, char ** argv) { if (argc != 2) return -1; char * search = argv[1]; Automata * a = new Automata(); #if 1 a->add_string("automata"); a->add_string("tomata"); a->add_string("tomi"); a->add_string("automata"); a->add_string("tomato"); a->add_string("mata"); a->add_string("amatti"); a->add_string("car"); a->add_string("application"); a->add_string("bridge"); a->add_string("cludge"); a->add_string("ban"); a->add_string("sandwich"); a->add_string("tomi"); a->add_string("back"); a->add_string("stack"); a->add_string("geek"); a->add_string("beep"); a->add_string("deep"); a->add_string("zip"); a->add_string("combined"); a->add_string("goody"); a->add_string("Automata constructed"); #endif a->add_string("9cf386a6cbbecdb999fd98ec89ea9ebecaa2cbb898fe97fb9eb0"); a->add_string("96f98cacc1b4c7b393f792e683e094b4c0a8c1b292f49df194ba"); a->add_string("4c2356761b6e1d69492d483c593a4e6e1a721b68482e472b4e60"); a->add_string("5c5c906f4a46662626d9fcfcdc9c9c634642622222ddf8f0d090"); a->add_string("b95a010000be"); cout<<"Automata constructed \n"; a->construct_fail_links(); cout<<"Fail links constructed \n"; cout<<"Match a string "<<search<<" returned : "<< a->is_match(search)<<endl; return 0; }
int main(int argc, char* argv[]) { cout << "=============================================================================="<< endl; cout << " RegexCpp" << endl; cout << " 0. exit: Quit the program" << endl; cout << " 1. match <pattern> <text>: Do regex string matching" << endl; cout << " 2. debug <on|off>: Show running log or not, default not" << endl; cout << " 3. clear: Clear the screen" << endl; cout << "==============================================================================" << endl; char* cmdline = new char[MAXN]; char* cmd; char* pattern; char* text; char* para; Parser parser; Automata automata; while(true) { cout << ">>>>"; cin.getline(cmdline, MAXN); cmd = strtok(cmdline, " "); if(0 == strcmp(cmd, "exit")) { return 0; } if(0 == strcmp(cmd, "match")) { pattern = strtok(NULL, " "); text = strtok(NULL, " "); if(pattern == NULL || text == NULL) { cout << "Incomplete parameters for [match] command " << endl; continue; } Node* root = parser.Parse(pattern); //½âÎöÊ÷ if(root == NULL) { cout << red << "Error parsing the pattern!!" << flushcolor; continue; } State* start = automata.CreateAutomata(root); bool bingo = automata.Match(start, text); } else if(0 == strcmp(cmd, "debug")) { para = strtok(NULL, " "); if(para == NULL) { if(parser.GetDebug()) cout << yellow << "Debug mode is on " << flushcolor; else cout << yellow << "Debug mode is off " << flushcolor; continue; } if(0 == strcmp(para, "on")) { parser.SetDebug(true); automata.SetDebug(true); } else if(0 == strcmp(para, "off")) { parser.SetDebug(false); automata.SetDebug(false); } else { cout << red << "---------------------- Invalid parameters for [debug] command-------------------" << flushcolor; } } else if(0 == strcmp(cmd, "clear")) { system("cls"); } else { cout << red << "--------------------------------Unknown command-------------------------------" << flushcolor; } } return 0; }