// Builds the concatenation of two automata: n1 followed by n2.
//
// The result starts as a copy of n1. The states of n2 are appended after
// those of n1, so every n2 state index (and every transition target copied
// from n2) is shifted by n1.states.size(). Each accepting state of n1 gets a
// lambda transition to the shifted start state of n2; in the resulting state
// set only the states that came from accepting n2 states are accepting.
NFA getNFA_AND(NFA n1, NFA n2)
{
    const int shift = n1.states.size();
    vector<State> merged;
    NFA ret = n1;

    // n1 part: no state stays accepting; accepting ones hand over to n2.
    for(int s = 0; s < n1.states.size(); s++) {
        merged.push_back(State(false));
        if(!n1.states[s].isAccepted())
            continue;
        ret.trans_func[s][NFA::lambda].push_back(n2.start_state + shift);
    }

    // n2 part: copy each transition row, then renumber its targets.
    typedef map<char, vector<int> >::iterator RowIt;
    for(int s = 0; s < n2.states.size(); s++) {
        const int moved = s + shift;
        ret.setTransFunc(moved, n2.trans_func[s]);

        for(RowIt row = ret.trans_func[moved].begin(); row != ret.trans_func[moved].end(); row++) {
            // Symbols seen only in n2 are added to the combined alphabet.
            if(find(ret.alphabet.begin(), ret.alphabet.end(), row->first) == ret.alphabet.end())
                ret.alphabet.push_back(row->first);

            for(vector<int>::iterator dst = row->second.begin(); dst != row->second.end(); dst++)
                *dst += shift;
        }

        merged.push_back(n2.states[s].isAccepted() ? State(true) : State(false));
    }

    // Lambda transitions were introduced above, so lambda must be in the alphabet.
    if(find(ret.alphabet.begin(), ret.alphabet.end(), NFA::lambda) == ret.alphabet.end())
        ret.alphabet.push_back(NFA::lambda);

    ret.setStateSet(merged);
    return ret;
}
// item: ATOM('+'|'*'|'?') void Grammar::parseItem(string &ruleName, NFA **start, NFA **end) { parseAtom(ruleName, start, end); assert(*start != NULL); assert(*end != NULL); // check to see wether repeator exist? if (isMatch(TT_OP, "+")) { (*end)->arc(*start); advanceToken(); } else if (isMatch(TT_OP, "*")) { NFA *startState = new NFA(); NFA *endState = new NFA(); startState->arc(endState); startState->arc(*start); (*end)->arc(*start); (*end)->arc(endState); *start = startState; *end = endState; advanceToken(); } else if (isMatch(TT_OP, "?")) { NFA *endState = new NFA(); (*end)->arc(endState); (*start)->arc(endState); *end = endState; advanceToken(); } }
// Parses an atom and, when the atom is non-empty and more input remains,
// passes it on to parseMaybeQuantifier(). Otherwise the atom is returned
// unchanged.
NFA RE2NFA::parsePiece()
{
    NFA atom = parseAtom();

    const bool mayHaveQuantifier = !atom.isEmpty() && hasNext();
    if (mayHaveQuantifier)
        return parseMaybeQuantifier(atom);

    return atom;
}
/// parse the alternative, such as alternative : items (| items)* void Grammar::parseAlternative(string &ruleName, NFA **start, NFA **end) { assert(start != NULL); assert(end != NULL); // parse items parseItems(ruleName, start, end); if (isMatch(TT_OP, "|")) { // make a closing state NFA *closingStartState = new NFA(); NFA *closingEndState = new NFA; closingStartState->arc(*start); (*end)->arc(closingEndState); while (isMatch(TT_OP, "|")) { advanceToken(); NFA *startState = NULL; NFA *endState = NULL; parseItems(ruleName, &startState, &endState); closingStartState->arc(startState); endState->arc(closingEndState); } *start = closingStartState; *end = closingEndState; } }
// Creates a two-state automaton whose only transition goes from its initial
// state to its final state on `input`.
NFA NFA::createSingleInputNFA(InputType input)
{
    NFA single;
    single.initialize(2);

    const int from = single.initialState;
    const int to = single.finalState;
    single.addTransition(from, input, to);

    return single;
}
Ref<CompiledContentExtension> compileRuleList(const String& ruleList) { auto parsedRuleList = parseRuleList(ruleList); #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING double nfaBuildTimeStart = monotonicallyIncreasingTime(); #endif Vector<SerializedActionByte> actions; Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions); NFA nfa; URLFilterParser urlFilterParser(nfa); for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) { const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex]; const Trigger& trigger = contentExtensionRule.trigger(); ASSERT(trigger.urlFilter.length()); String error = urlFilterParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocations[ruleIndex]); if (!error.isNull()) { dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), error.utf8().data()); continue; } } #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING double nfaBuildTimeEnd = monotonicallyIncreasingTime(); dataLogF(" Time spent building the NFA: %f\n", (nfaBuildTimeEnd - nfaBuildTimeStart)); #endif #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING nfa.debugPrintDot(); #endif #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING double dfaBuildTimeStart = monotonicallyIncreasingTime(); #endif const DFA dfa = NFAToDFA::convert(nfa); #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING double dfaBuildTimeEnd = monotonicallyIncreasingTime(); dataLogF(" Time spent building the DFA: %f\n", (dfaBuildTimeEnd - dfaBuildTimeStart)); #endif // FIXME: never add a DFA that only matches the empty set. #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING dfa.debugPrintDot(); #endif Vector<DFABytecode> bytecode; DFABytecodeCompiler compiler(dfa, bytecode); compiler.compile(); return CompiledContentExtension::create(WTF::move(bytecode), WTF::move(actions)); }
void CombinedURLFilters::processNFAs(size_t maxNFASize, std::function<void(NFA&&)> handler) { #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING print(); #endif while (true) { // Traverse out to a leaf. Vector<PrefixTreeVertex*, 128> stack; PrefixTreeVertex* vertex = m_prefixTreeRoot.get(); while (true) { ASSERT(vertex); stack.append(vertex); if (vertex->edges.isEmpty()) break; vertex = vertex->edges.last().child.get(); } if (stack.size() == 1) break; // We're done once we have processed and removed all the edges in the prefix tree. // Find the prefix root for this NFA. This is the vertex after the last term with a quantifier if there is one, // or the root if there are no quantifiers left. while (stack.size() > 1) { if (!stack[stack.size() - 2]->edges.last().term.hasFixedLength()) break; stack.removeLast(); } ASSERT_WITH_MESSAGE(!stack.isEmpty(), "At least the root should be in the stack"); // Make an NFA with the subtrees for whom this is also the last quantifier (or who also have no quantifier). NFA nfa; // Put the prefix into the NFA. unsigned prefixEnd = nfa.root(); for (unsigned i = 0; i < stack.size() - 1; ++i) { ASSERT(!stack[i]->edges.isEmpty()); const PrefixTreeEdge& edge = stack[i]->edges.last(); prefixEnd = edge.term.generateGraph(nfa, prefixEnd, edge.child->finalActions); } // Put the non-quantified vertices in the subtree into the NFA and delete them. ASSERT(stack.last()); generateNFAForSubtree(nfa, prefixEnd, *stack.last(), maxNFASize); handler(WTF::move(nfa)); // Clean up any processed leaf nodes. while (true) { if (stack.size() > 1) { if (stack[stack.size() - 1]->edges.isEmpty()) { stack[stack.size() - 2]->edges.removeLast(); stack.removeLast(); } else break; // Vertex is not a leaf. } else break; // Leave the empty root. } } }
// Demo driver: builds three single-character automata, combines them with
// the NFA operators (+, |, unary *) into `x`, and prints the result via show().
int main()
{
    NFA a = NFA('a');
    NFA b = NFA('b');
    NFA x = NFA('b');

    // Operands are combined in one expression, exactly as written.
    x = x + b + a + *(b | a);

    x.show();
    return 0;
}
// Builds an automaton for `str` one character at a time: each character becomes a single-input NFA, which either becomes the result (while the result is still empty) or is concatenated onto it.
// NOTE(review): the definition is cut off in this view -- no return statement or closing brace follows the loop; confirm against the full file.
NFA NFA::createStringNFA(const QByteArray &str) { NFA result; foreach (char c, str) { NFA ch = NFA::createSingleInputNFA(c); if (result.isEmpty()) result = ch; else result = NFA::createConcatenatingNFA(result, ch); }
// Builds the automaton for a concatenation expression.
//
// Starting from a fresh NFA, the automaton of every child expression is
// merged in turn, each attached at the current end point of the result
// (paired with point 0 of the child automaton).
//
// The result is owned by a unique_ptr from the moment it is allocated, so it
// is released if a child's buildNFA() or merge() throws.
unique_ptr<NFA> ConcatExp::buildNFA()
{
    unique_ptr<NFA> nfa(new NFA());

    for (auto& child : m_childExps) {
        unique_ptr<NFA> childNfa = child->buildNFA();
        nfa->merge(*childNfa, std::make_pair(nfa->endPoint(), 0));
    }

    return nfa;
}
// Parses a sequence of pieces and concatenates them left to right.
//
// The first piece is returned as-is when no input remains after it.
// Otherwise pieces are parsed and appended until either an empty piece is
// produced or the input is exhausted.
NFA RE2NFA::parseBranch()
{
    NFA branch = parsePiece();
    if (!hasNext())
        return branch;

    for (;;) {
        NFA piece = parsePiece();
        if (piece.isEmpty())
            break;

        branch = NFA::createConcatenatingNFA(branch, piece);

        if (!hasNext())
            break;
    }

    return branch;
}
// Test driver.
//
// First prints the star of the automaton for "a". Then, for every
// whitespace-delimited token read from stdin, converts it to postfix with
// trans(), echoes the postfix form, builds the automaton with calcPostfix()
// and prints it. Stops at end of input.
int main()
{
    NFA test1 = getNFAbyString("a");
    NFA test2 = getNFA_Star(test1);
    test2.print();

    char s[1024], postfix[1024];
    // The field width keeps scanf from writing past the end of `s`
    // (1023 characters plus the terminating NUL).
    while(scanf("%1023s", s) == 1) {
        trans(s, postfix);
        puts(postfix);

        NFA ret = calcPostfix(postfix);
        ret.print();
    }
    return 0;
}
// Creates an automaton that runs `a` and then `b`.
//
// initializeFromPair() places both operands into the result and reports where
// their initial and final states ended up. Epsilon transitions then chain
// result.initialState -> a -> b -> result.finalState.
NFA NFA::createConcatenatingNFA(const NFA &a, const NFA &b)
{
    NFA chained;

    int aStart, aEnd, bStart, bEnd;
    chained.initializeFromPair(a, b, &aStart, &aEnd, &bStart, &bEnd);

    chained.addTransition(chained.initialState, Epsilon, aStart);
    chained.addTransition(aEnd, Epsilon, bStart);
    chained.addTransition(bEnd, Epsilon, chained.finalState);

    return chained;
}
// Returns the size of the DFA obtained for one group of regular expressions.
//
// The group's NFA is built with group_regex(), stripped of epsilon
// transitions, reduced, and converted with nfa2dfa(). Both the NFA and the
// DFA are deleted before returning.
//
// Returns _INFINITY when nfa2dfa() yields no DFA (a null pointer), instead of
// dereferencing it.
// NOTE(review): the assumption that nfa2dfa() can return NULL is inferred from
// the _INFINITY initial value -- confirm against its definition.
unsigned long regex_parser::parse_regex_group(FILE *file, int group[]){
    unsigned long size = _INFINITY;

    NFA *nfa = group_regex(file, group);
    nfa->remove_epsilon();
    nfa->reduce();

    DFA *dfa = nfa->nfa2dfa();
    delete nfa;

    if (dfa != NULL) {
        size = dfa->size();
        delete dfa;
    }

    return size;
}
// Builds the union of the first `no_of_selections` automata in `selections`.
//
// Layout of the result: vertex 0 is the common start, the last vertex is the
// common final state, and the branches occupy consecutive vertex ranges in
// between. Every branch is entered from vertex 0 and left from its last
// vertex to the common final state through '^' transitions.
NFA or_selection(vector<NFA> selections, int no_of_selections)
{
    // Two extra vertices: the common start and the common final state.
    int vertex_count = 2;
    for(int k = 0; k < no_of_selections; k++)
        vertex_count += selections.at(k).get_vertex_count();

    NFA result;
    result.set_vertex(vertex_count);

    const int last_vertex = vertex_count - 1;
    int base = 1;   // index of the first vertex of the current branch

    for(int k = 0; k < no_of_selections; k++) {
        result.set_transition(0, base, '^');

        NFA branch = selections.at(k);
        for(int t = 0; t < branch.transitions.size(); t++) {
            trans edge = branch.transitions.at(t);
            // Branch-local vertex numbers are shifted into the branch's range.
            result.set_transition(edge.vertex_from + base, edge.vertex_to + base, edge.trans_symbol);
        }

        base += branch.get_vertex_count();
        result.set_transition(base - 1, last_vertex, '^');
    }

    result.set_final_state(last_vertex);
    return result;
}
void writeGraphviz(std::ostream& out, const NFA& graph) { out << "digraph G {\n rankdir=LR;\n ranksep=equally;\n node [shape=\"circle\"];" << std::endl; for (const NFA::VertexDescriptor v : graph.vertices()) { writeVertex(out, v, graph); } for (const NFA::VertexDescriptor head : graph.vertices()) { for (uint32_t j = 0; j < graph.outDegree(head); ++j) { writeEdge(out, head, graph.outVertex(head, j), j, graph); } } out << "}" << std::endl; }
// Tokenizes `expression` and parses it into an NFA.
//
// Returns a default-constructed NFA when tokenizing produced no symbols.
// When parsing yields an empty NFA and `errCol` is non-null, *errCol is set
// to the recorded error column.
NFA RE2NFA::parse(const QString &expression, int *errCol)
{
    tokenize(expression);
    if (symbols.isEmpty())
        return NFA();

    index = 0;
    NFA parsed = parseExpr();

    if (parsed.isEmpty() && errCol)
        *errCol = errorColumn;

    return parsed;
}
// Builds the automaton for an alternation expression.
//
// Every child automaton is merged into a fresh NFA at point 0; the value
// returned by each merge() is remembered. A new point is then allocated and
// an epsilon edge is added to it from each remembered value.
//
// The result is owned by a unique_ptr from the moment it is allocated, so it
// is released if a child's buildNFA(), merge() or the vector growth throws.
unique_ptr<NFA> OrExp::buildNFA()
{
    unique_ptr<NFA> nfa(new NFA());

    std::vector<int> branchEnds;
    for (auto& child : m_childExps) {
        unique_ptr<NFA> childNfa = child->buildNFA();
        int branchEnd = nfa->merge(*childNfa, std::make_pair(0, 0));
        branchEnds.push_back(branchEnd);
    }

    int end = nfa->allocPoint();
    for (int branchEnd : branchEnds) {
        nfa->addEdge(branchEnd, epsilon, end);
    }

    return nfa;
}
// Creates an automaton that accepts either `a` or `b`.
//
// initializeFromPair() places both operands into the result and reports where
// their initial and final states ended up. The result's initial state fans
// out to both operands through epsilon transitions, and both operands' final
// states lead to the result's final state the same way.
NFA NFA::createAlternatingNFA(const NFA &a, const NFA &b)
{
    NFA either;

    int aStart, aEnd, bStart, bEnd;
    either.initializeFromPair(a, b, &aStart, &aEnd, &bStart, &bEnd);

    // Entry fan-out.
    either.addTransition(either.initialState, Epsilon, aStart);
    either.addTransition(either.initialState, Epsilon, bStart);

    // Exit fan-in.
    either.addTransition(aEnd, Epsilon, either.finalState);
    either.addTransition(bEnd, Epsilon, either.finalState);

    return either;
}
// Builds the automaton for a repetition of the single child expression.
//
// A point is allocated in a fresh NFA and an epsilon edge 0 -> 1 is added.
// The child automaton is then merged at the current end point, after which
// epsilon edges are added from the (new) end point to point 1 and from
// point 0 to the end point.
//
// The result is owned by a unique_ptr from the moment it is allocated, so it
// is released if the child's buildNFA() or any later call throws.
unique_ptr<NFA> RepeatExp::buildNFA()
{
    assert(childCount() == 1);

    unique_ptr<NFA> nfa(new NFA());
    nfa->allocPoint();
    nfa->addEdge(0, epsilon, 1);

    unique_ptr<NFA> childNfa = firstChild()->buildNFA();
    nfa->merge(*childNfa, std::make_pair(nfa->endPoint(), 0));

    nfa->addEdge(nfa->endPoint(), epsilon, 1);
    nfa->addEdge(0, epsilon, nfa->endPoint());

    return nfa;
}
// Parses one regular expression and attaches its automaton to `nfa`.
//
// The first two entries of nfa's epsilon set are taken as the attachment
// points for non-anchored and anchored expressions respectively. An
// expression that begins with TILDE is linked to the anchored entry (the
// TILDE itself is skipped); any other expression is linked to the
// non-anchored entry. The last state of the parsed automaton is marked
// accepting.
//
// Always returns a null pointer: the function is declared to return void*
// and previously ran off its end without returning, which is undefined
// behaviour.
void *regex_parser::parse_re(NFA* nfa, const char *re){
    int ptr=0;
    bool tilde_re=false;
    NFA *non_anchored = *(nfa->get_epsilon()->begin());
    NFA *anchored = *(++nfa->get_epsilon()->begin());

    // check whether the text must match at the beginning of the regular expression
    if (re[ptr]==TILDE){
        tilde_re=true;
        ptr++;
    }

    NFA *fa=parse_re(re,&ptr,false);
    fa->get_last()->accept();

    if (!tilde_re){
        non_anchored->link(fa->get_first());
    }else{
        anchored->link(fa->get_first());
    }

    return 0; // null pointer; no meaningful result is produced
}
// Builds the Kleene star of `nfa`.
//
// Layout of the result: state 0 is the new start state (head), state 1 is the
// only accepting state (tail), and the states of `nfa` follow, shifted by 2.
// Lambda transitions:
//   head -> inner start   (enter the loop)
//   head -> tail          (accept the empty string; this edge was missing,
//                          which made the result "one or more" instead of
//                          "zero or more")
//   every accepting inner state -> inner start (repeat) and -> tail (leave)
NFA getNFA_Star(NFA nfa)
{
    int head = 0;
    int tail = 1;
    int offset = 2;
    int start = nfa.start_state + offset;
    vector<State> states;

    NFA ret;
    ret.setAlphabetSet(nfa.alphabet);
    ret.setStartState(head);
    ret.setTransFunc(head, map<char, vector<int> >());
    ret.setTransFunc(tail, map<char, vector<int> >());
    states.push_back(State(false));   // head
    states.push_back(State(true));    // tail

    for(int i = 0; i < nfa.states.size(); i++) {
        // Copy the transition row, then renumber its targets.
        ret.setTransFunc(i + offset, nfa.trans_func[i]);
        for(map<char, vector<int> >::iterator it = ret.trans_func[i + offset].begin(); it != ret.trans_func[i + offset].end(); it++) {
            for(vector<int>::iterator jt = it->second.begin(); jt != it->second.end(); jt++) {
                *jt = *jt + offset;
            }
        }

        // Inner states are never accepting themselves; acceptance goes through tail.
        states.push_back(State(false));
        if(nfa.states[i].isAccepted()) {
            ret.trans_func[i + offset][NFA::lambda].push_back(start);
            ret.trans_func[i + offset][NFA::lambda].push_back(tail);
        }
    }

    if(find(ret.alphabet.begin(), ret.alphabet.end(), NFA::lambda) == ret.alphabet.end())
        ret.alphabet.push_back(NFA::lambda);

    ret.trans_func[head][NFA::lambda].push_back(start);
    ret.trans_func[head][NFA::lambda].push_back(tail);   // zero repetitions

    ret.setStateSet(states);
    return ret;
}
// This is called when a tree has built. void update() { assert(root != NULL); setAccepts(); //root->print(); //std::cout << std::endl; //-- NFAState *next = nfa->getStartState(); for(std::list<CNode *>::iterator i = root->children.begin(); i != root->children.end(); ++i ) { CNode *node = *i; if(node->children.empty()) { NFAState *tmp = nfa->newState(); next->addEdge(tmp, node->value); if(node->accept) tmp->makeFinal(acceptStr, acceptFlags); next = tmp; } else { NFAState *tmp = next; std::list<CNode *>::iterator j = node->children.begin(); for(unsigned i = 1; i < node->children.size(); ++i, ++j) { NFAState *tmp2 = nfa->newState(); tmp->addEdge(tmp2, (*j)->value); tmp = tmp2; } tmp->addEdge(next, node->children.back()->value); } } //-- }
// Groups the out-vertices of `source` by byte value.
//
// Returns 256 lists; list b holds, without duplicates and in visiting order,
// every out-vertex whose transition reports byte b through getBytes().
// The same ByteSet object is passed to getBytes() for every out-vertex.
std::vector<std::vector<NFA::VertexDescriptor>> pivotStates(NFA::VertexDescriptor source, const NFA& graph) {
  std::vector<std::vector<NFA::VertexDescriptor>> byByte(256);
  ByteSet permitted;

  for (const NFA::VertexDescriptor target : graph.outVertices(source)) {
    graph[target].Trans->getBytes(permitted);

    for (uint32_t b = 0; b < 256; ++b) {
      if (!permitted[b]) {
        continue;
      }
      std::vector<NFA::VertexDescriptor>& bucket = byByte[b];
      const bool alreadyListed = std::find(bucket.begin(), bucket.end(), target) != bucket.end();
      if (!alreadyListed) {
        bucket.push_back(target);
      }
    }
  }

  return byByte;
}
//========================================================================================================== // Calculate the epsiolon closure for a state in the NFA. This is needed for constructing the DFA. //========================================================================================================== set<int> DFA::calc_epsilon_closure(int state, NFA& nfa) { set<int> res; vector<int> cur_states; res.insert(state); cur_states.push_back(state); //------------------------------------------------------------------------------------------------------ // As long as there are states in the list, add their epsilon neighbors, but only if they weren't added // yet //------------------------------------------------------------------------------------------------------ for(int i = 0; i < cur_states.size(); ++i) { for(int s: nfa.get_epsilon_transitions(cur_states[i])) { if(res.count(s) == 0) { res.insert(s); cur_states.push_back(s); } } } return res; }
// Wraps `a` between a new initial and a new final state.
//
// The result has two more states than `a`; the copy of `a` is placed at
// index offset 1. Epsilon transitions are added
//   - from the new initial state into the copy and directly to the new final
//     state (the copy can be skipped), and
//   - from the copy's final state back to the copy's initial state and on to
//     the new final state (the copy can be traversed again).
NFA NFA::createOptionalNFA(const NFA &a)
{
    const int shift = 1;

    NFA wrapped;
    wrapped.initialize(a.states.count() + 2);
    wrapped.copyFrom(a, shift);

    const int innerStart = a.initialState + shift;
    const int innerEnd = a.finalState + shift;

    wrapped.addTransition(wrapped.initialState, Epsilon, innerStart);
    wrapped.addTransition(wrapped.initialState, Epsilon, wrapped.finalState);

    wrapped.addTransition(innerEnd, Epsilon, innerStart);
    wrapped.addTransition(innerEnd, Epsilon, wrapped.finalState);

    return wrapped;
}
// Builds the union of two automata.
//
// Layout of the result: the states of n1 keep their indices, the states of
// n2 follow shifted by n1.states.size(), then come a new start state (head)
// and a new accepting state (tail). Head has lambda transitions to both
// original start states; every originally accepting state has a lambda
// transition to tail, which is the only accepting state of the result.
NFA getNFA_OR(NFA n1, NFA n2)
{
    const int shift = n1.states.size();
    const int head = n1.states.size() + n2.states.size();
    const int tail = head + 1;

    vector<State> merged;
    NFA ret;
    ret.setStartState(head);
    ret.alphabet = n1.alphabet;

    // n1 part: rows are copied unchanged.
    for(int s = 0; s < n1.states.size(); s++) {
        ret.setTransFunc(s, n1.trans_func[s]);
        merged.push_back(State(false));
        if(n1.states[s].isAccepted())
            ret.trans_func[s][NFA::lambda].push_back(tail);
    }

    // n2 part: rows are copied and their targets renumbered.
    typedef map<char, vector<int> >::iterator RowIt;
    for(int s = 0; s < n2.states.size(); s++) {
        const int moved = s + shift;
        ret.setTransFunc(moved, n2.trans_func[s]);

        for(RowIt row = ret.trans_func[moved].begin(); row != ret.trans_func[moved].end(); row++) {
            // Symbols seen only in n2 are added to the combined alphabet.
            if(find(ret.alphabet.begin(), ret.alphabet.end(), row->first) == ret.alphabet.end())
                ret.alphabet.push_back(row->first);

            for(vector<int>::iterator dst = row->second.begin(); dst != row->second.end(); dst++)
                *dst += shift;
        }

        merged.push_back(State(false));
        if(n2.states[s].isAccepted())
            ret.trans_func[moved][NFA::lambda].push_back(tail);
    }

    if(find(ret.alphabet.begin(), ret.alphabet.end(), NFA::lambda) == ret.alphabet.end())
        ret.alphabet.push_back(NFA::lambda);

    // Head branches into both operands; tail has no outgoing transitions.
    map<char, vector<int> > headRow, tailRow;
    headRow[NFA::lambda].push_back(n1.start_state);
    headRow[NFA::lambda].push_back(n2.start_state + shift);
    ret.setTransFunc(head, headRow);
    ret.setTransFunc(tail, tailRow);

    merged.push_back(State(false));   // head
    merged.push_back(State(true));    // tail
    ret.setStateSet(merged);
    return ret;
}
// Builds a linear automaton that spells out `str`.
//
// State i has a single transition on str[i] to state i + 1; the state after
// the last character has an empty transition row and is the accepting one.
// The alphabet lists each distinct character once, in order of first
// appearance.
NFA getNFAbyString(const char str[])
{
    const int len = strlen(str);

    NFA ret;
    ret.setStartState(0);

    vector<State> states;
    vector<char> alphabets;
    set<char> seen;

    for(int pos = 0; pos < len; pos++) {
        const char ch = str[pos];

        map<char, vector<int> > row;
        row[ch].push_back(pos + 1);

        states.push_back(State());
        ret.setTransFunc(pos, row);

        // insert() reports whether the character is new.
        if(seen.insert(ch).second)
            alphabets.push_back(ch);
    }

    // Terminal state.
    ret.setTransFunc(len, map<char, vector<int> >());
    states.push_back(State(true));

    ret.setStateSet(states);
    ret.setAlphabetSet(alphabets);
    return ret;
}
void useNfa() { NewQDFA dfa; buildTransitionMap(dfa); NFA *tmpNFA = nfa; NFA &nfa = *tmpNFA; //--- NewQDFA &output = nfa.newQDFA; QDFASet startState; startState.state.insert(nfa.getStartState()); // std::cout << "---begin NFA---" << std::endl; std::set<QDFASet> wanted; wanted.insert(startState); output.start = startState; while(!wanted.empty()) { std::set<QDFASet>::iterator j = wanted.begin(); QDFASet js = *j; wanted.erase(j); // 'js' has a set of QNFAState pointers. if(output.transitionMap.find(js) != output.transitionMap.end()) { std::cout << "NFA to DFA - internal error" << std::endl; throw -1; } output.transitionMap[js] = NewQDFAItems(); NewQDFAItems &io = output.transitionMap[js]; bool accept = false; std::string finalStr = ""; unsigned finalFlags = 0; //[+] // 'js' is a set of QNFAState pointers that we want. // Look at each row in 'js'. for(std::set<QNFAState *>::iterator k = js.state.begin(); k != js.state.end(); ++k ) { if((*k)->isFinal) { if(accept) { if(finalStr != (*k)->finalStr || finalFlags != (*k)->finalFlags) { std::cout << "Ambiguous accept string or flags!" << std::endl; throw -1; } } accept = true; finalStr = (*k)->finalStr; finalFlags = (*k)->finalFlags; //std::cout << "accept:"; } //std::cout << std::hex << *k << std::dec << " "; // Now for each row in 'js', go thru each column. for(std::map<int, QNFATarget >::iterator h = (*k)->transitionMap.begin(); h != (*k)->transitionMap.end(); ++h ) { for(std::set<QNFAState *>::iterator hh = h->second.targets.begin(); hh != h->second.targets.end(); ++hh ) { io.targets[h->first].state.insert(*hh); } } } // Now look at io.targets[x] for all x. 
for(std::map<int, QDFASet>::iterator xi = io.targets.begin(); xi != io.targets.end(); ++xi ) { QDFASet &xt = xi->second; if(output.transitionMap.find(xt) == output.transitionMap.end()) { wanted.insert(xt); } } if(accept) { io.accept = true; io.finalStr = finalStr; io.finalFlags = finalFlags; } } //std::cout << std::endl; //std::cout << "--- end NFA ---" << std::endl; //generateOutput(); //--- useDfa(output); }
// Builds a DFA from `nfa` by exploring sets of NFA states.
//
// The NFA is read through its table: nfa_table_map[state][symbol] is the set
// of successor states, and the Symbol::EPSILON column is used as the set of
// states reachable from `state` without consuming input.
// NOTE(review): the start set is taken directly from
// nfa_table_map[0][EPSILON], which presumes state 0 is the NFA start state
// and that the EPSILON column contains the state itself -- confirm against
// NFATable.
//
// DFA states are named "q0", "q1", ... in discovery order; a state set is
// identified by Utils::TO_STRING of the set as it was first discovered.
// A DFA state is final when its set contains the NFA's final state.
//
// Changes relative to the previous version:
//  - no variable-length arrays (non-standard C++, and indexed by state id);
//    duplicate suppression relies on the set's own insert();
//  - the current set is no longer modified while it is being iterated;
//  - final-state registration happens once per DFA state and no longer
//    depends on the alphabet containing a non-epsilon symbol.
DFA DFA::FROM_NFA(NFA nfa){
    DFA dfa;
    NFATable nfa_table = nfa.getTable();
    fa_table nfa_table_map = nfa_table.getMapping();

    unordered_set<string> visited;

    int_set first_set = nfa_table_map[0][Symbol::EPSILON];
    queue<int_set> set_queue;
    set_queue.push(first_set);

    int dfa_state_count = 0;
    unordered_map<string, int> dfa_state_mapping;
    dfa_state_mapping.insert({Utils::TO_STRING(first_set), 0});
    dfa.addState("q" + to_string(0));

    while (!set_queue.empty()){
        int_set curr_state_set = set_queue.front();
        set_queue.pop();

        // The name is taken before the set is expanded below, so it matches
        // the key under which the set was registered when it was discovered.
        string state_name = Utils::TO_STRING(curr_state_set);
        if (visited.find(state_name) != visited.end())
            continue;
        visited.insert(state_name);

        // Expand the set with everything reachable through EPSILON. Newly
        // added states are expanded in the next round until nothing changes.
        int_set frontier = curr_state_set;
        while (!frontier.empty()){
            int_set discovered;
            for (auto st : frontier){
                int_set e_closure = nfa_table_map[st][Symbol::EPSILON];
                for (auto e_closure_state : e_closure){
                    if (curr_state_set.insert(e_closure_state).second)
                        discovered.insert(e_closure_state);
                }
            }
            frontier = discovered;
        }

        // Register the state as final once, independent of the alphabet.
        bool is_final_state = false;
        for (auto curr_state : curr_state_set){
            if (curr_state == nfa_table.getFinalState()){
                is_final_state = true;
                break;
            }
        }
        if (is_final_state)
            dfa.addFinalState(dfa_state_mapping[state_name]);

        for (auto symbol : nfa_table.getAlphabet()){
            if (symbol == Symbol::EPSILON)
                continue;

            // Successor set on `symbol`, including EPSILON-reachable states.
            int_set next_set;
            for (auto curr_state : curr_state_set){
                if (nfa_table_map[curr_state].find(symbol) == nfa_table_map[curr_state].end())
                    continue;

                int_set next_states = nfa_table_map[curr_state][symbol];
                for (auto ns : next_states){
                    int_set e_closure = nfa_table_map[ns][Symbol::EPSILON];
                    for (auto e_closure_state : e_closure)
                        next_set.insert(e_closure_state);
                }
            }

            // No successor: the DFA simply has no transition on this symbol.
            if (next_set.size() == 0)
                continue;

            string next_state_name = Utils::TO_STRING(next_set);
            if (dfa_state_mapping.find(next_state_name) == dfa_state_mapping.end()){
                dfa_state_count++;
                dfa.addState("q" + to_string(dfa_state_count));
                dfa_state_mapping.insert({next_state_name, dfa_state_count});
            }
            dfa.addTransition(dfa_state_mapping[state_name], dfa_state_mapping[next_state_name], symbol);

            if (visited.find(next_state_name) == visited.end())
                set_queue.push(next_set);
        }
    }

    return dfa;
}