void subsetconstruct(int (*dfatable)[128], struct set *acceptset) { struct dfa *dfa; struct set *next; int nextstate = 0; int c, state; struct accept *accept; while (dfa = next_dfa()) { for (c = 0; c < MAX_CHARS; c++) { /* compute next dfa, to which dfa move on c */ next = move(dfa->states, c); next = epsilon_closure(next, &accept, 0); /* no transition */ if (!next) state = F; /* transition from current to next */ else if ((state = in_dfa(next)) >= 0) freeset(next); /* next is alloced by move()*/ else state = add_dfa(next, accept); /* real assign the dfatable: [0->ndfas][0->MAX_CHARS] */ dfatable[state_dfa(dfa)][c] = state; /* NOTE: using state, not ndfas - 1 */ if (accept) addset(acceptset, state); } } }
static void init_dfas(struct nfa *sstate, struct set *accept) { struct accept *acp; struct set *first; int i; /* alloc dfa buffer */ dfastates = xmalloc(MAX_DFAS * sizeof(struct dfa)); /* init dfas */ for (i = 0; i < MAX_DFAS; i++) { dfastates[i].group = -1; dfastates[i].states = NULL; dfastates[i].accept = NULL; } /* init first dfa state */ first = newset(); addset(first, nfastate(sstate)); epsilon_closure(first, &acp, 0); /* NOTE: first dfa can be accepted, such as regexp: `.*` */ if (acp) { dfastates[ndfas].accept = getaccept(acp); addset(accept, 0); } dfastates[0].states = first; /* some internal parmaters */ ndfas = 1; currentdfa = 0; }
//struct setcomp { // bool operator()(const set<State *> &states1, const set<State *> &states2) { // if (states1.size() < states2.size()) { // return true; // } // if (states2.size() < states1.size()) { // return false; // } // for (auto it = states1.begin(); it != states1.end(); ++it) { // for (auto it2 = states2.begin(); it != states2.end(); ++it2) { // if (*it < *it2) { // return true; // } // } // } // return false; // } //}; DFA *buidDFA(const NFA *nfa) { set<State *> begin; epsilon_closure(nfa->start, begin); DFA *start = new DFA(begin); stack<DFA *> stack; stack.push(start); map<set<State *>, DFA *> map;//存放已经访问过的DFA,若访问过则直接从里面取DFA,若没有则插入 while (!stack.empty()) { DFA *dfa = stack.top(); stack.pop(); // auto p = pair<set<State *>, DFA *>(dfa->states, dfa); // map.insert(p); map[dfa->states] = dfa; //cout << "sb" << endl; for (int c = 0; c <= 255; c++) {//不能用char(-128-127)也不能用unsigned char(0-255),到255时+1就溢出,溢出条件为真无限循环 set<State *> next_vec = epsilon_bychar(dfa, c); if (map.find(next_vec) == map.end() && next_vec.size() != 0) {//若是新的DFA //cout << "x"<<c <<endl; DFA *next = new DFA(next_vec); stack.push(next); dfa->next[c] = next; } else if (next_vec.size() != 0){ // cout << c <<endl; dfa->next[c] = map[next_vec]; } } } return start; }
// epsilon-closure(move(T, c)): one of the subset-construction formulas from
// chapter 3 of the Dragon Book.
//
// For every NFA state held by `dfa`, follow each non-null edge labelled `c`
// and accumulate the epsilon closure of the edge's target in the result set.
set<State *> epsilon_bychar(DFA *dfa, int c) {
    set<State *> reached;
    for (auto &nfa_state : dfa->states) {
        for (auto &edge : nfa_state->edgs) {
            if (!edge || edge->ch != c) {
                continue;  // missing edge, or labelled with another character
            }
            // The argument has to be the edge's target state; passing the
            // source state here would be wrong.
            epsilon_closure(edge->nextState, reached);
        }
    }
    return reached;
}
/* Given a list of states, merge the epsilon closure of each member state
 * into `state_set`, so that it ends up holding the union of the closures.
 *
 * model     - machine handed through to epsilon_closure()
 * state_set - dict updated in place (PyDict_Merge with override enabled)
 * states    - list of states; accessed with the unchecked PyList_GET_*
 *             macros, so it has to be a real list
 *
 * Returns 0 on success, or -1 as soon as computing a closure or merging
 * it fails.
 */
static int set_epsilon_closure(PyObject *model, PyObject *state_set,
                               PyObject *states)
{
  /* Py_ssize_t matches the type of PyList_GET_SIZE(); the former `int`
   * index mixed widths in the loop comparison. */
  Py_ssize_t i;

  for (i = 0; i < PyList_GET_SIZE(states); i++) {
    PyObject *epsilon_set;
    int status;

    epsilon_set = epsilon_closure(model, PyList_GET_ITEM(states, i));
    if (epsilon_set == NULL) {
      return -1;
    }

    /* The closure is released whether or not the merge succeeded. */
    status = PyDict_Merge(state_set, epsilon_set, 1);
    Py_DECREF(epsilon_set);
    if (status < 0) {
      return -1;
    }
  }
  return 0;
}
/* Given a non-deterministic machine, return a new equivalent machine
 * which is deterministic.
 *
 * The model is first extended with a fresh state reached through
 * `final_event`.  DFA states are then generated by subset construction:
 * each DFA state stands for a set of NFA states, and map_old_to_new()
 * translates such a set into a DFA state using the two mapping dicts.
 *
 * Returns the object created by DFA_New() on success, or NULL on failure
 * (the temporary dicts are released on both paths).
 */
static PyObject *compile_model(PyObject *model)
{
  PyObject *dfa;
  PyObject *old_to_new_map;
  PyObject *new_to_old_map;
  PyObject *transitions;
  PyObject *initial_state;
  PyObject *state_set;
  PyObject *new_state;
  int final_state, dfa_state;

  /* Nothing has been allocated yet, so these failures can return directly. */
  final_state = ContentModel_NewState(model);
  if (final_state < 0)
    return NULL;
  if (ContentModel_AddTransition(model, final_event, 1, final_state) < 0)
    return NULL;

  dfa = DFA_New();
  old_to_new_map = PyDict_New();
  new_to_old_map = PyDict_New();
  transitions = PyDict_New();
  if (dfa == NULL || old_to_new_map == NULL || new_to_old_map == NULL ||
      transitions == NULL) {
    /* Any subset may be NULL here, hence XDECREF rather than the error label. */
    Py_XDECREF(old_to_new_map);
    Py_XDECREF(new_to_old_map);
    Py_XDECREF(transitions);
    Py_XDECREF(dfa);
    return NULL;
  }

  /* Seed the process using the initial states of the old machines. */
  initial_state = PyInt_FromLong(0L);
  if (initial_state == NULL)
    goto error;
  state_set = epsilon_closure(model, initial_state);
  Py_DECREF(initial_state);
  if (state_set == NULL)
    goto error;
  new_state = map_old_to_new(dfa, old_to_new_map, new_to_old_map, state_set);
  Py_DECREF(state_set);
  if (new_state == NULL)
    goto error;

  /* Tricky bit here; we add things to the end of this list while
   * iterating over it.  The iteration stops when closure is achieved.
   */
  for (dfa_state = 0; dfa_state < DFA_Size(dfa); dfa_state++) {
    PyObject *state_num;
    PyObject *event;
    PyObject *temp;
    Py_ssize_t i;

    /* Look up the set of NFA states this DFA state stands for. */
    new_state = PyInt_FromLong(dfa_state);
    if (new_state == NULL)
      goto error;
    state_set = PyDict_GetItem(new_to_old_map, new_state);
    Py_DECREF(new_state);
    if (state_set == NULL) {
      /* This should not happen, but just in case...  Report the DFA state
       * being looked up; the iteration cursor `i` is not initialised yet
       * at this point and must not be read. */
      PyErr_Format(PyExc_SystemError,
                   "state %d not mapped to old states", dfa_state);
      goto error;
    }

    /* Collect, per event, the closed set of NFA states reachable from any
     * member of this DFA state. */
    PyDict_Clear(transitions);
    i = 0;
    while (PyDict_Next(state_set, &i, &state_num, &temp)) {
      PyObject *old_transitions, *target_states;
      int nfa_state;
      Py_ssize_t n;

      nfa_state = PyInt_AS_LONG(state_num);
      if (nfa_state >= ContentModel_Size(model)) {
        PyErr_Format(PyExc_SystemError,
                     "state %d not a valid NFA state", nfa_state);
        goto error;
      }
      old_transitions = ContentModel_GetState(model, nfa_state);
      n = 0;
      while (PyDict_Next(old_transitions, &n, &event, &target_states)) {
        /* Epsilon moves are already covered by the closures. */
        if (event != epsilon_event) {
          PyObject *old_state_set = PyDict_GetItem(transitions, event);
          if (old_state_set == NULL) {
            old_state_set = PyDict_New();
            if (old_state_set == NULL)
              goto error;
            if (PyDict_SetItem(transitions, event, old_state_set) < 0) {
              Py_DECREF(old_state_set);
              goto error;
            }
            /* `transitions` now holds the reference keeping it alive. */
            Py_DECREF(old_state_set);
          }
          if (set_epsilon_closure(model, old_state_set, target_states) < 0) {
            goto error;
          }
        }
      }
    }

    /* Turn each collected NFA-state set into a DFA state (possibly a new
     * one, which extends the outer iteration) and record the edge. */
    i = 0;
    while (PyDict_Next(transitions, &i, &event, &state_set)) {
      PyObject *new_transitions;

      new_state = map_old_to_new(dfa, old_to_new_map, new_to_old_map,
                                 state_set);
      if (new_state == NULL)
        goto error;
      new_transitions = DFA_GetState(dfa, dfa_state);
      if (PyDict_SetItem(new_transitions, event, new_state) < 0)
        goto error;
    }
  }

  Py_DECREF(old_to_new_map);
  Py_DECREF(new_to_old_map);
  Py_DECREF(transitions);
  return dfa;

error:
  Py_DECREF(old_to_new_map);
  Py_DECREF(new_to_old_map);
  Py_DECREF(transitions);
  Py_DECREF(dfa);
  return NULL;
}