Ejemplo n.º 1
0
//==========================================================================================================
//==========================================================================================================
// Builds the NFA from the productions of `grammar`.
//
// Every production i gets its own chain of consecutive states: one transition per symbol after the
// first entry of the production (entry 0 is the left-hand side), each going from `state` to
// `state + 1`. The transition on the last symbol is tagged with the production index i as its
// accepting value; every other transition is tagged with -1. Afterwards, epsilon transitions are
// added so that a state with a transition on a nonterminal N can also jump to the first state of
// every production that has N on its left-hand side.
NFA::NFA(Grammar& grammar) {
    // Holds the initial states created from the productions of each nonterminal. This is needed for
    // adding the epsilon transitions.
    map<Symbol, vector<int>> nonterminals_initial_states;
    int state = 0;

    //------------------------------------------------------------------------------------------------------
    // Add transitions for every production
    //------------------------------------------------------------------------------------------------------
    // The outer ++state steps past the final state of the chain that was just built, so the next
    // production starts on a fresh state.
    for(std::size_t i = 0; i < grammar.productions.size(); ++i, ++state) {
        const auto& production = grammar.productions[i];

        if(i > 0) // The start symbol will not have any transitions defined for it
            nonterminals_initial_states[production[0]].push_back(state);

        for(std::size_t j = 1; j < production.size(); ++j, ++state) {
            // When adding a transition on the last symbol of the production, the state is accepting
            const bool is_last_symbol = (j + 1 == production.size());
            const int accepting_val = is_last_symbol ? static_cast<int>(i) : -1;
            add_transition(state, production[j], state + 1, accepting_val);
        }
    }

    //------------------------------------------------------------------------------------------------------
    // Add the epsilon transitions: whenever there is a transition from state s to state t on a nonterminal
    // N, we add epsilon transitions from s to the initial states of all the NFAs for productions with N on
    // the left-hand side
    //------------------------------------------------------------------------------------------------------
    // table.size() is deliberately re-read on every iteration, exactly as before, in case
    // add_epsilon_transition changes the table's size (not visible from here).
    for(int s = 0; s < static_cast<int>(table.size()); ++s) {
        for(int sym = 0; sym < NUM_NON_TERMINALS; ++sym) {
            if(table[s][sym].empty()) // No transition defined on this nonterminal
                continue;

            // Use find() rather than operator[] so that nonterminals without any recorded initial
            // state do not get an empty entry inserted into the map as a side effect of the lookup.
            const auto initial_states = nonterminals_initial_states.find(Symbol(sym));
            if(initial_states == nonterminals_initial_states.end())
                continue;

            for(const int initial_state : initial_states->second) {
                add_epsilon_transition(s, initial_state);
            }
        }
    }
}
Ejemplo n.º 2
0
error_t compile_regexp_thompson(thompson_nfa_description_t* nfa, struct ast_node* ast)
{
  error_t err;
  thompson_nfa_description_t* machs = NULL;
  int nmachs = 0;
  int sub_nodes, first, last, initial, final;

  // Does Thompson's construction.
  memset(nfa, 0, sizeof(thompson_nfa_description_t));

  switch (ast->type) {
    case AST_NODE_REGEXP:
      {
        struct regexp_node* n = (struct regexp_node*) ast;

        nmachs = n->choices.num;
        // we'll store the machines we're putting together in machs.
        machs = malloc(sizeof(thompson_nfa_description_t)*nmachs);
        if( ! machs ) { err = ERR_MEM; goto error; }

        // make the recursive call to create the machines
        sub_nodes = 0;
        for( int i = 0; i < nmachs; i++ ) {
          err = compile_regexp_thompson(&machs[i], n->choices.list[i]);
          if( err ) goto error;
          sub_nodes += machs[i].num_nodes;
        }

        if( nmachs == 1 ) {
          // only one machine - just copy that machine!
          err = append_nfa(nfa, &machs[0], 0);
          if( err ) goto error;
        } else {
          // add a start node.
          err = add_epsilon_node(nfa);
          if( err ) goto error;

          initial = 0;
          final = sub_nodes + 1; // 1 for initial state.

          // add the sub-machines.
          for( int i = 0; i < nmachs; i++ ) {
            first = nfa->num_nodes;
            err = append_nfa(nfa, &machs[i], 0);
            if( err ) goto error;
            last = nfa->num_nodes - 1;
            // add the epsilon transition from the initial to first
            err = add_epsilon_transition(nfa, 0, first);
            if( err ) goto error;
            // add the epsilon transition from last to final
            err = add_epsilon_transition(nfa, last, final);
            if( err ) goto error;
          }
          // add the final node.
          err = add_epsilon_node(nfa);
          if( err ) goto error;
          assert(nfa->num_nodes - 1 == final);
        }

      }
      break;
    case AST_NODE_SEQUENCE:
      {
        struct sequence_node* n = (struct sequence_node*) ast;

        // we'll store the machines we're putting together in machs.
        nmachs = n->atoms.num;
        machs = malloc(sizeof(thompson_nfa_description_t)*nmachs);
        if( ! machs ) { err = ERR_MEM; goto error; }

        // make the recursive call to create the machines
        sub_nodes = 0;
        for( int i = 0; i < nmachs; i++ ) {
          err = compile_regexp_thompson(&machs[i], n->atoms.list[i]);
          if( err ) return err;
          sub_nodes += machs[i].num_nodes;
        }

        // add the machines
        for( int i = 0; i < nmachs; i++ ) {
          err = append_nfa(nfa, &machs[i], 1);
          if( err ) goto error;
        }

      }
      break;
    case AST_NODE_ATOM:
      {
        struct atom_node* n = (struct atom_node*) ast;
        // an atom has just some number of repeats...
        nmachs = 1;
        machs = malloc(sizeof(thompson_nfa_description_t)*nmachs);
        if( ! machs ) { err = ERR_MEM; goto error; }

        err = compile_regexp_thompson(&machs[0], n->child);
        if( err ) goto error;
        sub_nodes = machs[0].num_nodes;

        // now add the required number.
        for( int i = 0; i < n->repeat.min; i++ ) {
          // with overlap..
          err = append_nfa(nfa, &machs[0], 1);
          if( err ) goto error;
        }
        if( n->repeat.min == 0 ) {
          err = add_epsilon_node(nfa);
          if( err ) goto error;
        }

        // now add the optional number, or the optional unbounded case.
        if( n->repeat.max == UNBOUNDED_REPEATS ) {
          // add the optional unbounded case - *
          initial = nfa->num_nodes - 1;
          final = initial + 1 + sub_nodes;

          first = nfa->num_nodes;
          err = append_nfa(nfa, &machs[0], 0);
          if( err ) goto error;
          last = nfa->num_nodes - 1;

          // add the final node
          err = add_epsilon_node(nfa);
          if( err ) goto error;

          // add the transitions:
          // initial->first
          // last->first
          // last->final
          // initial->final
          err = add_epsilon_transition(nfa, initial, first);
          if( err ) goto error;
          err = add_epsilon_transition(nfa, last, first);
          if( err ) goto error;
          err = add_epsilon_transition(nfa, last, final);
          if( err ) goto error;
          err = add_epsilon_transition(nfa, initial, final);
          if( err ) goto error;

        } else {