/// @brief Record @p node as a lookahead symbol and keep searching deeper.
///
/// Inserts @p node into @p lookahead_set through insert_lookahead(). While
/// another lookahead level is still wanted, the search continues from the
/// single successor of @p node, and the set returned by insert_lookahead()
/// becomes the destination for that next level.
///
/// @param node Node to record; it must have exactly one successor.
/// @param lookahead_set Set that receives @p node.
/// @param prev_level_walk_count Walk counter of the alternative being
///        traced (may be 0); forwarded unchanged.
/// @param cur_level Level at which @p node is recorded.
/// @param max_level Number of lookahead levels wanted.
/// @param recur_parent_stack Forwarded unchanged to compute_lookahead_set().
/// @param last_symbol_stack Forwarded unchanged to compute_lookahead_set().
/// @param curr_last_symbol_level Forwarded incremented by one.
void
analyser_environment_t::add_a_terminal_to_lookahead_set(
  node_t * const node,
  lookahead_set_t * const lookahead_set,
  unsigned int * const prev_level_walk_count,
  size_t const cur_level,
  size_t const max_level,
  std::list<recur_lookahead_t> &recur_parent_stack,
  std::list<last_symbol_t> &last_symbol_stack,
  unsigned int const curr_last_symbol_level) const
{
  lookahead_set_t * const new_lookahead_set = insert_lookahead(lookahead_set, node);
  assert(new_lookahead_set != 0);

#if defined(TRACING_LOOKAHEAD)
  log(L"%c\n", L'*');
#endif

  assert(1 == node->next_nodes().size());

  // The bound is written as an addition on the left-hand side: both values
  // are unsigned, so 'max_level - 1' would wrap around to a huge number for
  // max_level == 0 and let the recursion continue.
  if ((cur_level + 1) < max_level)
  {
    // Search for more lookahead symbols starting right after 'node'.
    compute_lookahead_set(node->next_nodes().front(),
                          new_lookahead_set,
                          prev_level_walk_count,
                          cur_level + 1,
                          max_level,
                          recur_parent_stack,
                          last_symbol_stack,
                          curr_last_symbol_level + 1);
  }
}
Exemple #2
0
/// @brief Load a shared library.
///
/// On LINUX builds this forwards to dlopen() with RTLD_LAZY. On WIN32
/// builds the name is converted with fmtstr_mbstowcs() and handed to
/// LoadLibraryW().
///
/// @param filename Name of the library, as a multibyte string.
/// @return The handle returned by dlopen() / LoadLibraryW(), or NULL when
///         the library cannot be loaded or the name cannot be converted.
void *
library_load(
    char const * const filename)
{
#if LINUX
    return ::dlopen(filename, RTLD_LAZY);
#elif WIN32
    wchar_t * const filename_w = fmtstr_mbstowcs(filename, NULL);
    if (NULL == filename_w)
    {
        // Nothing to load without a converted name; report it the same way
        // as a failed load.
        return NULL;
    }

    // The wide entry point is named explicitly: the generic LoadLibrary
    // macro only accepts a wchar_t string when UNICODE is defined.
    //
    // A failed load is reported through the NULL return value (as the
    // dlopen() branch does) rather than through an assertion, because a
    // missing library is a run-time condition, not a programming error.
    HMODULE const handle = ::LoadLibraryW(filename_w);

    fmtstr_delete(static_cast<void *>(filename_w));

    return handle;
#else
#error "Platform doesn't support dlopen and we have no implementation."
#endif
}
Exemple #3
0
int
mkdir(
    char const *pathname,
    mode_t mode)
{
    assert(pathname != 0);

    wchar_t * const pathname_w = fmtstr_mbstowcs(pathname, NULL);
    assert(pathname_w != NULL);

    int const result = CreateDirectory(pathname_w, NULL);

    fmtstr_delete(static_cast<void *>(pathname_w));

    /* mkdir returns 0 for success, opposite of
     * CreateDirectory().
     */
    return ((result != 0) ? 0 : -1);
}
/// @brief Compute the lookahead set of @p node up to @p needed_depth levels.
///
/// Logs which node the computation starts from, then runs
/// compute_lookahead_set() on @p node with fresh, empty bookkeeping stacks,
/// no walk counter, level 0 and node->lookahead_set() as the destination.
///
/// @param node Node whose lookahead set is filled.
/// @param needed_depth Number of lookahead levels to compute; passed on as
///        the max_level of compute_lookahead_set().
void
analyser_environment_t::compute_lookahead_terminal_for_node(
  node_t * const node,
  size_t const needed_depth) const
{
  std::list<recur_lookahead_t> recur_parent_stack;
  std::list<last_symbol_t> last_symbol_stack;

  // A node with an empty name gets a printable name formed for it; that
  // string is owned here and has to be released after logging.
  wchar_t *formed_name = 0;
  wchar_t const *shown_name = node->name().c_str();

  if (0 == node->name().size())
  {
    formed_name = form_rule_end_node_name(node, false);
    shown_name = formed_name;
  }

  // needed_depth is a size_t; convert it so that the argument really has
  // the type that "%d" announces.
  log(L"<INFO>: compute lookahead %d from node %s[%d]\n",
      static_cast<int>(needed_depth),
      shown_name,
      node->overall_idx());

  if (formed_name != 0)
  {
    fmtstr_delete(formed_name);
  }

  compute_lookahead_set(node,
                        &(node->lookahead_set()),
                        0,
                        0,
                        needed_depth,
                        recur_parent_stack,
                        last_symbol_stack,
                        0);
}
Exemple #5
0
/// @brief Parse the program's command line.
///
/// Recognised arguments:
///  - "-h", "--help": print USAGE_MESSAGE to stderr (parsing continues).
///  - "-cmp_ans" (only when _DEBUG is defined): set m_cmp_ans.
///  - "-o NAME": open the output file whose name is NAME followed by
///    m_curr_output_filename_idx, and store NAME in m_output_filename.
///  - any other argument starting with '-': reported as an unknown option.
///  - anything else: taken as the grammar file and opened.
///
/// All diagnostics are written with fwprintf() so that stderr is only ever
/// used as a wide-oriented stream inside this function.
///
/// @param argc Number of entries in @p argv.
/// @param argv Arguments; argv[0] is skipped.
/// @return true when every argument was accepted and a grammar file is
///         open; false on an unknown option, on "-o" without a name, when
///         a file cannot be opened, or when no grammar file was given.
bool
analyser_environment_t::parse_command_line(int argc, char **argv)
{
    bool correct = true;

    for (int i = 1; i < argc; ++i)
    {
        wchar_t * const arg_w = fmtstr_mbstowcs(argv[i], 0);
        assert(arg_w != 0);
        boost::shared_ptr<wchar_t> parm_ptr(arg_w, fmtstr_delete);

        if ((0 == wcscmp(L"-h", parm_ptr.get())) ||
                (0 == wcscmp(L"--help", parm_ptr.get())))
        {
            // NOTE(review): the type of USAGE_MESSAGE is not visible here;
            // confirm that it matches "%s" in a wide format string.
            fwprintf(stderr, L"%s\n", USAGE_MESSAGE);
        }
#if defined(_DEBUG)
        else if (0 == wcscmp(L"-cmp_ans", parm_ptr.get()))
        {
            m_cmp_ans = true;
        }
#endif
        else if (0 == wcscmp(L"-o", parm_ptr.get()))
        {
            // "-o" needs a following argument; without this check argv[argc]
            // (a null pointer) would be converted.
            if ((i + 1) >= argc)
            {
                fwprintf(stderr, L"option -o requires an output file name\n");
                return false;
            }
            ++i;

            wchar_t * const out_name_w = fmtstr_mbstowcs(argv[i], 0);
            assert(out_name_w != 0);
            parm_ptr.reset(out_name_w, fmtstr_delete);

            wchar_t * const filename = fmtstr_new(L"%s%d",
                                                  parm_ptr.get(),
                                                  m_curr_output_filename_idx);
            mp_output_file = new std::wfstream(filename,
                                               std::ios_base::out |
                                               std::ios_base::binary);
            fmtstr_delete(filename);
            if (false == mp_output_file->is_open())
            {
                // "%ls" always denotes a wide string, so the converted
                // argument is printed rather than the narrow argv entry.
                fwprintf(stderr,
                         L"Can not open output file: %ls\n",
                         parm_ptr.get());
                return false;
            }
            m_output_filename = parm_ptr.get();
        }
        else
        {
            if (L'-' == *parm_ptr)
            {
                fwprintf(stderr, L"unknown option: %ls\n", parm_ptr.get());
                correct = false;
            }
            else
            {
                // The only remaining possibility is the grammar source
                // file, so try to open it.
                m_grammar_file_name = parm_ptr.get();
                m_grammar_file.open(parm_ptr.get(), std::ios_base::in | std::ios_base::binary);

                if (false == m_grammar_file.is_open())
                {
                    fwprintf(stderr,
                             L"Can not open grammar file: %ls\n",
                             parm_ptr.get());
                    return false;
                }
            }
        }
    }

    if (false == correct)
    {
        return false;
    }
    if (false == m_grammar_file.is_open())
    {
        fwprintf(stderr, L"You must specify a grammar file.\n");
        return false;
    }

    return true;
}
/// @brief Compute lookahead terminals.
///
/// Compute the lookahead terminals reachable from @p node and put them
/// into @p lookahead_set. The function recurses over the grammar graph and
/// distinguishes three kinds of node:
///
///  - a rule end node (a node whose name is empty),
///  - a terminal node,
///  - a nonterminal node (a reference inside an alternative, or a rule
///    head).
///
/// This function uses a cache facility. In the following situation:
///
/// A
///   B c
///   B
///
/// if I need to collect @e x (or any number of) lookahead terminals from
/// the node A, and tracing node B in the first alternative has already
/// collected them, then I do not need to trace node B in the second
/// alternative again.
///
/// The facility works like this: whenever the alternatives of a rule are
/// walked, one walk counter (initially 0) is kept per alternative and its
/// address is handed down as @p prev_level_walk_count. Every terminal or
/// nonterminal node that is visited raises that counter to its own
/// distance_from_rule_head() when that distance is larger, and the counter
/// is reset to 0 when the trace goes back to the upper layer through a
/// rule end node. Before an alternative is traced,
/// is_duplicated_alternatives() is asked, with the counters gathered so
/// far, whether the alternative can be skipped. (That helper is defined
/// elsewhere; presumably it compares the alternative with the earlier ones
/// up to their walk counts - confirm against its definition.)
///
/// @param node Node to continue the trace from.
/// @param lookahead_set Set that receives the lookahead symbols found.
/// @param prev_level_walk_count Walk counter of the alternative currently
///        being traced, or 0 when there is none.
/// @param cur_level Current lookahead level; must be less than
///        @p max_level.
/// @param max_level Number of lookahead levels wanted; must be at least 1.
/// @param recur_parent_stack Nonterminal nodes whose rules are currently
///        being traced; the back element is the node to go back to when a
///        rule end node is reached.
/// @param last_symbol_stack Rule nodes for which the symbols that may
///        follow them are currently being searched through the
///        refer_to_me relationship; used to stop endless recursion.
/// @param curr_last_symbol_level Level value stored with, and looked up
///        in, the entries of @p last_symbol_stack.
///
void
analyser_environment_t::compute_lookahead_set(
  node_t * const node,
  lookahead_set_t * const lookahead_set,
  unsigned int * const prev_level_walk_count,
  size_t const cur_level,
  size_t const max_level,
  std::list<recur_lookahead_t> &recur_parent_stack,
  std::list<last_symbol_t> &last_symbol_stack,
  unsigned int const curr_last_symbol_level) const
{
  assert(max_level >= 1);
  assert(cur_level < max_level);
  
#if defined(TRACING_LOOKAHEAD)
  // Trace output only: print the node that is being visited. A node with
  // an empty name gets a printable name formed for it, which has to be
  // released again after logging.
  wchar_t *str;
  
  if (0 == node->name().size())
  {
    str = form_rule_end_node_name(node, false);
  }
  else
  {
    str = const_cast<wchar_t *>(node->name().c_str());
  }
  log(L"<INFO>: trace to %s[%d]\n", str, node->overall_idx());
  if (0 == node->name().size())
  {
    fmtstr_delete(str);
  }
#endif
  
  if (true == node->name().empty())
  {
    // =======================================
    //        This is a rule end node 
    // =======================================
    assert(0 == node->next_nodes().size());
    
    if (recur_parent_stack.size() != 0)
    {
      // ===================================================
      //  A rule end node that was reached from another rule
      // ===================================================
      
      // I entered this rule from another rule, so I can go
      // back to the node I came from and trace more lookahead
      // terminals after it.
      //
      node_t * const return_node = recur_parent_stack.back().node();
      
      unsigned int * const return_node_prev_level_walk_count =
        recur_parent_stack.back().prev_level_walk_count();
      
      // While tracing the upper layer, the entry for the node I return
      // to must not be on the stack; it is pushed back further below.
      recur_parent_stack.pop_back();
      
      // When I trace back to the upper layer, I have to set
      // the walk count of this layer to 0.
      //
      // Ex:
      //
      //   A->B->C
      //     / ^
      //    /   \_____
      //   /          \        (edge back to A->B->C)
      //  B ->D->F---->o
      //   \          /
      //    ->D->F->G/
      // 
      // After tracing the first alternative of rule B
      // (i.e. B->D->F->o), I go back to node B in
      // A->B->C. The walk count of B->D->F->o is 2. If I
      // did not reset it from 2 to 0, then on coming back to
      // rule B to trace the second alternative
      // (i.e. B->D->F->G->o) I would conclude that it can be
      // skipped, because the first 2 symbols of both
      // alternatives are equal and the walk count is 2. That
      // would be wrong: the second alternative has to be
      // traced.
      //
      if (prev_level_walk_count != 0)
      {
        (*prev_level_walk_count) = 0;
      }
      
      // The node to go back to must be a named nonterminal reference
      // (not a rule head) with exactly one successor.
      assert(return_node->name().size() != 0);
      assert(false == return_node->is_terminal());
      assert(false == return_node->is_rule_head());
      assert(1 == return_node->next_nodes().size());
      
#if defined(TRACING_LOOKAHEAD)
      log(L"<INFO>: trace back to %s[%d]\n",
          return_node->name().c_str(),
          return_node->overall_idx());
#endif
      
      // Continue at the same level with the node that follows the
      // nonterminal reference, using the walk counter that was saved
      // together with that reference.
      compute_lookahead_set(return_node->next_nodes().front(),
                            lookahead_set,
                            return_node_prev_level_walk_count,
                            cur_level,
                            max_level,
                            recur_parent_stack,
                            last_symbol_stack,
                            curr_last_symbol_level);
      
      // Back from 'compute_lookahead_set'.
      //
      // Ex:
      //
      //   A->B->C
      //     / ^
      //    /   \__
      //   /       \        (edge back to A->B->C)
      //  B ->D->F->o
      //   \       /
      //    ->E->G/
      //
      // I am at 'o' and have just returned from tracing after
      // node 'B'. Node 'B' is put back at the end of
      // 'recur_parent_stack' so that the other paths of rule
      // 'B' can go back to node 'B' as well.
      //
      recur_parent_stack.push_back(
        recur_lookahead_t(return_node,
                          return_node_prev_level_walk_count));
    }
    else
    {
      // =======================================
      //   rule end node for the initial rule
      // =======================================
      
      // I got here without coming through any other rule, so
      // the trace has to continue along 'refer_to_me_nodes()'.
      //
      if (0 == node->rule_node()->refer_to_me_nodes().size())
      {
        // =======================================
        //            EOF rule end node
        // =======================================
        
        // This is a rule end node and no other node refers to
        // its rule node, so the only thing that can follow
        // here is EOF.
        //
        // In lookahead symbols, 'rule end nodes' are used as
        // the sign that represents the EOF symbol.
        //
        assert(true == node->is_eof());
        
        insert_lookahead(lookahead_set, node);
        
#if defined(TRACING_LOOKAHEAD)
        log(L"%c\n", L'*');
#endif
        
        // EOF is expected here, so no more lookahead terminals
        // can be found from this point.
        //
      }
      else
      {
        // =======================================
        //     rule end node has 'refer_to_me'
        // =======================================
        
        // Record that I am now searching for the terminals
        // that can follow 'node->rule_node()'.
        //
        last_symbol_stack.push_back(last_symbol_t(node->rule_node(),
                                                  curr_last_symbol_level));
        
        // Visit every node that refers to this rule and look at what
        // comes after it.
        for (std::list<node_t *>::const_iterator iter =
               node->rule_node()->refer_to_me_nodes().begin();
             iter != node->rule_node()->refer_to_me_nodes().end();
             ++iter)
        {
          // Every referring node must be a named nonterminal reference
          // (not a rule head) with exactly one successor.
          assert((*iter)->name().size() != 0);
          assert(false == (*iter)->is_terminal());
          assert(false == (*iter)->is_rule_head());
          assert(1 == (*iter)->next_nodes().size());
          
          if (((*iter)->rule_node() ==
               node->rule_node()) &&
              ((*iter)->next_nodes().front() ==
               node->rule_node()->rule_end_node()))
          {
            // The referring node is the last node of an
            // alternative of the form:
            //
            // A -> a A
            //
            // so I skip tracing from it.
            //
            // It is a right recursion, and no further lookahead
            // terminals can be found by following the right
            // recursion itself.
            //
            // Thus I do not trace further. If this rule end
            // node is marked as EOF, the EOF symbol is recorded.
            //
            // In lookahead symbols, 'rule end nodes' are used
            // as the sign that represents the EOF symbol.
            //
            if (true == node->is_eof())
            {
              insert_lookahead(lookahead_set,
                               node->rule_node()->rule_end_node());
            }
            continue;
          }
          
          if (((*iter)->next_nodes().front() ==
               (*iter)->rule_node()->rule_end_node()) &&
              (true == find_last_symbol_stack(last_symbol_stack,
                                              (*iter)->rule_node(),
                                              curr_last_symbol_level)))
          {
            //===========================================
            //   Why is last_symbol_stack needed here?
            //===========================================
            // Consider the following grammar:
            // ---
            // S
            //   a A
            // 
            // A
            //   c
            //   f
            // 
            // E
            //   C
            //   a F
            // 
            // C
            //   a S
            // 
            // F
            //   E
            //   V
            //
            // V
            //   e
            // ---
            // If we want to find the 2-lookahead symbols for
            //   the 'a' alternative of the S rule, they are
            //   {a,c; a,f}.
            //
            // However, if we want to find 3-lookahead for
            // this 'a', the searching sequence will be:
            //
            // S -> a -> A -> c -> C -> E -> F -> E -> F ->
            // E -> F -> ...
            //
            // This is an infinite loop!
            //
            // Indeed, if we start from 'C':
            //
            // C -> a -> S -> a -> A -> {c,f}
            //
            // The terminals which we can trace from 'S' are
            // just {a,c; a,f}. There is no third terminal
            // that we can trace from 'S'.
            //
            // Thus, we have to remember every nonterminal
            // visited through the "refer_to_me"
            // relationship. If the upcoming nonterminal has
            // already been traced, the recursive search stops
            // here.
            //
            
            // The referring node is followed by an 'X_end'
            // node, so finding more lookahead terminals from
            // it means finding what terminals can follow X.
            //
            // If 'last_symbol_stack' already holds 'X', I am
            // already in the middle of finding what terminals
            // can follow 'X'.
            //
            // Thus I do not trace further. If this rule end
            // node is marked as EOF, the EOF symbol is recorded.
            //
            // In lookahead symbols, 'rule end nodes' are used
            // as the sign that represents the EOF symbol.
            //
            if (true == node->is_eof())
            {
              insert_lookahead(lookahead_set,
                               node->rule_node()->rule_end_node());
            }
            continue;
          }
          else
          {
#if defined(TRACING_LOOKAHEAD)
            log(L"<INFO>: [N]trace further to %s[%d]\n",
                (*iter)->name().c_str(),
                (*iter)->overall_idx());
#endif
            
            // Continue with whatever follows the referring node. No walk
            // counter is passed for this trace.
            compute_lookahead_set((*iter)->next_nodes().front(),
                                  lookahead_set,
                                  0,
                                  cur_level,
                                  max_level,
                                  recur_parent_stack,
                                  last_symbol_stack,
                                  curr_last_symbol_level);
          }
        }
        
        // The entry pushed before the loop must be on top again.
        assert(last_symbol_stack.size() >= 1);
        assert(last_symbol_stack.back().mp_node == node->rule_node());
        
        last_symbol_stack.pop_back();
      }
    }
  }
  else if (true == node->is_terminal())
  {
    // =======================================
    //        This is a terminal node 
    // =======================================
    
    // Remember the farthest position reached in this alternative.
    if (prev_level_walk_count != 0)
    {
      if (node->distance_from_rule_head() > (*prev_level_walk_count))
      {
        (*prev_level_walk_count) = node->distance_from_rule_head();
      }
    }
    
    add_a_terminal_to_lookahead_set(
      node,
      lookahead_set,
      prev_level_walk_count,
      cur_level,
      max_level,
      recur_parent_stack,
      last_symbol_stack,
      curr_last_symbol_level);
  }
  else
  {
    // ======================================
    //           nonterminal node
    // ======================================
    
    assert(false == node->is_terminal());
    
    // Remember the farthest position reached in this alternative.
    if (prev_level_walk_count != 0)
    {
      if (node->distance_from_rule_head() > (*prev_level_walk_count))
      {
        (*prev_level_walk_count) = node->distance_from_rule_head();
      }
    }
    
    if (false == node->is_rule_head())
    {
      // ================================================
      //    Normal non-terminal node, NOT a rule node
      // ================================================
      assert(node->nonterminal_rule_node() != 0);
      
      // The 'treat_as_terminal' feature was designed to avoid
      // tracing lookahead symbols too deeply. However, it
      // turned out not to be good enough to solve all such
      // problems, and it has a significant restriction.
      //
      // Ex:
      //
      //   "S" [as_terminal:A]
      //   : "A" "a" ---------(1)
      //   | "B" "b" ---------(2)
      //   ;
      //   
      //   "A"
      //   : "c" "S" "e" "f"
      //   ;
      //   
      //   "B"
      //   : "A"
      //   ;
      //
      // In fact rule (1) and rule (2) of nonterminal S cannot
      // be distinguished, because the lookahead symbols of
      // these 2 rules are equal - ("c", "c", "c", ...).
      // However, with 'as_terminal:A' this algorithm will
      // distinguish the 2 rules, and that is wrong.
      //
      // So users must be careful when using the
      // 'as_terminal' feature.
      bool treat_as_terminal = false;
      
      // Check whether this nonterminal node should be treated
      // as a terminal node: its name is looked up in the list
      // kept by the rule of the node the lookahead set
      // originates from.
      for (std::list<std::wstring>::const_iterator iter =
             lookahead_set->mp_orig_node->rule_node()->
             token_name_as_terminal_during_lookahead().begin();
           iter != lookahead_set->mp_orig_node->rule_node()->
             token_name_as_terminal_during_lookahead().end();
           ++iter)
      {
        if (0 == (*iter).compare(node->name()))
        {
          // This nonterminal node is to be treated as a
          // terminal.
          treat_as_terminal = true;
          break;
        }
      }
      
      if (true == treat_as_terminal)
      {
        add_a_terminal_to_lookahead_set(
          node,
          lookahead_set,
          prev_level_walk_count,
          cur_level,
          max_level,
          recur_parent_stack,
          last_symbol_stack,
          curr_last_symbol_level);
      }
      else
      {
#if defined(TRACING_LOOKAHEAD)
        log(L"<INFO>: trace to rule - %s[%d]\n",
            node->nonterminal_rule_node()->name().c_str(),
            node->nonterminal_rule_node()->overall_idx());
#endif
        
        // Record that the trace can come back to this node once the
        // rule it refers to has been walked to its end.
        recur_parent_stack.push_back(
          recur_lookahead_t(node,
                            prev_level_walk_count));
        
        // One walk counter per alternative of the referenced rule,
        // all starting at 0.
        std::list<unsigned int> walk_counts(
          node->nonterminal_rule_node()->next_nodes().size(),
          0);
        
        std::list<unsigned int>::iterator iter_count =
          walk_counts.begin();
        
        // 'iter' and 'iter_count' advance together, so '*iter_count' is
        // always the counter of the alternative starting at '*iter'.
        for (std::list<node_t *>::const_iterator iter =
               node->nonterminal_rule_node()->next_nodes().begin();
             iter != node->nonterminal_rule_node()->next_nodes().end();
             ++iter,
             ++iter_count)
        {
          if (true == is_duplicated_alternatives(
                node->nonterminal_rule_node()->next_nodes().begin(),
                iter,
                walk_counts))
          {
            // Reported as a duplicate of an earlier alternative:
            // nothing to trace.
          }
          else
          {
            compute_lookahead_set(*iter,
                                  lookahead_set,
                                  &(*iter_count),
                                  cur_level,
                                  max_level,
                                  recur_parent_stack,
                                  last_symbol_stack,
                                  curr_last_symbol_level);
            
            assert((*iter_count) <= (*iter)->distance_to_rule_end_node());
          }
        }
        
        // The entry pushed before the loop must be on top again.
        assert(recur_parent_stack.size() > 0);
        assert(recur_parent_stack.back().node() == node);
        
        recur_parent_stack.pop_back();
      }
    }
    else
    {
      // ======================================
      //        Rule non-terminal node
      // ======================================
      
      // One walk counter per alternative of this rule, all starting
      // at 0.
      std::list<unsigned int> walk_counts(
        node->next_nodes().size(),
        0);
      
      std::list<unsigned int>::iterator iter_count =
        walk_counts.begin();
      
      // 'iter' and 'iter_count' advance together, so '*iter_count' is
      // always the counter of the alternative starting at '*iter'.
      for (std::list<node_t *>::const_iterator iter =
             node->next_nodes().begin();
           iter != node->next_nodes().end();
           ++iter,
             ++iter_count)
      {
        // Handle duplicated alternatives.
        if (true == is_duplicated_alternatives(node->next_nodes().begin(),
                                               iter,
                                               walk_counts))
        {
          // Reported as a duplicate of an earlier alternative:
          // nothing to trace.
        }
        else
        {
          compute_lookahead_set(*iter,
                                lookahead_set,
                                &(*iter_count),
                                cur_level,
                                max_level,
                                recur_parent_stack,
                                last_symbol_stack,
                                curr_last_symbol_level);
          
          assert((*iter_count) <= (*iter)->distance_to_rule_end_node());
        }
      }
    }
  }
}