// Builds the syntax tree for one rule (or macro) from the characters
// delimited by start_ and end_.
//
// Tokens are fetched one at a time through tokeniser::next(). The token on
// top of _token_stack is compared with the lookahead token via
// token::precedence(): '<' or '=' pushes the lookahead and fetches another
// token, '>' calls reduce(), and anything else is reported as a syntax
// error. The loop runs until _token_stack is empty.
//
// Parameters:
//   start_, end_   - bounds of the text handed to the tokeniser state.
//   id_, user_id_, next_dfa_, push_dfa_, pop_dfa_
//                  - forwarded to the end_node created for non-macro rules.
//   flags_         - passed to the tokeniser state; also consulted for
//                    match_zero_len.
//   nl_id_         - overwritten with state_._nl_id when that value is not
//                    static_cast<id_type>(~0).
//   seen_bol_      - when true, fixup_bol() is applied to the finished tree.
//   macro_         - when true, no end_node is appended to the tree.
//
// Returns the root of the tree that was built.
//
// Throws runtime_error on a syntax error, on an empty rule, and (unless
// match_zero_len is set in flags_) when an end-state node appears in the
// root's firstpos set.
node *parse (const rules_char_type *start_,
    const rules_char_type * const end_, const id_type id_,
    const id_type user_id_, const id_type next_dfa_,
    const id_type push_dfa_, const bool pop_dfa_,
    const std::size_t flags_, id_type &nl_id_, const bool seen_bol_,
    const bool macro_)
{
    state state_ (start_, end_, id_, flags_, _locale, macro_);
    std::auto_ptr<token> lookahead_ (new token);

    // Seed the stack with a default-constructed token. The slot is pushed
    // as a null pointer first and filled afterwards (presumably so that a
    // failing push cannot leak the token - the stack's ownership rules are
    // not visible here).
    _token_stack->push (static_cast<token *>(0));
    _token_stack->top () = lookahead_.release ();
    lookahead_.reset (new token);
    tokeniser::next (_token_stack->top (), state_, lookahead_.get ());

    // The body always runs at least once; the emptiness test is at the
    // bottom of each pass.
    for (;;)
    {
        token *top_token_ = _token_stack->top ();
        const char relation_ = top_token_->precedence (lookahead_->_type);

        if (relation_ == '<' || relation_ == '=')
        {
            // Shift: the lookahead moves onto the stack and a fresh token
            // is read to take its place.
            _token_stack->push (static_cast<token *>(0));
            _token_stack->top () = lookahead_.release ();
            lookahead_.reset (new token);
            tokeniser::next (_token_stack->top (), state_,
                lookahead_.get ());
        }
        else if (relation_ == '>')
        {
            reduce (state_);
        }
        else
        {
            std::ostringstream ss_;

            ss_ << "A syntax error occurred: '" <<
                top_token_->precedence_string () <<
                "' against '" << lookahead_->precedence_string () <<
                "' preceding index " << state_.index () <<
                " in rule id " << state_._id << '.';
            throw runtime_error (ss_.str ());
        }

        if (_token_stack->empty ())
        {
            break;
        }
    }

    if (_tree_node_stack.empty ())
    {
        std::ostringstream ss_;

        ss_ << "Empty rules are not allowed in rule id " <<
            state_._id << '.';
        throw runtime_error (ss_.str ());
    }

    assert (_tree_node_stack.size () == 1);

    node *body_ = _tree_node_stack.top ();

    _tree_node_stack.pop ();

    // Macros have no end state, so for them the parsed body is the root.
    node *root_ = body_;

    if (!macro_)
    {
        // Each vector slot is reserved with a null pointer before the node
        // is allocated and stored into it.
        _node_ptr_vector->push_back (static_cast<end_node *>(0));

        node *terminator_ = new end_node (id_, user_id_, next_dfa_,
            push_dfa_, pop_dfa_);

        _node_ptr_vector->back () = terminator_;
        _node_ptr_vector->push_back (static_cast<sequence_node *>(0));
        _node_ptr_vector->back () = new sequence_node (body_, terminator_);
        root_ = _node_ptr_vector->back ();
    }

    if (seen_bol_)
    {
        fixup_bol (root_);
    }

    if (state_._nl_id != static_cast<id_type>(~0))
    {
        nl_id_ = state_._nl_id;
    }

    if ((flags_ & match_zero_len) == 0)
    {
        // An end-state node in the root's firstpos set is rejected with
        // the "match zero characters" error below.
        const typename node::node_vector &first_ = root_->firstpos ();
        typename node::node_vector::const_iterator pos_ = first_.begin ();
        typename node::node_vector::const_iterator last_ = first_.end ();

        while (pos_ != last_)
        {
            const node *candidate_ = *pos_;

            if (candidate_->end_state ())
            {
                std::ostringstream ss_;

                ss_ << "Rules that match zero characters are not allowed "
                    "as this can cause an infinite loop in user code. The "
                    "match_zero_len flag overrides this check. Rule id " <<
                    state_._id << '.';
                throw runtime_error (ss_.str ());
            }

            ++pos_;
        }
    }

    return root_;
}
// Builds the syntax tree for one rule from an already tokenised regex.
//
// The token on top of _token_stack is compared with the lookahead token via
// token::precedence(): '<' or '=' pushes the lookahead and takes the next
// token from regex_, '>' calls reduce(), and anything else is reported as a
// syntax error. The loop runs until _token_stack is empty. The resulting
// tree is then wrapped as sequence_node(tree, end_node).
//
// Parameters:
//   regex_         - the token sequence to parse; must hold at least two
//                    tokens.
//   id_, user_id_, next_dfa_, push_dfa_, pop_dfa_
//                  - forwarded to the end_node appended to the tree; id_ is
//                    also used in error messages.
//   flags_         - consulted for match_zero_len.
//   nl_id_         - passed through to reduce().
//   seen_bol_      - when true, fixup_bol() is applied to the finished tree.
//
// Returns the root of the tree that was built.
//
// Throws runtime_error on a syntax error, on an empty rule, and (unless
// match_zero_len is set in flags_) when an end-state node appears in the
// root's firstpos set.
node *parse(const token_deque &regex_, const id_type id_,
    const id_type user_id_, const id_type next_dfa_,
    const id_type push_dfa_, const bool pop_dfa_,
    const std::size_t flags_, id_type &nl_id_, const bool seen_bol_)
{
    typename token_deque::const_iterator iter_ = regex_.begin();
    typename token_deque::const_iterator end_ = regex_.end();
    node *root_ = 0;
    token *lhs_token_ = 0;

    // There cannot be less than 2 tokens: both are dereferenced
    // unconditionally below.
    assert(iter_ != end_ && iter_ + 1 != end_);

    std::auto_ptr<token> rhs_token_(new token(*iter_++));
    char action_ = 0;

    // The slot is pushed as a null pointer first and filled afterwards
    // (presumably so that a failing push cannot leak the token - the
    // stack's ownership rules are not visible here).
    _token_stack->push(static_cast<token *>(0));
    _token_stack->top() = rhs_token_.release();
    rhs_token_.reset(new token(*iter_));

    // Never step past the last token: once it is reached it keeps being
    // used as the lookahead for every later shift.
    if (iter_ + 1 != end_) ++iter_;

    do
    {
        lhs_token_ = _token_stack->top();
        action_ = lhs_token_->precedence(rhs_token_->_type);

        switch (action_)
        {
        case '<':
        case '=':
            // Shift: the lookahead moves onto the stack and a copy of the
            // current input token becomes the new lookahead.
            _token_stack->push(static_cast<token *>(0));
            _token_stack->top() = rhs_token_.release();
            rhs_token_.reset(new token(*iter_));

            if (iter_ + 1 != end_) ++iter_;

            break;
        case '>':
            reduce(nl_id_);
            break;
        default:
        {
            std::ostringstream ss_;

            ss_ << "A syntax error occurred: '" <<
                lhs_token_->precedence_string() <<
                "' against '" << rhs_token_->precedence_string() <<
                "' in rule id " << id_ << '.';
            throw runtime_error(ss_.str());
        }
        }
    } while (!_token_stack->empty());

    if (_tree_node_stack.empty())
    {
        std::ostringstream ss_;

        ss_ << "Empty rules are not allowed in rule id " << id_ << '.';
        throw runtime_error(ss_.str());
    }

    assert(_tree_node_stack.size() == 1);

    node *lhs_node_ = _tree_node_stack.top();

    _tree_node_stack.pop();

    // Each vector slot is reserved with a null pointer before the node is
    // allocated and stored into it.
    _node_ptr_vector->push_back(static_cast<end_node *>(0));

    node *rhs_node_ = new end_node(id_, user_id_, next_dfa_,
        push_dfa_, pop_dfa_);

    _node_ptr_vector->back() = rhs_node_;
    _node_ptr_vector->push_back(static_cast<sequence_node *>(0));
    _node_ptr_vector->back() = new sequence_node(lhs_node_, rhs_node_);
    root_ = _node_ptr_vector->back();

    if (seen_bol_)
    {
        fixup_bol(root_);
    }

    if ((flags_ & match_zero_len) == 0)
    {
        // An end-state node in the root's firstpos set is rejected with
        // the "match zero characters" error below.
        const typename node::node_vector &firstpos_ = root_->firstpos();
        typename node::node_vector::const_iterator pos_iter_ =
            firstpos_.begin();
        typename node::node_vector::const_iterator pos_end_ =
            firstpos_.end();

        for (; pos_iter_ != pos_end_; ++pos_iter_)
        {
            const node *node_ = *pos_iter_;

            if (node_->end_state())
            {
                std::ostringstream ss_;

                ss_ << "Rules that match zero characters are not allowed "
                    "as this can cause an infinite loop in user code. The "
                    "match_zero_len flag overrides this check. Rule id " <<
                    id_ << '.';
                throw runtime_error(ss_.str());
            }
        }
    }

    return root_;
}