// Builds the dynamic matcher sequence for a parsed character set.
//
// chset  the compound character set to turn into a matcher
// tr     traits object; only forwarded to merge_charset() on the optimized path
// flags  syntax options; the icase_ and optimize bits are the only ones read here
//
// Returns make_dynamic<BidiIter>() applied to whichever matcher type is chosen.
// NOTE(review): BidiIter and Traits are not declared inside this block; they are
// presumably template parameters introduced just above it -- confirm.
inline sequence<BidiIter> make_charset_xpression
(
    compound_charset<Traits> &chset
  , Traits const &tr
  , regex_constants::syntax_option_type flags
)
{
    typedef typename Traits::char_type char_type;

    bool const ignore_case = (0 != (regex_constants::icase_ & flags));
    bool const fold_to_basic =
        is_narrow_char<char_type>::value && 0 != (regex_constants::optimize & flags);

    // Optimized path (narrow characters only): compile speed is not a concern,
    // so everything is merged into a single basic_chset up front.
    if(fold_to_basic)
    {
        typedef basic_chset<char_type> folded_type;
        folded_type folded(chset.base());

        if(ignore_case)
        {
            charset_matcher<Traits, mpl::true_, folded_type> nocase_matcher(folded);
            merge_charset(nocase_matcher.charset_, chset, tr);
            return make_dynamic<BidiIter>(nocase_matcher);
        }

        charset_matcher<Traits, mpl::false_, folded_type> case_matcher(folded);
        merge_charset(case_matcher.charset_, chset, tr);
        return make_dynamic<BidiIter>(case_matcher);
    }

    // Fast path for sets such as [[:digit:]]: no base characters and no negated
    // classes, so a single class mask (plus the inversion flag) is enough.
    if(chset.base().empty() && chset.posix_no().empty())
    {
        BOOST_ASSERT(0 != chset.posix_yes());
        posix_charset_matcher<Traits> class_matcher(chset.posix_yes(), chset.is_inverted());
        return make_dynamic<BidiIter>(class_matcher);
    }

    // General (slow) path: match directly against the compound charset.
    if(ignore_case)
    {
        charset_matcher<Traits, mpl::true_> nocase_matcher(chset);
        return make_dynamic<BidiIter>(nocase_matcher);
    }

    charset_matcher<Traits, mpl::false_> case_matcher(chset);
    return make_dynamic<BidiIter>(case_matcher);
}
// Folds the character-class parts of a compound charset into a basic charset.
//
// basic     destination set; bits are added and, if requested, inverted in place
// compound  source set; its posix_yes(), posix_no() and is_inverted() are read
// tr        traits object used to classify each character via isctype()
//
// Only the character codes 0..UCHAR_MAX are examined.
// NOTE(review): Char and Traits are not declared inside this block; they are
// presumably template parameters introduced just above it -- confirm.
inline void merge_charset
(
    basic_chset<Char> &basic
  , compound_charset<Traits> const &compound
  , Traits const &tr
)
{
    detail::ignore_unused(tr);

    int const last_code = static_cast<int>(UCHAR_MAX);

    // Characters that belong to the positive class mask are added.
    if(0 != compound.posix_yes())
    {
        typename Traits::char_class_type wanted = compound.posix_yes();
        for(int code = 0; code <= last_code; ++code)
        {
            Char const ch = static_cast<Char>(code);
            if(tr.isctype(ch, wanted))
            {
                basic.set(ch);
            }
        }
    }

    // For every negated class, characters that do NOT belong to it are added.
    // The loop simply runs zero times when there are no negated classes.
    for(std::size_t idx = 0; idx < compound.posix_no().size(); ++idx)
    {
        typename Traits::char_class_type excluded = compound.posix_no()[idx];
        for(int code = 0; code <= last_code; ++code)
        {
            Char const ch = static_cast<Char>(code);
            if(!tr.isctype(ch, excluded))
            {
                basic.set(ch);
            }
        }
    }

    // Inversion is applied last, after all classes have been merged.
    if(compound.is_inverted())
    {
        basic.inverse();
    }
}
// Parses the body of a bracketed character set and records it in chset.
//
// begin   in/out iterator into the pattern; advanced as tokens are consumed
// end     end of the pattern
// chset   compound charset that receives characters, ranges and classes
// traits  compiler traits; supplies the regex traits, the syntax flags and the
//         charset tokenizer (get_charset_token)
//
// Errors are reported through ensure()/detail::ensure() with error_brack,
// error_range or error_ctype.
// NOTE(review): ensure() is defined elsewhere; presumably it raises a regex
// error when its first argument is false -- confirm.
// NOTE(review): get_charset_token() is called with an iterator that is then
// used as the position after the token, so it presumably advances its first
// argument past non-literal tokens and leaves it in place for literals -- confirm.
//
// Reading aid: every `continue` below sits inside the do/while loop, so it jumps
// to the loop condition at the bottom, which records the current position in
// iprev and fetches the next token.
inline void parse_charset
(
    FwdIter &begin
  , FwdIter end
  , compound_charset<RegexTraits> &chset
  , CompilerTraits &traits
)
{
    using namespace regex_constants;
    typedef typename RegexTraits::char_type char_type;
    typedef typename RegexTraits::char_class_type char_class_type;
    BOOST_ASSERT(begin != end);
    RegexTraits const &rxtraits = traits.traits();
    bool const icase = (0 != (regex_constants::icase_ & traits.flags()));
    FwdIter iprev = FwdIter();
    escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};
    bool invert = false;

    // check to see if we have an inverse charset
    // (the token is read through a copy, iprev, so begin only moves if it matches)
    if(begin != end && token_charset_invert == traits.get_charset_token(iprev = begin, end))
    {
        begin = iprev;
        invert = true;
    }

    // skip the end token if-and-only-if it is the first token in the charset
    // (its characters are added to the set as ordinary characters instead)
    if(begin != end && token_charset_end == traits.get_charset_token(iprev = begin, end))
    {
        for(; begin != iprev; ++begin)
        {
            chset.set_char(*begin, rxtraits, icase);
        }
    }

    compiler_token_type tok;
    // ch_prev holds a character that has been read but not yet committed to the
    // set, because it may turn out to be the lower bound of a range.
    char_type ch_prev = char_type(), ch_next = char_type();
    bool have_prev = false;

    ensure(begin != end, error_brack, "unexpected end of pattern found");

    // remember the current position and grab the next token
    iprev = begin;
    tok = traits.get_charset_token(begin, end);
    do
    {
        ensure(begin != end, error_brack, "unexpected end of pattern found");

        // A hyphen that follows a pending character may introduce a range.
        if(token_charset_hyphen == tok && have_prev)
        {
            // remember the current position
            FwdIter iprev2 = begin;
            have_prev = false;

            // ch_prev is lower bound of a range
            switch(traits.get_charset_token(begin, end))
            {
            case token_charset_hyphen:
            case token_charset_invert:
                begin = iprev2; // un-get these tokens and fall through
            case token_literal:
                ch_next = *begin++;
                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");
                chset.set_range(ch_prev, ch_next, rxtraits, icase);
                continue;
            case token_charset_backspace:
                ch_next = char_type(8); // backspace
                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");
                chset.set_range(ch_prev, ch_next, rxtraits, icase);
                continue;
            case token_escape:
                esc = parse_escape(begin, end, traits);
                if(escape_char == esc.type_)
                {
                    detail::ensure(ch_prev <= esc.ch_, error_range, "invalid charset range");
                    chset.set_range(ch_prev, esc.ch_, rxtraits, icase);
                    continue;
                }
                // an escape that is not a plain character cannot end a range:
                // deliberately falls into the "not a range" handling below
            case token_charset_end: // fall through
            default: // not a range.
                begin = iprev; // backup to hyphen token
                chset.set_char(ch_prev, rxtraits, icase);
                chset.set_char(*begin++, rxtraits, icase);
                continue;
            }
        }

        // The pending character was not the start of a range: commit it.
        if(have_prev)
        {
            chset.set_char(ch_prev, rxtraits, icase);
            have_prev = false;
        }

        switch(tok)
        {
        // These tokens have no special meaning at this position; they are
        // re-read as a literal character and become the new pending character.
        case token_charset_hyphen:
        case token_charset_invert:
        case token_charset_end:
        case token_posix_charset_end:
            begin = iprev; // un-get these tokens
            ch_prev = *begin++;
            have_prev = true;
            continue;

        case token_charset_backspace:
            ch_prev = char_type(8); // backspace
            have_prev = true;
            continue;

        case token_posix_charset_begin:
            {
                FwdIter tmp = begin, start = begin;
                // NOTE: this local `invert` shadows the function-level `invert`;
                // it applies to this one class only.
                bool invert = (token_charset_invert == traits.get_charset_token(tmp, end));
                if(invert)
                {
                    begin = start = tmp;
                }
                // Step over the literal characters of the class name; tmp ends
                // up one past the last of them.
                while(token_literal == (tok = traits.get_charset_token(begin, end)))
                {
                    tmp = ++begin;
                    ensure(begin != end, error_brack, "unexpected end of pattern found");
                }
                if(token_posix_charset_end == tok)
                {
                    char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);
                    ensure(0 != chclass, error_ctype, "unknown class name");
                    chset.set_class(chclass, invert);
                    continue;
                }
                // No closing token followed the name: treat the opening token
                // as an ordinary character instead.
                begin = iprev; // un-get this token
                ch_prev = *begin++;
                have_prev = true;
            }
            continue;

        case token_escape:
            esc = parse_escape(begin, end, traits);
            if(escape_char == esc.type_)
            {
                ch_prev = esc.ch_;
                have_prev = true;
            }
            else if(escape_class == esc.type_)
            {
                char_class_type upper_ = lookup_classname(rxtraits, "upper");
                BOOST_ASSERT(0 != upper_);
                // The character at begin is consumed here; the class is added
                // negated when that character is classified as "upper".
                chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_));
            }
            else
            {
                BOOST_ASSERT(false);
            }
            continue;

        default:
            ch_prev = *begin++;
            have_prev = true;
            continue;
        }
    }
    // Comma operator: first record the position in iprev and check for end of
    // input, then fetch the next token; the loop stops at the charset end token.
    while(ensure((iprev = begin) != end, error_brack, "unexpected end of pattern found"),
          token_charset_end != (tok = traits.get_charset_token(begin, end)));

    // Commit a character still pending when the end token was reached.
    if(have_prev)
    {
        chset.set_char(ch_prev, rxtraits, icase);
    }

    if(invert)
    {
        chset.inverse();
    }
}
// Free-function form of compound_charset::set_class().
//
// chset       charset to modify
// char_class  class mask to add
// no          forwarded unchanged as the second argument of chset.set_class()
//
// The trailing traits parameter is unnamed and not used.
inline void set_class
(
    compound_charset<Traits> &chset
  , typename Traits::char_class_type char_class
  , bool no
  , Traits const &
)
{
    chset.set_class(char_class, no);
}
// Free-function form of compound_charset::set_range().
//
// chset  charset to modify
// from   first bound of the range, passed through as given
// to     second bound of the range, passed through as given
// tr     traits object, forwarded unchanged
// icase  case-insensitivity flag, forwarded unchanged
inline void set_range
(
    compound_charset<Traits> &chset
  , Char from
  , Char to
  , Traits const &tr
  , bool icase
)
{
    chset.set_range(from, to, tr, icase);
}
// Free-function form of compound_charset::set_char().
//
// chset  charset to modify
// ch     character to add
// tr     traits object, forwarded unchanged
// icase  case-insensitivity flag, forwarded unchanged
inline void set_char
(
    compound_charset<Traits> &chset
  , Char ch
  , Traits const &tr
  , bool icase
)
{
    chset.set_char(ch, tr, icase);
}