Ejemplo n.º 1
0
// parse_charset
//   Reads charset tokens from [begin, end) until the charset end token is
//   found, recording every literal character, character range and character
//   class it encounters in chset.
//
//   begin  - in/out position in the pattern; advanced as tokens are consumed.
//   end    - end of the pattern.
//   chset  - the compound charset being populated.
//   traits - compiler traits; supplies the tokenizer (get_charset_token), the
//            regex traits object (traits()) and the syntax flags (flags()).
//
//   Malformed input is reported through ensure() with error_brack (pattern
//   ended before the charset was closed), error_range (range bounds out of
//   order) or error_ctype (unknown class name).
//   NOTE(review): ensure() is defined outside this view; it presumably throws
//   a regex_error when its condition is false -- confirm.
//   NOTE(review): FwdIter, RegexTraits and CompilerTraits are not declared in
//   the visible source; they are assumed to be template parameters.
inline void parse_charset
(
    FwdIter &begin
  , FwdIter end
  , compound_charset<RegexTraits> &chset
  , CompilerTraits &traits
)
{
    using namespace regex_constants;
    typedef typename RegexTraits::char_type char_type;
    typedef typename RegexTraits::char_class_type char_class_type;
    // An empty remainder is a malformed pattern, not a programming error, so
    // report it the same way as the other "unexpected end" checks below
    // instead of asserting.
    ensure(begin != end, error_brack, "unexpected end of pattern found");
    RegexTraits const &rxtraits = traits.traits();
    bool const icase = (0 != (regex_constants::icase_ & traits.flags()));
    FwdIter iprev = FwdIter();
    escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};
    bool invert = false;

    // check to see if we have an inverse charset
    // (the tokenizer is run on a copy, iprev, so begin only moves on a match)
    if(begin != end && token_charset_invert == traits.get_charset_token(iprev = begin, end))
    {
        begin = iprev;
        invert = true;
    }

    // skip the end token if-and-only-if it is the first token in the charset
    if(begin != end && token_charset_end == traits.get_charset_token(iprev = begin, end))
    {
        // every character spanned by that token is added as a literal member
        for(; begin != iprev; ++begin)
        {
            chset.set_char(*begin, rxtraits, icase);
        }
    }

    compiler_token_type tok;
    // ch_prev holds a character that has been read but not yet committed to
    // chset, because it may turn out to be the lower bound of a range.
    char_type ch_prev = char_type(), ch_next = char_type();
    bool have_prev = false;

    ensure(begin != end, error_brack, "unexpected end of pattern found");

    // remember the current position and grab the next token
    iprev = begin;
    tok = traits.get_charset_token(begin, end);
    do
    {
        ensure(begin != end, error_brack, "unexpected end of pattern found");

        if(token_charset_hyphen == tok && have_prev)
        {
            // remember the current position
            FwdIter iprev2 = begin;
            have_prev = false;

            // ch_prev is lower bound of a range
            switch(traits.get_charset_token(begin, end))
            {
            case token_charset_hyphen:
            case token_charset_invert:
                begin = iprev2; // un-get these tokens and fall through
                // fall through
            case token_literal:
                ch_next = *begin++;
                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");
                chset.set_range(ch_prev, ch_next, rxtraits, icase);
                continue;
            case token_charset_backspace:
                ch_next = char_type(8); // backspace
                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");
                chset.set_range(ch_prev, ch_next, rxtraits, icase);
                continue;
            case token_escape:
                esc = parse_escape(begin, end, traits);
                if(escape_char == esc.type_)
                {
                    detail::ensure(ch_prev <= esc.ch_, error_range, "invalid charset range");
                    chset.set_range(ch_prev, esc.ch_, rxtraits, icase);
                    continue;
                }
                // the escape is not a single character, so it cannot be an
                // upper bound: fall through and treat the hyphen as a literal
            case token_charset_end: // fall through
            default:                // not a range.
                begin = iprev;      // backup to hyphen token
                chset.set_char(ch_prev, rxtraits, icase);
                chset.set_char(*begin++, rxtraits, icase);
                continue;
            }
        }

        // the pending character was not followed by a range hyphen: commit it
        if(have_prev)
        {
            chset.set_char(ch_prev, rxtraits, icase);
            have_prev = false;
        }

        switch(tok)
        {
        case token_charset_hyphen:
        case token_charset_invert:
        case token_charset_end:
        case token_posix_charset_end:
            // in this position these tokens are ordinary characters
            begin = iprev; // un-get these tokens
            ch_prev = *begin++;
            have_prev = true;
            continue;

        case token_charset_backspace:
            ch_prev = char_type(8); // backspace
            have_prev = true;
            continue;

        case token_posix_charset_begin:
            {
                FwdIter tmp = begin, start = begin;
                // named distinctly so it does not shadow the charset-wide
                // invert flag declared at function scope
                bool posix_invert = (token_charset_invert == traits.get_charset_token(tmp, end));
                if(posix_invert)
                {
                    begin = start = tmp;
                }
                // scan the class name; tmp ends up one past its last character
                while(token_literal == (tok = traits.get_charset_token(begin, end)))
                {
                    tmp = ++begin;
                    ensure(begin != end, error_brack, "unexpected end of pattern found");
                }
                if(token_posix_charset_end == tok)
                {
                    char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);
                    ensure(0 != chclass, error_ctype, "unknown class name");
                    chset.set_class(chclass, posix_invert);
                    continue;
                }
                // no closing token: rewind and treat the first character of
                // the would-be opener as a literal
                begin = iprev; // un-get this token
                ch_prev = *begin++;
                have_prev = true;
            }
            continue;

        case token_escape:
            esc = parse_escape(begin, end, traits);
            if(escape_char == esc.type_)
            {
                ch_prev = esc.ch_;
                have_prev = true;
            }
            else if(escape_class == esc.type_)
            {
                char_class_type upper_ = lookup_classname(rxtraits, "upper");
                BOOST_ASSERT(0 != upper_);
                // the character at begin is consumed here; set_class receives
                // true when that character belongs to the "upper" class
                // NOTE(review): presumably an upper-case class letter selects
                // the negated class -- confirm against parse_escape
                chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_));
            }
            else
            {
                BOOST_ASSERT(false);
            }
            continue;

        default:
            ch_prev = *begin++;
            have_prev = true;
            continue;
        }
    }
    // every `continue` above lands here: record where the next token starts,
    // fail if the pattern is exhausted, then fetch the next token
    while(ensure((iprev = begin) != end, error_brack, "unexpected end of pattern found"),
          token_charset_end != (tok = traits.get_charset_token(begin, end)));

    // commit a character still pending when the end token was reached
    if(have_prev)
    {
        chset.set_char(ch_prev, rxtraits, icase);
    }

    if(invert)
    {
        chset.inverse();
    }
}
Ejemplo n.º 2
0
// set_class
//   Forwards char_class and the flag no, unchanged, to chset.set_class().
//   The trailing Traits argument is unnamed and is not used by the body.
//   NOTE(review): the flag presumably requests the negated class -- confirm
//   against compound_charset::set_class.
inline void set_class
(
    compound_charset<Traits> &chset
  , typename Traits::char_class_type char_class
  , bool no
  , Traits const &
)
{
    chset.set_class(char_class, no);
}