Exemplo n.º 1
0
inline sequence<BidiIter> make_charset_xpression
(
    compound_charset<Traits> &chset
  , Traits const &tr
  , regex_constants::syntax_option_type flags
)
{
    typedef typename Traits::char_type char_type;
    bool const icase = (0 != (regex_constants::icase_ & flags));
    bool const optimize = is_narrow_char<char_type>::value && 0 != (regex_constants::optimize & flags);

    // don't care about compile speed -- fold eveything into a bitset<256>
    if(optimize)
    {
        typedef basic_chset<char_type> charset_type;
        charset_type charset(chset.base());
        if(icase)
        {
            charset_matcher<Traits, mpl::true_, charset_type> matcher(charset);
            merge_charset(matcher.charset_, chset, tr);
            return make_dynamic<BidiIter>(matcher);
        }
        else
        {
            charset_matcher<Traits, mpl::false_, charset_type> matcher(charset);
            merge_charset(matcher.charset_, chset, tr);
            return make_dynamic<BidiIter>(matcher);
        }
    }

    // special case to make [[:digit:]] fast
    else if(chset.base().empty() && chset.posix_no().empty())
    {
        BOOST_ASSERT(0 != chset.posix_yes());
        posix_charset_matcher<Traits> matcher(chset.posix_yes(), chset.is_inverted());
        return make_dynamic<BidiIter>(matcher);
    }

    // default, slow
    else
    {
        if(icase)
        {
            charset_matcher<Traits, mpl::true_> matcher(chset);
            return make_dynamic<BidiIter>(matcher);
        }
        else
        {
            charset_matcher<Traits, mpl::false_> matcher(chset);
            return make_dynamic<BidiIter>(matcher);
        }
    }
}
Exemplo n.º 2
0
inline void merge_charset
(
    basic_chset<Char> &basic
  , compound_charset<Traits> const &compound
  , Traits const &tr
)
{
    detail::ignore_unused(tr);
    if(0 != compound.posix_yes())
    {
        typename Traits::char_class_type mask = compound.posix_yes();
        for(int i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
        {
            if(tr.isctype((Char)i, mask))
            {
                basic.set((Char)i);
            }
        }
    }

    if(!compound.posix_no().empty())
    {
        for(std::size_t j = 0; j < compound.posix_no().size(); ++j)
        {
            typename Traits::char_class_type mask = compound.posix_no()[j];
            for(int i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
            {
                if(!tr.isctype((Char)i, mask))
                {
                    basic.set((Char)i);
                }
            }
        }
    }

    if(compound.is_inverted())
    {
        basic.inverse();
    }
}
Exemplo n.º 3
0
inline void parse_charset
(
    FwdIter &begin
  , FwdIter end
  , compound_charset<RegexTraits> &chset
  , CompilerTraits &traits
)
{
    using namespace regex_constants;
    typedef typename RegexTraits::char_type char_type;
    typedef typename RegexTraits::char_class_type char_class_type;
    BOOST_ASSERT(begin != end);
    RegexTraits const &rxtraits = traits.traits();
    bool const icase = (0 != (regex_constants::icase_ & traits.flags()));
    FwdIter iprev = FwdIter();
    escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};
    bool invert = false;

    // check to see if we have an inverse charset
    if(begin != end && token_charset_invert == traits.get_charset_token(iprev = begin, end))
    {
        begin = iprev;
        invert = true;
    }

    // skip the end token if-and-only-if it is the first token in the charset
    if(begin != end && token_charset_end == traits.get_charset_token(iprev = begin, end))
    {
        for(; begin != iprev; ++begin)
        {
            chset.set_char(*begin, rxtraits, icase);
        }
    }

    compiler_token_type tok;
    char_type ch_prev = char_type(), ch_next = char_type();
    bool have_prev = false;

    ensure(begin != end, error_brack, "unexpected end of pattern found");

    // remember the current position and grab the next token
    iprev = begin;
    tok = traits.get_charset_token(begin, end);
    do
    {
        ensure(begin != end, error_brack, "unexpected end of pattern found");

        if(token_charset_hyphen == tok && have_prev)
        {
            // remember the current position
            FwdIter iprev2 = begin;
            have_prev = false;

            // ch_prev is lower bound of a range
            switch(traits.get_charset_token(begin, end))
            {
            case token_charset_hyphen:
            case token_charset_invert:
                begin = iprev2; // un-get these tokens and fall through
            case token_literal:
                ch_next = *begin++;
                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");
                chset.set_range(ch_prev, ch_next, rxtraits, icase);
                continue;
            case token_charset_backspace:
                ch_next = char_type(8); // backspace
                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");
                chset.set_range(ch_prev, ch_next, rxtraits, icase);
                continue;
            case token_escape:
                esc = parse_escape(begin, end, traits);
                if(escape_char == esc.type_)
                {
                    detail::ensure(ch_prev <= esc.ch_, error_range, "invalid charset range");
                    chset.set_range(ch_prev, esc.ch_, rxtraits, icase);
                    continue;
                }
            case token_charset_end: // fall through
            default:                // not a range.
                begin = iprev;      // backup to hyphen token
                chset.set_char(ch_prev, rxtraits, icase);
                chset.set_char(*begin++, rxtraits, icase);
                continue;
            }
        }

        if(have_prev)
        {
            chset.set_char(ch_prev, rxtraits, icase);
            have_prev = false;
        }

        switch(tok)
        {
        case token_charset_hyphen:
        case token_charset_invert:
        case token_charset_end:
        case token_posix_charset_end:
            begin = iprev; // un-get these tokens
            ch_prev = *begin++;
            have_prev = true;
            continue;

        case token_charset_backspace:
            ch_prev = char_type(8); // backspace
            have_prev = true;
            continue;

        case token_posix_charset_begin:
            {
                FwdIter tmp = begin, start = begin;
                bool invert = (token_charset_invert == traits.get_charset_token(tmp, end));
                if(invert)
                {
                    begin = start = tmp;
                }
                while(token_literal == (tok = traits.get_charset_token(begin, end)))
                {
                    tmp = ++begin;
                    ensure(begin != end, error_brack, "unexpected end of pattern found");
                }
                if(token_posix_charset_end == tok)
                {
                    char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);
                    ensure(0 != chclass, error_ctype, "unknown class name");
                    chset.set_class(chclass, invert);
                    continue;
                }
                begin = iprev; // un-get this token
                ch_prev = *begin++;
                have_prev = true;
            }
            continue;

        case token_escape:
            esc = parse_escape(begin, end, traits);
            if(escape_char == esc.type_)
            {
                ch_prev = esc.ch_;
                have_prev = true;
            }
            else if(escape_class == esc.type_)
            {
                char_class_type upper_ = lookup_classname(rxtraits, "upper");
                BOOST_ASSERT(0 != upper_);
                chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_));
            }
            else
            {
                BOOST_ASSERT(false);
            }
            continue;

        default:
            ch_prev = *begin++;
            have_prev = true;
            continue;
        }
    }
    while(ensure((iprev = begin) != end, error_brack, "unexpected end of pattern found"),
          token_charset_end != (tok = traits.get_charset_token(begin, end)));

    if(have_prev)
    {
        chset.set_char(ch_prev, rxtraits, icase);
    }

    if(invert)
    {
        chset.inverse();
    }
}
Exemplo n.º 4
0
inline void set_class(compound_charset<Traits> &chset, typename Traits::char_class_type char_class, bool no, Traits const &)
{
    chset.set_class(char_class, no);
}
Exemplo n.º 5
0
inline void set_range(compound_charset<Traits> &chset, Char from, Char to, Traits const &tr, bool icase)
{
    chset.set_range(from, to, tr, icase);
}
Exemplo n.º 6
0
inline void set_char(compound_charset<Traits> &chset, Char ch, Traits const &tr, bool icase)
{
    chset.set_char(ch, tr, icase);
}