// Decodes a "\c<char>" control escape.
// On entry state_ is positioned on the 'c'; the character after it is
// consumed and translated:
//   'a'..'z' and 'A'..'Z' are shifted so that 'a'/'A' becomes 1,
//   '@' becomes 0.
// Throws runtime_error if the regex ends after "\c" or if the character
// is none of the above.
static CharT decode_control_char (state &state_)
{
    // Step past the 'c' that introduced the escape.
    state_.increment ();

    CharT ctrl_ = 0;

    if (state_.next (ctrl_))
    {
        // The regex ran out, so there is no useful index to report.
        throw runtime_error ("Unexpected end of regex following \\c.");
    }

    const bool is_lower_ = ctrl_ >= 'a' && ctrl_ <= 'z';
    const bool is_upper_ = ctrl_ >= 'A' && ctrl_ <= 'Z';

    if (!is_lower_ && !is_upper_ && ctrl_ != '@')
    {
        std::ostringstream ss_;

        ss_ << "Invalid control char at index " <<
            state_.index () - 1 << '.';
        throw runtime_error (ss_.str ().c_str ());
    }

    if (is_lower_)
    {
        ctrl_ -= 'a' - 1;
    }
    else if (is_upper_)
    {
        ctrl_ -= 'A' - 1;
    }
    else
    {
        // "\c@" denotes the NUL character.
        ctrl_ = 0;
    }

    return ctrl_;
}
// This function can call itself. static void charset (state &state_, string &chars_, bool &negated_) { CharT ch_ = 0; bool eos_ = state_.next (ch_); if (eos_) { // Pointless returning index if at end of string throw runtime_error ("Unexpected end of regex " "following '['."); } negated_ = ch_ == '^'; if (negated_) { eos_ = state_.next (ch_); if (eos_) { // Pointless returning index if at end of string throw runtime_error ("Unexpected end of regex " "following '^'."); } } bool chset_ = false; CharT prev_ = 0; while (ch_ != ']') { if (ch_ == '\\') { std::size_t str_len_ = 0; const CharT *str_ = escape_sequence (state_, prev_, str_len_); chset_ = str_ != 0; if (chset_) { state temp_state_ (str_ + 1, str_ + str_len_, state_._flags, state_._locale); string temp_chars_; bool temp_negated_ = false; charset (temp_state_, temp_chars_, temp_negated_); if (negated_ != temp_negated_) { std::ostringstream ss_; ss_ << "Mismatch in charset negation preceding " "index " << state_.index () - 1 << '.'; throw runtime_error (ss_.str ().c_str ()); } chars_ += temp_chars_; } } /* else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') { // TODO: POSIX charsets } */ else { chset_ = false; prev_ = ch_; } eos_ = state_.next (ch_); // Covers preceding if, else if and else if (eos_) { // Pointless returning index if at end of string throw runtime_error ("Unexpected end of regex " "(missing ']')."); } if (ch_ == '-') { charset_range (chset_, state_, eos_, ch_, prev_, chars_); } else if (!chset_) { if ((state_._flags & icase) && (std::isupper (prev_, state_._locale) || std::islower (prev_, state_._locale))) { CharT upper_ = std::toupper (prev_, state_._locale); CharT lower_ = std::tolower (prev_, state_._locale); chars_ += upper_; chars_ += lower_; } else { chars_ += prev_; } } } if (!negated_ && chars_.empty ()) { throw runtime_error ("Empty charsets not allowed."); } }
static void charset_range (const bool chset_, state &state_, bool &eos_, CharT &ch_, const CharT prev_, string &chars_) { if (chset_) { std::ostringstream ss_; ss_ << "Charset cannot form start of range preceding " "index " << state_.index () - 1 << '.'; throw runtime_error (ss_.str ().c_str ()); } eos_ = state_.next (ch_); if (eos_) { // Pointless returning index if at end of string throw runtime_error ("Unexpected end of regex " "following '-'."); } CharT curr_ = 0; if (ch_ == '\\') { std::size_t str_len_ = 0; if (escape_sequence (state_, curr_, str_len_)) { std::ostringstream ss_; ss_ << "Charset cannot form end of range preceding index " << state_.index () << '.'; throw runtime_error (ss_.str ().c_str ()); } } /* else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') { std::ostringstream ss_; ss_ << "POSIX char class cannot form end of range at " "index " << state_.index () - 1 << '.'; throw runtime_error (ss_.str ().c_str ()); } */ else { curr_ = ch_; } eos_ = state_.next (ch_); // Covers preceding if and else if (eos_) { // Pointless returning index if at end of string throw runtime_error ("Unexpected end of regex " "(missing ']')."); } std::size_t start_ = static_cast<typename Traits::index_type> (prev_); std::size_t end_ = static_cast<typename Traits::index_type> (curr_); // Semanic check if (end_ < start_) { std::ostringstream ss_; ss_ << "Invalid range in charset preceding index " << state_.index () - 1 << '.'; throw runtime_error (ss_.str ().c_str ()); } chars_.reserve (chars_.size () + (end_ + 1 - start_)); for (; start_ <= end_; ++start_) { CharT ch_ = static_cast<CharT> (start_); if ((state_._flags & icase) && (std::isupper (ch_, state_._locale) || std::islower (ch_, state_._locale))) { CharT upper_ = std::toupper (ch_, state_._locale); CharT lower_ = std::tolower (ch_, state_._locale); chars_ += (upper_); chars_ += (lower_); } else { chars_ += (ch_); } } }
// Decodes a "\x<hex digits>" escape.
// On entry state_ is positioned on the 'x'. At least one hex digit must
// follow; all consecutive hex digits are consumed and the first non-hex
// character is left unread.
// Returns the accumulated value converted to CharT.
// NOTE(review): the accumulated value is not range-checked before the
// conversion to CharT.
// Throws runtime_error if the regex ends after "\x" or if the first
// character after it is not a hex digit.
static CharT decode_hex (state &state_)
{
    // Step past the 'x' that introduced the escape.
    state_.increment ();

    CharT digit_ = 0;

    if (state_.next (digit_))
    {
        // The regex ran out, so there is no useful index to report.
        throw runtime_error ("Unexpected end of regex following \\x.");
    }

    const bool first_is_hex_ = (digit_ >= '0' && digit_ <= '9') ||
        (digit_ >= 'a' && digit_ <= 'f') ||
        (digit_ >= 'A' && digit_ <= 'F');

    if (!first_is_hex_)
    {
        std::ostringstream ss_;

        ss_ << "Illegal char following \\x at index " <<
            state_.index () - 1 << '.';
        throw runtime_error (ss_.str ().c_str ());
    }

    std::size_t value_ = 0;

    for (;;)
    {
        // digit_ is known to be a hex digit here; fold it into the value.
        value_ *= 16;

        if (digit_ >= '0' && digit_ <= '9')
        {
            value_ += digit_ - '0';
        }
        else if (digit_ >= 'a' && digit_ <= 'f')
        {
            value_ += 10 + (digit_ - 'a');
        }
        else
        {
            value_ += 10 + (digit_ - 'A');
        }

        if (state_.eos ())
        {
            break;
        }

        // Peek at the next character without consuming it.
        digit_ = *state_._curr;

        const bool next_is_hex_ = (digit_ >= '0' && digit_ <= '9') ||
            (digit_ >= 'a' && digit_ <= 'f') ||
            (digit_ >= 'A' && digit_ <= 'F');

        if (!next_is_hex_)
        {
            // Not part of the escape: leave it for the caller.
            break;
        }

        state_.increment ();
    }

    return static_cast<CharT> (value_);
}