示例#1
0
  // Predicate: does this character's code fall in the ALPHA class of its
  // own encoding?
  //
  // Produces RBOOL(true) only when the flag handed to codepoint() comes back
  // set and ONIGENC_IS_CODE_ALPHA accepts the returned code for the encoding
  // obtained from encoding()->get_encoding(); RBOOL(false) otherwise.
  Object* Character::alphabetical_p(STATE) {
    bool has_code;  // expected to be filled in by codepoint()
    int code = codepoint(state, &has_code);
    OnigEncodingType* onig = encoding()->get_encoding();

    // The class test is only evaluated once a code has been reported.
    bool alpha = false;
    if(has_code) {
      alpha = ONIGENC_IS_CODE_ALPHA(onig, code);
    }

    return RBOOL(alpha);
  }
示例#2
0
  // Predicate: does this character's code fall in the LOWER class of its
  // own encoding?
  //
  // The answer is RBOOL(false) whenever codepoint() leaves its flag unset;
  // otherwise it is the result of ONIGENC_IS_CODE_LOWER for the encoding
  // returned by encoding()->get_encoding().
  Object* Character::lower_p(STATE) {
    bool resolved;  // expected to be filled in by codepoint()
    int value = codepoint(state, &resolved);

    // Fetched before the guard so the call sequence matches regardless of
    // whether a code was reported.
    OnigEncodingType* char_enc = encoding()->get_encoding();

    if(!resolved) return RBOOL(false);

    return RBOOL(ONIGENC_IS_CODE_LOWER(char_enc, value));
  }
示例#3
0
  // Predicate: does this character's code fall in the PUNCT class of its
  // own encoding?
  //
  // Combines the flag reported by codepoint() with ONIGENC_IS_CODE_PUNCT for
  // the encoding returned by encoding()->get_encoding(); both must hold for
  // the result to be RBOOL(true).
  Object* Character::punctuation_p(STATE) {
    bool known;  // expected to be filled in by codepoint()
    int cp = codepoint(state, &known);
    OnigEncodingType* onig_enc = encoding()->get_encoding();

    // The class macro is evaluated only when a code was reported.
    bool is_punct = known ? (ONIGENC_IS_CODE_PUNCT(onig_enc, cp) != 0) : false;
    return RBOOL(is_punct);
  }
示例#4
0
文件: unicode.hpp 项目: Teile/Helper
 /// Wraps a raw byte in a codepoint: the byte's low 8 bits are OR-ed
 /// into 0xF800, so results lie in 0xF801..0xF8FF.
 ///
 /// @param byte  the raw byte to wrap; only its 8-bit value is used.
 /// @return      0xF800 combined with the byte's 8-bit value.
 /// @throws encoding_error  when the byte's 8-bit value is zero.
 inline codepoint encode_raw_byte(char byte)
 {
     // Mask after conversion so a negative (signed) char contributes only
     // its low 8 bits.
     const codepoint low_bits = codepoint(byte) & 0xFF;
     if (low_bits != 0)
     {
         return low_bits | 0xF800;
     }
     throw encoding_error("null or zero byte cannot be raw-data encoded");
 }
示例#5
0
文件: unicode.hpp 项目: Teile/Helper
 /// Reads one codepoint from `it` and steps past it.
 ///
 /// This overload is enabled only when the iterator's value_type reports a
 /// bit_size of at least 21, so a single element is converted directly to a
 /// codepoint.
 ///
 /// @param it   iterator to read from; incremented only on success.
 /// @param end  not consulted by this overload.
 ///             NOTE(review): presumably the caller guarantees it != end
 ///             before calling — confirm.
 /// @return     the element at `it`, converted to codepoint.
 /// @throws encoding_error  when is_valid() rejects the value; `it` is left
 ///                         where it was in that case.
 typename boost::enable_if_c<bit_size<typename std::iterator_traits<T>::value_type>::value >= 21, codepoint>::type next(T& it, T const& end)
 {
     codepoint current = codepoint(*it);
     if (is_valid(current))
     {
         ++it;
         return current;
     }
     throw encoding_error("invalid codepoint");
 }
示例#6
0
  // Predicate: is this character's code accepted by ONIGENC_IS_CODE_ASCII?
  //
  // Produces RBOOL(true) only when the flag handed to codepoint() comes back
  // set and the ASCII test passes for the returned code; no encoding lookup
  // is involved.
  Object* Character::ascii_p(STATE) {
    bool have_code;  // expected to be filled in by codepoint()
    int code = codepoint(state, &have_code);

    bool ascii = false;
    if(have_code) {
      ascii = ONIGENC_IS_CODE_ASCII(code);
    }

    return RBOOL(ascii);
  }
示例#7
0
文件: scanner.cpp 项目: bd808/hhvm
std::string Scanner::escape(const char *str, int len, char quote_type) const {
  std::string output;
  output.reserve(len);

  if (quote_type == '\'') {
    for (int i = 0; i < len; i++) {
      unsigned char ch = str[i];
      if (ch == '\\') {
        if (++i < len) {
          switch (str[i]) {
            case '\\': output += "\\"; break;
            case '\'': output += '\''; break;
            default: {
              output += ch;
              output += str[i];
              break;
            }
          }
        } else {
          assert(false);
          output += ch;
        }
      } else {
        output += ch;
      }
    }
  } else {
    for (int i = 0; i < len; i++) {
      unsigned char ch = str[i];
      if (ch == '\\') {
        if (++i < len) {
          switch (str[i]) {
            case 'n':  output += '\n'; break;
            case 't':  output += '\t'; break;
            case 'r':  output += '\r'; break;
            case 'v':  output += '\v'; break;
            case 'f':  output += '\f'; break;
            case 'e':  output += '\033'; break;
            case '\\': output += '\\'; break;
            case '$':  output += '$';  break;
            case '"':
            case '`':
              if (str[i] != quote_type) {
                output += '\\';
              }
              output += str[i];
              break;
            case 'x':
            case 'X': {
              if (isxdigit(str[i+1])) {
                std::string shex;
                shex += str[++i]; // 0th hex digit
                if (isxdigit(str[i+1])) {
                  shex += str[++i]; // 1st hex digit
                }
                output += strtol(shex.c_str(), nullptr, 16);
              } else {
                output += ch;
                output += str[i];
              }
              break;
            }
            case 'u': {
              // Unicode escape sequence
              //   "\u{123456}"
              if (str[i+1] != '{') {
                // BC for "\u1234" passthrough
                output += ch;
                output += str[i];
                break;
              }

              bool valid = true;
              auto start = str + i + 2;
              auto closebrace = strchr(start, '}');
              if (closebrace > start) {
                for (auto p = start; p < closebrace; ++p) {
                  if (!isxdigit(*p)) {
                    valid = false;
                    break;
                  }
                }
              } else {
                valid = false;
              }

              auto fatal = [this](const char *msg) {
                auto loc = getLocation();
                return ParseTimeFatalException(
                  loc->file,
                  loc->r.line0,
                  "%s", msg);
              };
              if (!valid) {
                throw fatal("Invalid UTF-8 codepoint escape sequence");
              }

              std::string codepoint(start, closebrace - start);
              char *end = nullptr;
              int32_t uchar = strtol(codepoint.c_str(), &end, 16);
              if ((end && *end) || (uchar > 0x10FFFF)) {
                throw fatal(
                  "Invalid UTF-8 codepoint escape sequence: "
                  "Codepoint too large");
              }
              if (uchar <= 0x0007F) {
                output += (char)uchar;
              } else if (uchar <= 0x007FF) {
                output += (char)(0xC0 | ( uchar >> 6         ));
                output += (char)(0x80 | ( uchar        & 0x3F));
              } else if (uchar <= 0x00FFFF) {
                output += (char)(0xE0 | ( uchar >> 12        ));
                output += (char)(0x80 | ((uchar >>  6) & 0x3F));
                output += (char)(0x80 | ( uchar        & 0x3F));
              } else if (uchar <= 0x10FFFF) {
                output += (char)(0xF0 | ( uchar >> 18        ));
                output += (char)(0x80 | ((uchar >> 12) & 0x3F));
                output += (char)(0x80 | ((uchar >>  6) & 0x3F));
                output += (char)(0x80 | ( uchar        & 0x3F));
              } else {
                not_reached();
                assert(false);
              }
              i += codepoint.size() + 2 /* strlen("{}") */;
              break;
            }