Пример #1
0
  // Returns the byte length of the character that starts at p within the
  // range [p, e).
  //
  // If the encoding reports a complete character and that character fits in
  // the remaining bytes, its length is returned. Otherwise the result is the
  // encoding's minimum character length, clamped to the number of bytes left
  // (e - p).
  //
  // An empty or inverted range (p >= e) is never passed to the encoding's
  // length routine, because there is no byte at p to inspect. The result in
  // that case is e - p (zero or negative).
  int Encoding::mbclen(const uint8_t* p, const uint8_t* e, OnigEncodingType* enc) {
    if(p < e) {
      int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);

      if(ONIGENC_MBCLEN_CHARFOUND_P(n) && ONIGENC_MBCLEN_CHARFOUND_LEN(n) <= e - p) {
        return ONIGENC_MBCLEN_CHARFOUND_LEN(n);
      }
    }

    // Fallback: step by the minimum character width, but never past e.
    int min = ONIGENC_MBC_MINLEN(enc);
    return min <= e - p ? min : (int)(e - p);
  }
Пример #2
0
/*
 * Approximates the byte length of the character starting at p.
 *
 *  - complete character found: returns its length;
 *  - more bytes needed: returns the bytes available (e - p) plus the
 *    number of bytes reported as still missing;
 *  - anything else: returns 1.
 */
extern int
onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
{
  const int status = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);

  if (ONIGENC_MBCLEN_CHARFOUND_P(status)) {
    return ONIGENC_MBCLEN_CHARFOUND_LEN(status);
  }

  if (!ONIGENC_MBCLEN_NEEDMORE_P(status)) {
    return 1;
  }

  return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(status);
}
Пример #3
0
  // Classifies a symbol by looking at the leading characters of its name:
  //
  //   - first byte is uppercase (per isupper) and every following character
  //     is alphanumeric in the symbol's encoding, '_' or non-ASCII
  //                                                   -> eConstant
  //   - "@@..."                                       -> eCVar
  //   - "@" followed by something other than a digit  -> eIVar
  //   - more than two bytes starting with "__"        -> eSystem
  //   - anything else                                 -> eNormal
  SymbolTable::Kind SymbolTable::detect_kind(STATE, const Symbol* sym) {
    std::string name = strings[sym->index()];
    const size_t length = name.size();
    uint8_t* cur = reinterpret_cast<uint8_t*>(const_cast<char*>(name.c_str()));

    Encoding* encoding = Encoding::from_index(state, encodings[sym->index()]);
    OnigEncodingType* enc = encoding->encoding();

    // Candidate constant: walk the rest of the name one character at a time.
    if(isupper(*cur)) {
      uint8_t* end = cur + length;

      ++cur;
      while(cur < end) {
        int len = Encoding::precise_mbclen(cur, end, enc);

        // An incomplete or invalid character disqualifies the name.
        if(!ONIGENC_MBCLEN_CHARFOUND_P(len)) return SymbolTable::eNormal;

        len = ONIGENC_MBCLEN_CHARFOUND_LEN(len);
        int code = ONIGENC_MBC_TO_CODE(enc, cur, cur + len);

        bool allowed = ONIGENC_IS_CODE_ALNUM(enc, code) || *cur == '_' || !ISASCII(*cur);
        if(!allowed) return SymbolTable::eNormal;

        cur += len;
      }

      return SymbolTable::eConstant;
    }

    if(cur[0] == '@') {
      bool has_second = length > 1;

      // "@@" introduces a class variable.
      if(has_second && cur[1] == '@') return SymbolTable::eCVar;

      // A lone "@" or "@<digit>" is not a valid instance variable name.
      if(length == 1 || (has_second && ISDIGIT(cur[1]))) {
        return SymbolTable::eNormal;
      }

      // Any other name starting with "@" is an instance variable.
      return SymbolTable::eIVar;
    }

    // System variables start with "__" and have more than two bytes.
    bool system_name = length > 2 && cur[0] == '_' && cur[1] == '_';
    if(system_name) return SymbolTable::eSystem;

    // No special prefix matched.
    return SymbolTable::eNormal;
  }
Пример #4
0
  // Counts the characters in the byte range [p, e) under the encoding enc.
  //
  // Each iteration counts one character and advances p by:
  //   - the character's length, when a complete character is found;
  //   - otherwise the encoding's minimum character length, if that many
  //     bytes remain;
  //   - otherwise all the way to e.
  native_int Encoding::string_character_length(const uint8_t* p, const uint8_t* e, OnigEncodingType* enc) {
    native_int count = 0;

    while(p < e) {
      const int status = Encoding::precise_mbclen(p, e, enc);

      if(ONIGENC_MBCLEN_CHARFOUND_P(status)) {
        p += ONIGENC_MBCLEN_CHARFOUND_LEN(status);
      } else {
        // Broken or truncated character: skip the minimum width, or
        // whatever is left when fewer bytes remain.
        const int min_len = ONIGENC_MBC_MINLEN(enc);
        p = (p + min_len <= e) ? p + min_len : e;
      }

      ++count;
    }

    return count;
  }
Пример #5
0
  // Builds a Character from the character of str that starts at the given
  // byte offset.
  //
  // Returns nil when the offset lies outside [0, byte_size), when no
  // complete character is found at that offset, or when the character would
  // extend past the end of the string. On success the new Character is given
  // the string's encoding.
  Character* Character::create_from(STATE, String* str, Fixnum* byte) {
    const native_int offset = byte->to_native();
    const native_int total = str->byte_size();

    if(offset < 0 || offset >= total) {
      return nil<Character>();
    }

    OnigEncodingType* enc = str->encoding()->get_encoding();
    uint8_t* start = str->byte_address() + offset;
    uint8_t* limit = str->byte_address() + str->byte_size();

    const int status = Encoding::precise_mbclen(start, limit, enc);

    if(ONIGENC_MBCLEN_CHARFOUND_P(status)) {
      const int width = ONIGENC_MBCLEN_CHARFOUND_LEN(status);

      // Only accept a character that lies entirely inside the string.
      if(offset + width <= total) {
        Character* chr = Character::create(state, (const char*)start, width);
        chr->encoding(state, str->encoding());

        return chr;
      }
    }

    return nil<Character>();
  }