// Returns a byte length for the character starting at p that never exceeds
// the bytes remaining before e.
//
// p   - first byte of the character to measure
// e   - one past the last readable byte
// enc - encoding used to interpret the bytes
//
// When the encoding reports a complete character that fits in [p, e), its
// length is returned. Otherwise the encoding's minimum character length is
// used, clamped to the number of bytes left.
int Encoding::mbclen(const uint8_t* p, const uint8_t* e, OnigEncodingType* enc) {
  const int status = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);

  if(ONIGENC_MBCLEN_CHARFOUND_P(status)) {
    const int found = ONIGENC_MBCLEN_CHARFOUND_LEN(status);
    if(found <= e - p) return found;
  }

  // No usable character length: fall back to the encoding's minimum width,
  // but never report more bytes than are actually available.
  const int shortest = ONIGENC_MBC_MINLEN(enc);
  if(shortest <= e - p) return shortest;

  return (int)(e - p);
}
// Estimates the byte length of the character starting at p.
//
// p   - first byte of the character
// e   - one past the last available byte
// enc - encoding used to interpret the bytes
//
// Returns:
//   - the reported length when a complete character is found;
//   - the available byte count plus the number of bytes still needed when
//     the encoding reports that more input is required;
//   - 1 in every other case.
extern int onigenc_mbclen_approximate(const OnigUChar* p, const OnigUChar* e, OnigEncoding enc) {
  const int status = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);

  if(ONIGENC_MBCLEN_CHARFOUND_P(status)) {
    return ONIGENC_MBCLEN_CHARFOUND_LEN(status);
  }

  if(ONIGENC_MBCLEN_NEEDMORE_P(status)) {
    const int available = (int)(e - p);
    return available + ONIGENC_MBCLEN_NEEDMORE_LEN(status);
  }

  return 1;
}
// Classifies a symbol by inspecting the text stored for it.
//
// sym - symbol whose index selects the entry in `strings` / `encodings`
//
// Returns one of:
//   eConstant - first byte is an uppercase letter and every following
//               character is alphanumeric, '_' or non-ASCII
//   eCVar     - text begins with "@@"
//   eIVar     - text begins with '@' followed by a non-digit
//   eSystem   - text is longer than two bytes and begins with "__"
//   eNormal   - anything else
SymbolTable::Kind SymbolTable::detect_kind(STATE, const Symbol* sym) {
  std::string name = strings[sym->index()];
  const size_t length = name.size();
  uint8_t* cursor = reinterpret_cast<uint8_t*>(const_cast<char*>(name.c_str()));

  Encoding* encoding = Encoding::from_index(state, encodings[sym->index()]);
  OnigEncodingType* enc = encoding->encoding();

  // Constants start with A-Z, followed by alphanumeric characters or '_' or
  // a non-ASCII character.
  if(isupper(*cursor)) {
    uint8_t* const limit = cursor + length;

    ++cursor;
    while(cursor < limit) {
      const int status = Encoding::precise_mbclen(cursor, limit, enc);
      if(!ONIGENC_MBCLEN_CHARFOUND_P(status)) {
        // A byte sequence the encoding cannot decode disqualifies the name.
        return SymbolTable::eNormal;
      }

      const int width = ONIGENC_MBCLEN_CHARFOUND_LEN(status);
      const int code = ONIGENC_MBC_TO_CODE(enc, cursor, cursor + width);

      const bool allowed =
          ONIGENC_IS_CODE_ALNUM(enc, code) || *cursor == '_' || !ISASCII(*cursor);
      if(!allowed) {
        return SymbolTable::eNormal;
      }

      cursor += width;
    }

    return SymbolTable::eConstant;
  }

  if(cursor[0] == '@') {
    // A lone '@' names nothing.
    if(length == 1) {
      return SymbolTable::eNormal;
    }

    // A class variable begins with @@.
    if(cursor[1] == '@') {
      return SymbolTable::eCVar;
    }

    // An instance variable begins with @ but can't continue with a digit.
    return ISDIGIT(cursor[1]) ? SymbolTable::eNormal : SymbolTable::eIVar;
  }

  // A system variable begins with __ and has at least one more byte.
  const bool system_name = length > 2 && cursor[0] == '_' && cursor[1] == '_';

  // Everything else is normal.
  return system_name ? SymbolTable::eSystem : SymbolTable::eNormal;
}
// Counts the characters in the byte range [p, e) under the given encoding.
//
// p   - first byte of the range
// e   - one past the last byte of the range
// enc - encoding used to interpret the bytes
//
// Each loop iteration counts as one character. When no complete character is
// found at the current position, the scan advances by the encoding's minimum
// character length if that many bytes remain, and otherwise jumps to e.
native_int Encoding::string_character_length(const uint8_t* p, const uint8_t* e, OnigEncodingType* enc) {
  native_int count = 0;

  while(p < e) {
    const int status = Encoding::precise_mbclen(p, e, enc);

    if(ONIGENC_MBCLEN_CHARFOUND_P(status)) {
      p += ONIGENC_MBCLEN_CHARFOUND_LEN(status);
    } else {
      // Skip over undecodable bytes without running past the end.
      const uint8_t* next = p + ONIGENC_MBC_MINLEN(enc);
      p = (next <= e) ? next : e;
    }

    ++count;
  }

  return count;
}
// Builds a Character from the character that starts at a byte offset of str.
//
// str  - source string
// byte - byte offset into str at which the character begins
//
// Returns nil when the offset is outside [0, byte_size), when no complete
// character is found at that offset, or when the reported character length
// would extend past the end of the string. Otherwise returns a new Character
// holding those bytes and carrying the string's encoding.
Character* Character::create_from(STATE, String* str, Fixnum* byte) {
  const native_int offset = byte->to_native();
  const native_int total = str->byte_size();

  if(offset < 0 || offset >= total) {
    return nil<Character>();
  }

  OnigEncodingType* enc = str->encoding()->get_encoding();
  uint8_t* start = str->byte_address() + offset;
  uint8_t* limit = str->byte_address() + str->byte_size();

  const int status = Encoding::precise_mbclen(start, limit, enc);
  if(!ONIGENC_MBCLEN_CHARFOUND_P(status)) {
    return nil<Character>();
  }

  const int width = ONIGENC_MBCLEN_CHARFOUND_LEN(status);
  if(offset + width > total) {
    return nil<Character>();
  }

  Character* chr =
      Character::create(state, reinterpret_cast<const char*>(start), width);
  chr->encoding(state, str->encoding());

  return chr;
}