/*
 * Normalize the character at *pp into `lower` and advance *pp past it.
 *
 * A byte that ONIGENC_IS_MBC_ASCII accepts is emitted as one byte; it is
 * lower-cased only when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is set in `flag`.
 * Any other character is copied verbatim, enc_len() bytes long, unless
 * `lower` already points at the source (in-place call).
 *
 * `end` is not consulted by this function.
 *
 * Returns the byte length of the character written to `lower`, which is
 * also the distance *pp was advanced.
 */
extern int
onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end, UChar* lower)
{
  const UChar* src = *pp;
  int nbytes;
  int k;

  if (ONIGENC_IS_MBC_ASCII(src)) {
    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0) {
      *lower = *src;
    }
    else {
      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
    }
    *pp += 1;
    return 1;
  }

  /* Multibyte character: no case mapping, just a byte-for-byte copy. */
  nbytes = enc_len(enc, src);
  if (lower != src) {
    for (k = 0; k < nbytes; k++) {
      lower[k] = src[k];
    }
  }
  *pp += nbytes;
  return nbytes;
}
/*
 * Decode the UTF-8 sequence starting at `p` into a code point.
 *
 * The sequence length comes from enc_len(ONIG_ENCODING_UTF8, p, end); the
 * `enc` argument itself is not used.  The length is then clamped to the
 * bytes actually available before `end`, so a sequence truncated by the end
 * of the buffer never causes trail bytes to be read out of bounds.  The
 * lead byte at `p` is always read, so `p` must point at a readable byte.
 *
 * For a multibyte sequence the payload bits of the lead byte are combined
 * with the low six bits of each trail byte.  No validation of the trail
 * bytes is performed here.
 *
 * For a single byte the byte value is returned, except that with
 * USE_INVALID_CODE_SCHEME the bytes 0xfe and 0xff map to INVALID_CODE_FE
 * and INVALID_CODE_FF respectively.
 */
static OnigCodePoint
utf8_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
{
  int c, len;
  OnigCodePoint n;

  len = enc_len(ONIG_ENCODING_UTF8, p, end);
  /* Do not trust the lead byte beyond what the buffer really holds. */
  if (end - p < len) len = (int )(end - p);

  c = *p++;
  if (len > 1) {
    len--;
    /* Mask off the length-marker bits of the lead byte. */
    n = c & ((1 << (6 - len)) - 1);
    while (len--) {
      c = *p++;
      n = (n << 6) | (c & ((1 << 6) - 1));
    }
    return n;
  }
  else {
#ifdef USE_INVALID_CODE_SCHEME
    if (c > 0xfd) {
      return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
    }
#endif
    return (OnigCodePoint )c;
  }
}
/*
 * Return a character head that is not before `s`.
 *
 * ONIGENC_LEFT_ADJUST_CHAR_HEAD is applied to `s` first.  If the head it
 * yields lies before `s`, that head is advanced by the enc_len() of its
 * first byte and the advanced pointer is returned; otherwise the head is
 * returned as is.
 */
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  if (head >= s) {
    return head;
  }

  /* `s` was inside a character: skip over that character. */
  head += enc_len(enc, *head);
  return head;
}
/*
 * Same adjustment as onigenc_get_right_adjust_char_head(), additionally
 * reporting the preceding character head through `prev` (may be NULL, in
 * which case nothing is reported).
 *
 * If the left-adjusted head lies before `s`, *prev receives that head and
 * the returned pointer is that head advanced by enc_len() of its first byte.
 * Otherwise the head is returned unchanged and *prev is set to NULL: the
 * previous character is not determined in that case.
 */
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
                                             UChar* start, UChar* s,
                                             UChar** prev)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  if (head >= s) {
    if (prev != NULL) {
      *prev = (UChar* )NULL;
    }
    return head;
  }

  if (prev != NULL) {
    *prev = head;
  }
  head += enc_len(enc, *head);
  return head;
}
/*
 * Build a code point from the EUC-JP character at `p` by concatenating its
 * bytes, most significant first.
 *
 * The character length is taken from enc_len(ONIG_ENCODING_EUC_JP, p).
 * The first byte is always read; each following byte is read only while it
 * lies before `end`, so a truncated character yields the value of the bytes
 * that were available.
 */
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
  int nbytes, left;
  OnigCodePoint code;

  nbytes = enc_len(ONIG_ENCODING_EUC_JP, p);
  code = (OnigCodePoint )*p++;

  /* `left` counts the trail bytes still expected. */
  for (left = nbytes - 1; left > 0 && p < end; left--) {
    code <<= 8;
    code += *p++;
  }
  return code;
}
/*
 * Generic multibyte decoder: concatenate the bytes of the character at `p`
 * (most significant first) into a code point.
 *
 * The character length is taken from enc_len(enc, p).  The first byte is
 * read unconditionally; subsequent bytes are consumed only while they lie
 * before `end`, so decoding stops early on a truncated character.
 */
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
  int total;
  int consumed;
  OnigCodePoint code;

  total = enc_len(enc, p);
  code = (OnigCodePoint )(*p++);
  consumed = 1;

  while (consumed < total && p < end) {
    code <<= 8;
    code += *p++;
    consumed++;
  }
  return code;
}
/*
 * Find the head of the EUC-TW character that contains position `s`,
 * never looking before `start`.
 *
 * This relies on the assumption that, in this encoding, multibyte trail
 * bytes are not mixed with single-byte characters.
 *
 * Returns `s` itself when `s` is at or before `start`.
 */
static UChar*
euctw_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar* head;
  int nbytes;

  if (s <= start) return (UChar* )s;

  /* Walk back until a byte accepted by euctw_islead, or until `start`. */
  head = s;
  for (;;) {
    if (euctw_islead(*head)) break;
    if (head <= start) break;
    head--;
  }

  nbytes = enc_len(ONIG_ENCODING_EUC_TW, head);
  if (head + nbytes > s) {
    /* The character starting at `head` covers `s`. */
    return (UChar* )head;
  }

  /* Skip that character, then advance by the remaining distance to `s`
     rounded down to an even number of bytes. */
  head += nbytes;
  return (UChar* )(head + ((s - head) & ~1));
}
/*
 * Report whether the character at *pp is case-ambiguous, and advance *pp
 * past it.
 *
 * Only bytes accepted by ONIGENC_IS_MBC_ASCII can be ambiguous, and only
 * when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is set in `flag`; the answer then
 * comes from ONIGENC_IS_ASCII_CODE_CASE_AMBIG.  Every other character
 * yields FALSE and is skipped by its enc_len().
 *
 * `end` is not consulted by this function.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (ONIGENC_IS_MBC_ASCII(cur)) {
    *pp += 1;
    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0) {
      return FALSE;
    }
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
  }

  *pp += enc_len(enc, cur);
  return FALSE;
}
/*
 * Write the lower-case form of the character at `p` into `lower`.
 *
 * A byte accepted by ONIGENC_IS_MBC_ASCII is mapped through
 * ONIGENC_ASCII_CODE_TO_LOWER_CASE and stored as one byte.  Any other
 * character is copied unchanged (enc_len() bytes, taken from its first
 * byte); the copy is skipped when `lower` and `p` are the same pointer.
 *
 * Returns the byte length of the character stored in `lower`.
 */
extern int
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
{
  int nbytes;
  int k;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }

  nbytes = enc_len(enc, *p);
  if (lower == p) {
    /* In-place call: the bytes are already where they belong. */
    return nbytes;
  }

  for (k = 0; k < nbytes; k++) {
    lower[k] = p[k];
  }
  return nbytes;
}
/*
 * Find the head of the Big5 character that contains position `s`, never
 * looking before `start`.
 *
 * Returns `s` itself when `s` is at or before `start`.
 */
static UChar*
big5_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *p;
  const UChar *before;
  int len;

  if (s <= start) return (UChar* )s;
  p = s;

  if (BIG5_ISMB_TRAIL(*p)) {
    /* Move back over the run of bytes accepted by BIG5_ISMB_FIRST that
       immediately precedes `s`.  The pointer is stepped outside the macro
       call so the macro argument has no side effect. */
    while (p > start) {
      before = p - 1;
      if (! BIG5_ISMB_FIRST(*before)) break;
      p = before;
    }
  }

  len = enc_len(ONIG_ENCODING_BIG5, p);
  if (p + len > s) {
    /* The character starting at `p` covers `s`. */
    return (UChar* )p;
  }

  /* Skip that character, then advance by the remaining distance to `s`
     rounded down to an even number of bytes. */
  p += len;
  return (UChar* )(p + ((s - p) & ~1));
}