/*
 * Tell whether the single byte at *pp has a case variant, according to
 * EncISO_8859_15_CtypeTable and the requested ambiguity flags.
 *
 *   flag : ONIGENC_AMBIGUOUS_MATCH_* bits; ASCII bytes are only examined when
 *          ..._ASCII_CASE is set, non-ASCII bytes only when ..._NONASCII_CASE
 *          is set.
 *   pp   : in/out cursor; always advanced by exactly one byte.
 *   end  : end of input (not consulted by this function).
 *
 * Returns TRUE for an upper-case letter or a convertible lower-case letter,
 * FALSE for everything else.
 */
static int
is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp)++;
  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    int v = (EncISO_8859_15_CtypeTable[*p] &
             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    /* This must be a bit test ('&'). OR-ing the mask in is always non-zero,
       which would classify every byte (digits, punctuation, ...) as
       ambiguous and make the final return unreachable. */
    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
      /* 0xdf etc.. are lower case letter, but can't convert. */
      if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
        return FALSE;
      else
        return TRUE;
    }

    /* Not lower case: ambiguous only if it is an upper-case letter. */
    return (v != 0 ? TRUE : FALSE);
  }

  return FALSE;
}
/*
 * Tell whether the single byte at *pp has a case variant, according to
 * EncISO_8859_7_CtypeTable and the requested ambiguity flags.
 *
 *   flag : ONIGENC_AMBIGUOUS_MATCH_* bits; ASCII bytes are only examined when
 *          ..._ASCII_CASE is set, non-ASCII bytes only when ..._NONASCII_CASE
 *          is set.
 *   pp   : in/out cursor; always advanced by exactly one byte.
 *   end  : end of input (not consulted by this function).
 *
 * Returns TRUE for an upper-case letter or a convertible lower-case letter,
 * FALSE for everything else.
 */
static int
iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp)++;
  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    int v = (EncISO_8859_7_CtypeTable[*p] &
             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    /* This must be a bit test ('&'). OR-ing the mask in is always non-zero,
       which would classify every byte as ambiguous and make the final
       return unreachable. */
    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
      /* 0xc0 and 0xe0 are excluded here; presumably they are lower-case
         letters with no single-byte upper-case form -- TODO confirm
         against the ISO-8859-7 chart. */
      if (*p == 0xc0 || *p == 0xe0)
        return FALSE;
      else
        return TRUE;
    }

    /* Not lower case: ambiguous only if it is an upper-case letter. */
    return (v != 0 ? TRUE : FALSE);
  }

  return FALSE;
}
/*
 * Normalize one input unit at *pp into a single byte stored at *lower.
 *
 * With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set and at least two bytes left,
 * the pair "ss" (and "SS" when ASCII case folding is enabled) is consumed
 * as a unit and written as 0xdf.  Otherwise one byte is consumed and is
 * mapped through ENC_ISO_8859_16_TO_LOWER_CASE when the flag matching its
 * ASCII / non-ASCII class is set, or copied unchanged when it is not.
 *
 * Always returns 1 (bytes written to lower); *pp is advanced by 1 or 2.
 */
static int
iso_8859_16_mbc_to_normalize(OnigAmbigType flag, const UChar** pp,
                             const UChar* end, UChar* lower)
{
  const UChar* src = *pp;
  const int fold_ascii    = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
  const int fold_nonascii = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);

  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && end > src + 1) {
    int is_pair = (src[0] == 's' && src[1] == 's');

    if (!is_pair && fold_ascii)
      is_pair = (src[0] == 'S' && src[1] == 'S');

    if (is_pair) {
      *lower = 0xdf;
      (*pp) += 2;
      return 1;
    }
  }

  /* Fold only when the flag for this byte's class (ASCII or not) is set. */
  if (ONIGENC_IS_MBC_ASCII(src) ? fold_ascii : fold_nonascii)
    *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*src);
  else
    *lower = *src;

  (*pp)++;
  return 1; /* byte length of the normalized output */
}
/*
 * Tell whether the 4-byte big-endian unit at *pp has a case variant.
 *
 *   flag : ONIGENC_AMBIGUOUS_MATCH_* bits; ASCII values are only examined
 *          when ..._ASCII_CASE is set, non-ASCII values only when
 *          ..._NONASCII_CASE is set.
 *   pp   : in/out cursor; always advanced by four bytes.
 *   end  : end of input (not consulted by this function).
 *
 * Only units whose three leading bytes are zero (value < 0x100) are
 * examined, via ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE; every other unit
 * yields FALSE.  Returns TRUE for an upper-case letter or a convertible
 * lower-case letter, FALSE otherwise.
 */
static int
utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp) += 4;

  if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
    int c;

    p += 3; /* low-order byte carries the whole value */
    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
         !ONIGENC_IS_MBC_ASCII(p))) {
      c = *p;

      /* Query the LOWER class on its own.  OR-ing ONIGENC_CTYPE_LOWER into
         the combined result is always non-zero and would report every
         value outside 0xaa..0xba (digits, punctuation, ...) as ambiguous.
         A separate query is correct whether the macro yields the masked
         bits or a plain boolean. */
      if (ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, ONIGENC_CTYPE_LOWER)) {
        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
        if (c >= 0xaa && c <= 0xba)
          return FALSE;
        else
          return TRUE;
      }

      /* Not lower case: ambiguous only if it is an upper-case letter. */
      return (ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
                (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) ? TRUE : FALSE);
    }
  }

  return FALSE;
}
/*
 * Normalize one input unit at *pp into four bytes stored at lower.
 *
 * Units whose three leading bytes are not all zero are copied through
 * unchanged (the copy is skipped when lower and the source are the same
 * address).  For the remaining units (value < 0x100):
 *   - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND set and a following unit
 *     available, the pair 's','s' (or 'S','S' when ASCII case folding is
 *     enabled) is consumed as eight bytes and written as 00 00 00 df;
 *   - otherwise the low byte is mapped through
 *     ONIGENC_ISO_8859_1_TO_LOWER_CASE when the flag matching its
 *     ASCII / non-ASCII class is set, or copied unchanged when it is not.
 *
 * Always returns 4 (bytes written); *pp is advanced by 4 or 8.
 */
static int
utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp,
                         const UChar* end, UChar* lower)
{
  const UChar* p = *pp;

  if (!(*(p+2) == 0 && *(p+1) == 0 && *p == 0)) {
    /* Value >= 0x100: pass the unit through untouched. */
    const int len = 4;

    if (lower != p) {
      int k;
      for (k = 0; k < len; k++)
        lower[k] = p[k];
    }
    (*pp) += len;
    return len; /* byte length of the normalized output */
  }

  {
    const UChar* lo = p + 3; /* low-order byte of the current unit */
    const int fold_ascii    = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
    const int fold_nonascii = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);

    if (end > lo + 4 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
      /* lo[4] is the low byte of the next unit; lo[1..3] are its leading bytes. */
      int is_pair = (lo[0] == 's' && lo[4] == 's') ||
                    (fold_ascii && lo[0] == 'S' && lo[4] == 'S');

      if (is_pair && lo[3] == 0 && lo[2] == 0 && lo[1] == 0) {
        lower[0] = '\0';
        lower[1] = '\0';
        lower[2] = '\0';
        lower[3] = 0xdf;
        (*pp) += 8;
        return 4;
      }
    }

    /* Leading bytes are written before the low byte is read, as before. */
    lower[0] = '\0';
    lower[1] = '\0';
    lower[2] = '\0';
    if (ONIGENC_IS_MBC_ASCII(lo) ? fold_ascii : fold_nonascii)
      lower[3] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*lo);
    else
      lower[3] = *lo;

    (*pp) += 4;
    return 4; /* byte length of the normalized output */
  }
}
/*
 * Normalize one character at *pp into lower for a multi-byte encoding.
 *
 * An ASCII lead byte is lower-cased with ONIGENC_ASCII_CODE_TO_LOWER_CASE
 * when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is set, otherwise copied as is.
 * Any other character is copied unchanged for enc_len(enc, p) bytes (the
 * copy is skipped when lower and the source are the same address).
 *
 * *pp is advanced past the character; the return value is the number of
 * bytes the character occupies.  end is not consulted.
 */
extern int
onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end, UChar* lower)
{
  const UChar* src = *pp;
  int nbytes;
  int k;

  if (!ONIGENC_IS_MBC_ASCII(src)) {
    nbytes = enc_len(enc, src);
    if (lower != src) {
      for (k = 0; k < nbytes; k++)
        lower[k] = src[k];
    }
    (*pp) += nbytes;
    return nbytes; /* byte length of the copied character */
  }

  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
  else
    *lower = *src;

  (*pp)++;
  return 1;
}
/*
 * Case-ambiguity test for a multi-byte encoding: characters with a
 * non-ASCII lead byte are never ambiguous; ASCII bytes defer to
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG.
 */
extern int
onigenc_mbn_mbc_is_case_ambig(UChar* p)
{
  if (!ONIGENC_IS_MBC_ASCII(p))
    return FALSE;

  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
/*
 * Normalize the single byte at *pp into *lower.
 *
 * The byte is mapped through ENC_ISO_8859_15_TO_LOWER_CASE when the flag
 * matching its class is set (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE for ASCII
 * bytes, ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE for the rest); otherwise it
 * is copied unchanged.
 *
 * *pp is advanced by one byte and 1 is returned.  end is not consulted.
 */
static int
mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
                 UChar* lower)
{
  const UChar* src = *pp;
  const int fold_ascii    = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
  const int fold_nonascii = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);

  if (ONIGENC_IS_MBC_ASCII(src) ? fold_ascii : fold_nonascii)
    *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*src);
  else
    *lower = *src;

  (*pp)++;
  return 1; /* byte length of the normalized output */
}
/*
 * Tell whether the input at *pp has a case / compound variant, according
 * to EncISO_8859_16_CtypeTable and the requested ambiguity flags.
 *
 *   flag : ONIGENC_AMBIGUOUS_MATCH_* bits.
 *   pp   : in/out cursor; advanced by two bytes when an "ss"/"SS" pair is
 *          recognised, by one byte otherwise.
 *   end  : end of input; used only to check that a second byte exists
 *          before testing for the pair.
 *
 * With ..._COMPOUND set, "ss" (and "SS" when ..._ASCII_CASE is set) and the
 * byte 0xdf are reported as ambiguous.  Otherwise the byte is ambiguous
 * when it is an upper-case letter or a convertible lower-case letter and
 * the flag matching its ASCII / non-ASCII class is set.
 */
static int
iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
    if (end > p + 1) {
      if ((*p == 's' && *(p+1) == 's') ||
          ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
           (*p == 'S' && *(p+1) == 'S'))) {
        (*pp) += 2;
        return TRUE;
      }
    }

    if (*p == 0xdf) {
      (*pp)++;
      return TRUE;
    }
  }

  (*pp)++;
  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    int v = (EncISO_8859_16_CtypeTable[*p] &
             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    /* This must be a bit test ('&'). OR-ing the mask in is always non-zero,
       which would classify every byte as ambiguous and make the final
       return unreachable. */
    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
      /* 0xdf is lower case letter, but can't convert. */
      if (*p == 0xdf)
        return FALSE;
      else
        return TRUE;
    }

    /* Not lower case: ambiguous only if it is an upper-case letter. */
    return (v != 0 ? TRUE : FALSE);
  }

  return FALSE;
}
/*
 * Case-ambiguity test for one character of a multi-byte encoding.
 *
 * *pp is advanced past the character: one byte for an ASCII lead byte,
 * enc_len(enc, p) bytes otherwise.  Only ASCII bytes can be ambiguous, and
 * only when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is set; the verdict then
 * comes from ONIGENC_IS_ASCII_CODE_CASE_AMBIG.  end is not consulted.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (!ONIGENC_IS_MBC_ASCII(cur)) {
    (*pp) += enc_len(enc, cur);
    return FALSE;
  }

  (*pp)++;
  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
    return FALSE;

  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
/*
 * Write the lower-case form of the character at p into lower.
 *
 * An ASCII lead byte is mapped with ONIGENC_ASCII_CODE_TO_LOWER_CASE and 1
 * is returned.  Any other character is copied unchanged (the copy is
 * skipped when lower and p are the same address) and its byte length, as
 * reported by enc_len, is returned.
 *
 * NOTE(review): enc_len receives the lead byte value (*p) here, not the
 * pointer -- confirm this matches the enc_len definition in scope.
 */
extern int
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
{
  int nbytes;
  int k;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }

  nbytes = enc_len(enc, *p);
  if (lower != p) {
    /* byte-wise copy of the unchanged character */
    for (k = 0; k < nbytes; k++)
      lower[k] = p[k];
  }

  return nbytes; /* byte length of the copied character */
}
/*
 * Case-fold one character at *pp into lower.
 *
 * An ASCII lead byte is mapped with ONIGENC_ASCII_CODE_TO_LOWER_CASE; one
 * byte is consumed and 1 is returned.  Any other character is decoded with
 * mbc_to_code, passed through get_lower_case, and re-encoded into lower
 * with code_to_mbc; the value code_to_mbc returns is both added to *pp and
 * returned.  flag is not consulted.
 *
 * NOTE(review): the cursor advances by the re-encoded length, not the
 * source length; this assumes code_to_mbc returns a non-negative length
 * equal to the source character's length -- TODO confirm.
 */
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
              UChar* lower, OnigEncoding enc)
{
  const UChar* src = *pp;
  OnigCodePoint folded;
  int nbytes;

  if (ONIGENC_IS_MBC_ASCII(src)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
    (*pp)++;
    return 1;
  }

  folded = get_lower_case(mbc_to_code(src, end, enc));
  nbytes = code_to_mbc(folded, lower, enc);
  (*pp) += nbytes;
  return nbytes; /* byte length of the folded character */
}
/*
 * Case-fold one character at *pp into lower.
 *
 * An ASCII lead byte is mapped with ONIGENC_ASCII_CODE_TO_LOWER_CASE; one
 * byte is consumed and 1 is returned.  Any other character is copied to
 * lower unchanged for enclen(enc, p, end) bytes; *pp is advanced by that
 * length, which is also returned.  flag is not consulted.
 */
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
              UChar* lower, OnigEncoding enc)
{
  const UChar* src = *pp;
  int nbytes;
  int k;

  if (ONIGENC_IS_MBC_ASCII(src)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
    (*pp)++;
    return 1;
  }

  nbytes = enclen(enc, src, end);
  for (k = 0; k < nbytes; k++)
    lower[k] = src[k];

  (*pp) += nbytes;
  return nbytes; /* byte length of the copied character */
}