/*
 * Convert a NUL-terminated string of hexadecimal digits to a 64-bit integer.
 *
 * buffer  NUL-terminated UChar string.  The digit arithmetic below decodes
 *         only the ASCII hex digits '0'-'9', 'A'-'F' and 'a'-'f'.
 *
 * Returns the accumulated value, most significant digit first.
 *
 * A code unit rejected by u_isxdigit() is reported through log_err();
 * parsing still continues, so the return value is not meaningful once an
 * error has been logged.
 *
 * The value is accumulated in an unsigned 64-bit integer: multiplying a
 * signed int64_t by 16 is undefined behaviour as soon as a 16-digit input
 * reaches the top bits, whereas unsigned arithmetic wraps in a defined way.
 * For every input that did not overflow the result is unchanged.
 */
static int64_t uto64(const UChar *buffer)
{
    uint64_t result = 0;

    /* iterate through buffer */
    while (*buffer) {
        int digit;

        if (!u_isxdigit(*buffer)) {
            log_err("\\u%04X is not a valid hex digit for this test\n", (UChar)*buffer);
        }

        /*
         * read the next digit:
         * '0'-'9' map directly; 'A'-'F' need an extra 7 subtracted and
         * 'a'-'f' an extra 39 so that both land on 10-15.
         */
        digit = *buffer - 0x0030 - (*buffer >= 0x0041 ? (*buffer >= 0x0061 ? 39 : 7) : 0);

        /* sign-extend first so an out-of-range digit wraps like the old signed add */
        result = result * 16 + (uint64_t)(int64_t)digit;
        buffer++;
    }

    return (int64_t)result;
}
/*
 * Test whether a codepoint belongs to at least one of the character classes
 * selected by flags (a bit mask of enum_cclass_* values).
 *
 * Returns 1 on a match and 0 otherwise.
 *
 * With ICU (PARROT_HAS_ICU) each requested class is checked with the
 * corresponding ICU predicate.
 *
 * Without ICU:
 *   - codepoints below 256 are looked up in Parrot_iso_8859_1_typetable;
 *   - enum_cclass_any always matches;
 *   - word/alphabetic (when requested alone) are approximated, see below;
 *   - whitespace, numeric and newline use small hard-coded tables;
 *   - if nothing matched and flags requests any other class, an
 *     EXCEPTION_LIBRARY_ERROR ("no ICU lib loaded") is thrown.
 */
static int
u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
{
    ASSERT_ARGS(u_iscclass)
#if PARROT_HAS_ICU
    UNUSED(interp);

    /* XXX which one
       return u_charDigitValue(codepoint);
     */
    if ((flags & enum_cclass_uppercase) && u_isupper(codepoint))
        return 1;
    if ((flags & enum_cclass_lowercase) && u_islower(codepoint))
        return 1;
    if ((flags & enum_cclass_alphabetic) && u_isalpha(codepoint))
        return 1;
    if ((flags & enum_cclass_numeric) && u_isdigit(codepoint))
        return 1;
    if ((flags & enum_cclass_hexadecimal) && u_isxdigit(codepoint))
        return 1;
    if ((flags & enum_cclass_whitespace) && u_isspace(codepoint))
        return 1;
    if ((flags & enum_cclass_printing) && u_isprint(codepoint))
        return 1;
    if ((flags & enum_cclass_graphical) && u_isgraph(codepoint))
        return 1;
    if ((flags & enum_cclass_blank) && u_isblank(codepoint))
        return 1;
    if ((flags & enum_cclass_control) && u_iscntrl(codepoint))
        return 1;
    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))
        return 1;
    if ((flags & enum_cclass_word)
    &&  (u_isalnum(codepoint) || codepoint == '_'))
        return 1;

    /*
     * UCHAR_LINE_BREAK is an enumerated (int) property, not a binary one, so
     * it has to be read with u_getIntPropertyValue(); u_hasBinaryProperty()
     * returns FALSE for every selector outside the binary property range.
     * Mandatory_Break is the same line break class the non-ICU branch uses.
     */
    if ((flags & enum_cclass_newline)
    &&  (codepoint == 0x2028 || codepoint == 0x2029
     ||  u_getIntPropertyValue(codepoint, UCHAR_LINE_BREAK) == U_LB_MANDATORY_BREAK))
        return 1;

    return 0;
#else
    if (codepoint < 256)
        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;

    if (flags == enum_cclass_any)
        return 1;

    /* All codepoints from u+0100 to u+02af are alphabetic, so we
     * cheat on the WORD and ALPHABETIC properties to include these
     * (and incorrectly exclude all others).  This is a stopgap until
     * ICU is everywhere, or we have better non-ICU unicode support.
     */
    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
        return (codepoint < 0x2b0);

    if (flags & enum_cclass_whitespace) {
        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
        switch (codepoint) {
          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
          case 0x205f: case 0x3000:
            return 1;
          default:
            break;
        }
    }

    if (flags & enum_cclass_numeric) {
        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
    }

    if (flags & enum_cclass_newline) {
        /* from http://www.unicode.org/Public/UNIDATA/extracted/DerivedLineBreak.txt
         * Line_Break=Mandatory_Break*/
        if (codepoint == 0x2028 || codepoint == 0x2029)
            return 1;
    }

    /* any class we have no table for cannot be answered without ICU */
    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
        Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_LIBRARY_ERROR,
            "no ICU lib loaded");

    return 0;
#endif
}
// Adapter with the (BinaryProperty, code point, UProperty) signature that
// simply forwards the code point to u_isxdigit(); the property descriptor
// and the property selector are not used.
static UBool isPOSIX_xdigit(const BinaryProperty & /*prop*/, UChar32 c, UProperty /*which*/) {
    const UBool isHexDigit = u_isxdigit(c);
    return isHexDigit;
}
// Determines whether the specified code point is a hexadecimal digit. // This is equivalent to u_digit(c, 16)>=0. // True for characters with general category "Nd" (decimal digit numbers) // as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. // (That is, for letters with code points // 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) bool BUnicodeChar::IsHexDigit(uint32 c) { BUnicodeChar(); return u_isxdigit(c); }
/*
 * Report whether code point c has the binary property selected by which.
 *
 * A selector outside [UCHAR_BINARY_START, UCHAR_BINARY_LIMIT) is not a
 * binary property and yields FALSE.  Otherwise binProps[which] supplies a
 * (mask, column) pair:
 *   - mask!=0: the property is a bit in the properties word returned by
 *     u_getUnicodeProperties(c, column);
 *   - mask==0: column holds a UPROPS_SRC_* value naming the data source,
 *     and the matching branch below computes the answer.
 *
 * Every branch that does not return explicitly falls through to the final
 * "return FALSE".  This includes the normalization-based branches when
 * UCONFIG_NO_NORMALIZATION is set, because their bodies are compiled out.
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which) {
    /* c is range-checked in the functions that are called from here */
    if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
        /* not a known binary property */
    } else {
        uint32_t mask=binProps[which].mask;
        int32_t column=binProps[which].column;
        if(mask!=0) {
            /* systematic, directly stored properties */
            return (u_getUnicodeProperties(c, column)&mask)!=0;
        } else {
            /* mask==0: column is a UPROPS_SRC_* selector rather than a column index */
            if(column==UPROPS_SRC_CASE) {
                return ucase_hasBinaryProperty(c, which);
            } else if(column==UPROPS_SRC_NORM) {
#if !UCONFIG_NO_NORMALIZATION
                /* normalization properties from unorm.icu */
                switch(which) {
                case UCHAR_SEGMENT_STARTER:
                    return unorm_isCanonSafeStart(c);
                default:
                    break;
                }
#endif
            } else if(column==UPROPS_SRC_NFC) {
#if !UCONFIG_NO_NORMALIZATION
                UErrorCode errorCode=U_ZERO_ERROR;
                switch(which) {
                case UCHAR_FULL_COMPOSITION_EXCLUSION: {
                    // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
                    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
                    // A failure to obtain the NFC data is reported as FALSE.
                    return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
                    break;
                }
                default: {
                    // UCHAR_NF[CD]_INERT properties
                    // The property selector is mapped onto a UNormalizationMode by its
                    // offset from UCHAR_NFD_INERT.
                    const Normalizer2 *norm2=Normalizer2Factory::getInstance(
                        (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
                    return U_SUCCESS(errorCode) && norm2->isInert(c);
                }
                }
#endif
            } else if(column==UPROPS_SRC_NFKC) {
#if !UCONFIG_NO_NORMALIZATION
                // UCHAR_NFK[CD]_INERT properties
                UErrorCode errorCode=U_ZERO_ERROR;
                // Same selector-to-mode mapping as in the UPROPS_SRC_NFC branch.
                const Normalizer2 *norm2=Normalizer2Factory::getInstance(
                    (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
                return U_SUCCESS(errorCode) && norm2->isInert(c);
#endif
            } else if(column==UPROPS_SRC_NFKC_CF) {
                // currently only for UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
#if !UCONFIG_NO_NORMALIZATION
                UErrorCode errorCode=U_ZERO_ERROR;
                const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
                if(U_SUCCESS(errorCode)) {
                    // Run c through the NFKC_CF data and see whether the
                    // output differs from the one-code-point input string.
                    UnicodeString src(c);
                    UnicodeString dest;
                    {
                        // The ReorderingBuffer must be in a block because its destructor
                        // needs to release dest's buffer before we look at its contents.
                        ReorderingBuffer buffer(*kcf, dest);
                        // Small destCapacity for NFKC_CF(c).
                        if(buffer.init(5, errorCode)) {
                            const UChar *srcArray=src.getBuffer();
                            kcf->compose(srcArray, srcArray+src.length(), FALSE, TRUE, buffer, errorCode);
                        }
                    }
                    return U_SUCCESS(errorCode) && dest!=src;
                }
                // On failure to load the data, fall through to return FALSE.
#endif
            } else if(column==UPROPS_SRC_BIDI) {
                /* bidi/shaping properties */
                const UBiDiProps *bdp=GET_BIDI_PROPS();
                if(bdp!=NULL) {
                    switch(which) {
                    case UCHAR_BIDI_MIRRORED:
                        return ubidi_isMirrored(bdp, c);
                    case UCHAR_BIDI_CONTROL:
                        return ubidi_isBidiControl(bdp, c);
                    case UCHAR_JOIN_CONTROL:
                        return ubidi_isJoinControl(bdp, c);
                    default:
                        break;
                    }
                }
                /* else return FALSE below */
            } else if(column==UPROPS_SRC_CHAR) {
                /* POSIX-style classes answered by the character-property predicates */
                switch(which) {
                case UCHAR_POSIX_BLANK:
                    return u_isblank(c);
                case UCHAR_POSIX_GRAPH:
                    return u_isgraphPOSIX(c);
                case UCHAR_POSIX_PRINT:
                    return u_isprintPOSIX(c);
                case UCHAR_POSIX_XDIGIT:
                    return u_isxdigit(c);
                default:
                    break;
                }
            } else if(column==UPROPS_SRC_CHAR_AND_PROPSVEC) {
                switch(which) {
                case UCHAR_POSIX_ALNUM:
                    return u_isalnumPOSIX(c);
                default:
                    break;
                }
            } else if(column==UPROPS_SRC_CASE_AND_NORM) {
#if !UCONFIG_NO_NORMALIZATION
                UChar nfdBuffer[4];
                const UChar *nfd;
                int32_t nfdLength;
                UErrorCode errorCode=U_ZERO_ERROR;
                const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
                if(U_FAILURE(errorCode)) {
                    return FALSE;
                }
                switch(which) {
                case UCHAR_CHANGES_WHEN_CASEFOLDED:
                    /*
                     * Work on the decomposition of c.  If it is a single code
                     * point, c is replaced by it; otherwise c is set to
                     * U_SENTINEL so that the whole nfd string is folded below.
                     */
                    nfd=nfcImpl->getDecomposition(c, nfdBuffer, nfdLength);
                    if(nfd!=NULL) {
                        /* c has a decomposition */
                        if(nfdLength==1) {
                            c=nfd[0]; /* single BMP code point */
                        } else if(nfdLength<=U16_MAX_LENGTH) {
                            int32_t i=0;
                            U16_NEXT(nfd, i, nfdLength, c);
                            if(i==nfdLength) {
                                /* single supplementary code point */
                            } else {
                                c=U_SENTINEL;
                            }
                        } else {
                            c=U_SENTINEL;
                        }
                    } else if(c<0) {
                        return FALSE; /* protect against bad input */
                    }
                    errorCode=U_ZERO_ERROR;
                    if(c>=0) {
                        /* single code point */
                        const UCaseProps *csp=ucase_getSingleton(&errorCode);
                        const UChar *resultString;
                        /* NOTE(review): presumably a non-negative result means the
                         * folding differs from c -- confirm against ucase.h */
                        return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
                    } else {
                        /* guess some large but stack-friendly capacity */
                        UChar dest[2*UCASE_MAX_STRING_LENGTH];
                        int32_t destLength;
                        /* fold the decomposition and compare it with the unfolded text */
                        destLength=u_strFoldCase(dest, LENGTHOF(dest), nfd, nfdLength, U_FOLD_CASE_DEFAULT, &errorCode);
                        return (UBool)(U_SUCCESS(errorCode) && 0!=u_strCompare(nfd, nfdLength, dest, destLength, FALSE));
                    }
                default:
                    break;
                }
#endif
            }
        }
    }
    return FALSE;
}