// // replaceCharRefs // // replace the char entities < & { ካ etc. in a string // with the corresponding actual character. // void UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) { UnicodeString result; UnicodeString replacement; int i; mAmps.reset(s); // See the initialization for the regex matcher mAmps. // Which entity we've matched is determined by which capture group has content, // which is flaged by start() of that group not being -1. while (mAmps.find()) { if (mAmps.start(1, status) != -1) { replacement.setTo((UChar)x_AMP); } else if (mAmps.start(2, status) != -1) { replacement.setTo((UChar)x_LT); } else if (mAmps.start(3, status) != -1) { replacement.setTo((UChar)x_GT); } else if (mAmps.start(4, status) != -1) { replacement.setTo((UChar)x_APOS); } else if (mAmps.start(5, status) != -1) { replacement.setTo((UChar)x_QUOT); } else if (mAmps.start(6, status) != -1) { UnicodeString hexString = mAmps.group(6, status); UChar32 val = 0; for (i=0; i<hexString.length(); i++) { val = (val << 4) + u_digit(hexString.charAt(i), 16); } // TODO: some verification that the character is valid replacement.setTo(val); } else if (mAmps.start(7, status) != -1) { UnicodeString decimalString = mAmps.group(7, status); UChar32 val = 0; for (i=0; i<decimalString.length(); i++) { val = val*10 + u_digit(decimalString.charAt(i), 10); } // TODO: some verification that the character is valid replacement.setTo(val); } else { // An unrecognized &entity; Leave it alone. // TODO: check that it really looks like an entity, and is not some // random & in the text. replacement = mAmps.group((int32_t)0, status); } mAmps.appendReplacement(result, replacement, status); } mAmps.appendTail(result); s = result; }
//
// Returns true when character c belongs to at least one of the character
// classes selected by the bit mask f, false otherwise.
//
bool icu_regex_traits::isctype(char_type c, char_class_type f) const
{
   // Standard categories: one bit per value returned by u_charType().
   const char_class_type category_bit = char_class_type(1u << u_charType(c));
   if((category_bit & f) != 0)
      return true;

   // Special cases: each extra class is only examined when f requests it.
   if((f & mask_blank) != 0)
   {
      if(u_isblank(c))
         return true;
   }
   if((f & mask_space) != 0)
   {
      if(u_isspace(c))
         return true;
   }
   if((f & mask_xdigit) != 0)
   {
      if(u_digit(c, 16) >= 0)
         return true;
   }
   if((f & mask_unicode) != 0)
   {
      if(c >= 0x100)
         return true;
   }
   if((f & mask_underscore) != 0)
   {
      if(c == '_')
         return true;
   }
   if((f & mask_any) != 0)
   {
      if(c <= 0x10FFFF)
         return true;
   }
   if((f & mask_ascii) != 0)
   {
      if(c <= 0x7F)
         return true;
   }
   if((f & mask_vertical) != 0)
   {
      // Vertical space: a separator, a vertical tab, or a character whose
      // category bit equals the line/paragraph separator mask.
      if(::boost::re_detail::is_separator(c))
         return true;
      if(c == static_cast<char_type>('\v'))
         return true;
      if(category_bit == U_GC_ZL_MASK)
         return true;
      if(category_bit == U_GC_ZP_MASK)
         return true;
   }
   if((f & mask_horizontal) != 0)
   {
      // Horizontal space: whitespace that is neither a separator nor a
      // vertical tab.
      if(!::boost::re_detail::is_separator(c))
      {
         if(u_isspace(c) && (c != static_cast<char_type>('\v')))
            return true;
      }
   }
   return false;
}
/** * Parse an unsigned 31-bit integer at the given offset. Use * UCharacter.digit() to parse individual characters into digits. * @param text the text to be parsed * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the * offset within text at which to start parsing; it should point * to a valid digit. On exit, pos[0] is the offset after the last * parsed character. If the parse failed, it will be unchanged on * exit. Must be >= 0 on entry. * @param radix the radix in which to parse; must be >= 2 and <= * 36. * @return a non-negative parsed number, or -1 upon parse failure. * Parse fails if there are no digits, that is, if pos[0] does not * point to a valid digit on entry, or if the number to be parsed * does not fit into a 31-bit unsigned integer. */ int32_t ICU_Utility::parseNumber(const UnicodeString& text, int32_t& pos, int8_t radix) { // assert(pos[0] >= 0); // assert(radix >= 2); // assert(radix <= 36); int32_t n = 0; int32_t p = pos; while (p < text.length()) { UChar32 ch = text.char32At(p); int32_t d = u_digit(ch, radix); if (d < 0) { break; } n = radix*n + d; // ASSUME that when a 32-bit integer overflows it becomes // negative. E.g., 214748364 * 10 + 8 => negative value. if (n < 0) { return -1; } ++p; } if (p == pos) { return -1; } pos = p; return n; }
/**
 * IntlChar::digit - value of a code point as a digit in the given radix.
 *
 * @param arg   the character to examine; decoded into `cp` by the GETCP macro
 *              (NOTE(review): GETCP is defined elsewhere and presumably returns
 *              early when arg cannot be decoded - confirm).
 * @param radix the radix; only values from 2 to 36 can produce a digit.
 * @return the digit value, or false after recording U_ILLEGAL_ARGUMENT_ERROR
 *         ("Invalid digit") when no digit value is available.
 */
Variant HHVM_STATIC_METHOD(IntlChar, digit, const Variant& arg, int64_t radix) {
  GETCP(arg, cp);

  // u_digit() takes its radix as an int8_t.  Passing the 64-bit value straight
  // through would silently truncate it (266 would be treated as 10), so radices
  // outside the range u_digit() supports are rejected here, through the same
  // error path that u_digit() failures take.
  int32_t ret = -1;
  if (radix >= 2 && radix <= 36) {
    ret = u_digit(cp, static_cast<int8_t>(radix));
  }
  if (ret < 0) {
    s_intl_error->setError(U_ILLEGAL_ARGUMENT_ERROR, "Invalid digit");
    return false;
  }
  return ret;
}
U_NAMESPACE_BEGIN

/**
 * Parse an integer at pos, either of the form \d+ or of the form
 * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
 * or octal format.
 * @param rule the text to parse
 * @param pos INPUT-OUTPUT parameter.  On input, the first
 * character to parse.  On output, the character after the last
 * parsed character.  Left unchanged if no digit was parsed or if
 * the number is too large.
 * @param limit offset in rule at which parsing stops
 * @return the parsed value, or 0 if no digit was parsed or the
 * number does not fit into a non-negative int32_t.
 */
int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) {
    // Largest value that can be returned (2^31 - 1).
    const int64_t kMaxValue = 0x7FFFFFFF;

    int32_t count = 0;
    int32_t value = 0;
    int32_t p = pos;
    int8_t radix = 10;

    if (p < limit && rule.charAt(p) == 48 /*0*/) {
        if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) {
            p += 2;
            radix = 16;
        }
        else {
            // A lone leading zero is itself a digit, hence count = 1.
            p++;
            count = 1;
            radix = 8;
        }
    }

    while (p < limit) {
        int32_t d = u_digit(rule.charAt(p++), radix);
        if (d < 0) {
            --p;
            break;
        }
        ++count;
        // Accumulate in 64 bits and compare against the int32_t maximum.
        // Comparing the 32-bit result with the previous value is not a
        // reliable overflow test: it rejects legitimate zero digits ("00",
        // "0x0") and misses wrap-arounds that land on a larger positive
        // value (e.g. 1000000000 * 10).
        int64_t v = (static_cast<int64_t>(value) * radix) + d;
        if (v > kMaxValue) {
            // Too many input digits: report failure, pos stays unchanged.
            return 0;
        }
        value = static_cast<int32_t>(v);
    }
    if (count > 0) {
        pos = p;
    }
    return value;
}
/**
 * Convert a string to an unsigned decimal, ignoring rule whitespace.
 * At most 10 digits are accepted.
 * @param string the text to convert
 * @return a non-negative number if successful, or a negative number
 * upon failure.  Conversion fails on a character that is neither rule
 * whitespace nor a decimal digit, on more than 10 digits, and on a
 * value that does not fit into a non-negative int32_t.
 */
static int32_t stou(const UnicodeString& string) {
    // Largest value that can be returned (2^31 - 1).
    const int64_t kMaxValue = 0x7FFFFFFF;

    // Accumulate in 64 bits: a 10-digit number can exceed the int32_t range,
    // and a wrapped 32-bit result may well be positive and look valid.
    int64_t n = 0;
    int32_t count = 0;
    UChar32 c;
    for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
        c = string.char32At(i);
        if (uprv_isRuleWhiteSpace(c)) {
            continue;
        }
        int32_t d = u_digit(c, 10);
        if (d < 0 || ++count > 10) {
            return -1;
        }
        n = 10*n + d;
        if (n > kMaxValue) {
            return -1;
        }
    }
    return static_cast<int32_t>(n);
}
U_NAMESPACE_BEGIN

/**
 * Parse an integer at pos in decimal (\d+), hexadecimal
 * (0x[0-9A-Fa-f]+) or octal (0[0-7]+) notation.
 * @param rule the text to parse
 * @param pos INPUT-OUTPUT parameter.  On input, the offset of the first
 * character to parse.  On output, the offset after the last parsed
 * character.  Left unchanged if no digit was parsed or if the number
 * is too large.
 * @param limit offset in rule at which parsing stops
 * @return the parsed value, or 0 if no digit was parsed or the number
 * does not fit into a non-negative int32_t.
 */
int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) {
    // Largest value that can be returned (2^31 - 1).
    const int64_t kMaxValue = 0x7FFFFFFF;

    int32_t count = 0;
    int32_t value = 0;
    int32_t p = pos;
    int8_t radix = 10;

    if (p < limit && rule.charAt(p) == 48 /*0*/) {
        if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) {
            p += 2;
            radix = 16;
        }
        else {
            // A lone leading zero is itself a digit, hence count = 1.
            p++;
            count = 1;
            radix = 8;
        }
    }

    while (p < limit) {
        int32_t d = u_digit(rule.charAt(p++), radix);
        if (d < 0) {
            --p;
            break;
        }
        ++count;
        // Accumulate in 64 bits and compare against the int32_t maximum.
        // Comparing the 32-bit result with the previous value is not a
        // reliable overflow test: it rejects legitimate zero digits ("00",
        // "0x0") and misses wrap-arounds that land on a larger positive
        // value (e.g. 1000000000 * 10).
        int64_t v = (static_cast<int64_t>(value) * radix) + d;
        if (v > kMaxValue) {
            // Too many input digits: report failure, pos stays unchanged.
            return 0;
        }
        value = static_cast<int32_t>(v);
    }
    if (count > 0) {
        pos = p;
    }
    return value;
}
// Native helper returning the value of codePoint as a digit in the given
// radix, or -1 when it has none.
static jint Character_digitImpl(JNIEnv*, jclass, jint codePoint, jint radix) {
    // u_digit() takes its radix as an int8_t; a wider jint would be silently
    // truncated (266 would behave like 10).  Radices outside 2..36 never yield
    // a digit, so answer -1 for them without calling into ICU.
    if (radix < 2 || radix > 36) {
        return -1;
    }
    return u_digit(codePoint, static_cast<int8_t>(radix));
}
//static jint Character_digitImpl(JNIEnv*, jclass, jint codePoint, jint radix) { JNIEXPORT jint JNICALL Java_java_lang_Character_digitImpl(JNIEnv*, jclass, jint codePoint, jint radix) { return u_digit(codePoint, radix); }
// Returns the value of codePoint as a digit in the given radix, or -1 when it
// has none.
jint fastiva_vm_Character_C$__digitImpl(jint codePoint, jint radix) {
    // u_digit() takes its radix as an int8_t; a wider jint would be silently
    // truncated (266 would behave like 10).  Radices outside 2..36 never yield
    // a digit, so answer -1 for them without calling into ICU.
    if (radix < 2 || radix > 36) {
        return -1;
    }
    return u_digit(codePoint, static_cast<int8_t>(radix));
}
// Returns the result of u_digit() for c in radix 10.
int32
BUnicodeChar::DigitValue(uint32 c)
{
	// A temporary BUnicodeChar is constructed and immediately discarded.
	// NOTE(review): presumably kept for a side effect of the constructor,
	// which is not visible here - confirm before removing.
	BUnicodeChar();

	const int32 decimalValue = u_digit(c, 10);
	return decimalValue;
}
/** * Implements {@link Transliterator#handleTransliterate}. */ void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos, UBool isIncremental) const { int32_t start = pos.start; int32_t limit = pos.limit; int32_t i, j, ipat; while (start < limit) { // Loop over the forms in spec[]. Exit this loop when we // match one of the specs. Exit the outer loop if a // partial match is detected and isIncremental is true. for (j=0, ipat=0; spec[ipat] != END; ++j) { // Read the header int32_t prefixLen = spec[ipat++]; int32_t suffixLen = spec[ipat++]; int8_t radix = (int8_t) spec[ipat++]; int32_t minDigits = spec[ipat++]; int32_t maxDigits = spec[ipat++]; // s is a copy of start that is advanced over the // characters as we parse them. int32_t s = start; UBool match = TRUE; for (i=0; i<prefixLen; ++i) { if (s >= limit) { if (i > 0) { // We've already matched a character. This is // a partial match, so we return if in // incremental mode. In non-incremental mode, // go to the next spec. if (isIncremental) { goto exit; } match = FALSE; break; } } UChar c = text.charAt(s++); if (c != spec[ipat + i]) { match = FALSE; break; } } if (match) { UChar32 u = 0; int32_t digitCount = 0; for (;;) { if (s >= limit) { // Check for partial match in incremental mode. if (s > start && isIncremental) { goto exit; } break; } UChar32 ch = text.char32At(s); int32_t digit = u_digit(ch, radix); if (digit < 0) { break; } s += UTF_CHAR_LENGTH(ch); u = (u * radix) + digit; if (++digitCount == maxDigits) { break; } } match = (digitCount >= minDigits); if (match) { for (i=0; i<suffixLen; ++i) { if (s >= limit) { // Check for partial match in incremental mode. 
if (s > start && isIncremental) { goto exit; } match = FALSE; break; } UChar c = text.charAt(s++); if (c != spec[ipat + prefixLen + i]) { match = FALSE; break; } } if (match) { // At this point, we have a match UnicodeString str(u); text.handleReplaceBetween(start, s, str); limit -= s - start - str.length(); // The following break statement leaves the // loop that is traversing the forms in // spec[]. We then parse the next input // character. break; } } } ipat += prefixLen + suffixLen; } if (start < limit) { start += UTF_CHAR_LENGTH(text.char32At(start)); } } exit: pos.contextLimit += limit - pos.limit; pos.limit = limit; pos.start = start; }