/**
 * IntlChar::charFromName(): resolves a Unicode character name to its code
 * point by forwarding to ICU's u_charFromName().
 *
 * @param name    Character name, passed to ICU as a C string.
 * @param choice  Integer converted to UCharNameChoice to pick the name table.
 * @return The value returned by u_charFromName() on success; on failure the
 *         ICU error code is stored via s_intl_error and null is returned.
 */
Variant HHVM_STATIC_METHOD(IntlChar, charFromName, const String& name, int64_t choice) {
  UErrorCode status = U_ZERO_ERROR;
  auto codepoint = u_charFromName(static_cast<UCharNameChoice>(choice),
                                  name.c_str(),
                                  &status);
  if (!U_FAILURE(status)) {
    return codepoint;
  }
  // Lookup failed: record the ICU status and hand back null.
  s_intl_error->setError(status);
  return init_null();
}
/*
 * Look up a Unicode character by its name.
 *
 * When built with HAVE_ICU, the name is resolved with u_charFromName()
 * using U_EXTENDED_CHAR_NAME. On success the code point is stored in
 * *character and 1 is returned. In every other case (lookup failure, code
 * point not representable in wchar_t, or built without HAVE_ICU) 0 is
 * returned and *character is left untouched.
 */
int getCharacterByName (wchar_t *character, const char *name)
{
#ifdef HAVE_ICU
  UErrorCode error = U_ZERO_ERROR;
  /* u_charFromName() returns a UChar32. Holding it in a 16-bit UChar would
   * silently truncate every code point above U+FFFF. */
  UChar32 uc = u_charFromName(U_EXTENDED_CHAR_NAME, name, &error);

  if (U_SUCCESS(error)) {
    /* A wchar_t narrower than UChar32 cannot hold a supplementary code
     * point in a single unit; report failure rather than truncate. */
    if (sizeof(wchar_t) < sizeof(UChar32) && uc > 0xFFFF) {
      return 0;
    }
    *character = (wchar_t) uc;
    return 1;
  }
#else
  /* No ICU: the arguments are intentionally unused. */
  (void) character;
  (void) name;
#endif /* HAVE_ICU */

  return 0;
}
/**
 * Modifies this set according to a property alias and a property value alias.
 *
 * If `value` is non-empty, `prop` is looked up as a property name and `value`
 * as a value of that property. Binary, integer and mask properties go through
 * applyIntPropertyValue(); numeric value, name, age and script extensions are
 * handled specially. If `value` is empty, `prop` itself is tried in turn as a
 * General_Category mask value, a Script value, a binary property name, and
 * finally one of the special names compared against ANY, ASCII and ASSIGNED.
 *
 * @param prop   property alias (or, with an empty value, a value/special name)
 * @param value  property value alias; may be empty
 * @param ec     in/out error code; nothing is done if it already indicates
 *               failure. Invalid input is reported through the FAIL macro
 *               (defined elsewhere in this file; presumably it sets an error
 *               code in ec and returns *this -- confirm at its definition).
 * @return *this
 */
UnicodeSet&
UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                               const UnicodeString& value,
                               UErrorCode& ec) {
    if (U_FAILURE(ec) || isFrozen()) return *this;

    // prop and value used to be converted to char * using the default
    // converter instead of the invariant conversion.
    // This should not be necessary because all Unicode property and value
    // names use only invariant characters.
    // If there are any variant characters, then we won't find them anyway.
    // Checking first avoids assertion failures in the conversion.
    if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) ||
        !uprv_isInvariantUString(value.getBuffer(), value.length())
    ) {
        FAIL(ec);
    }
    CharString pname, vname;
    pname.appendInvariantChars(prop, ec);
    vname.appendInvariantChars(value, ec);
    if (U_FAILURE(ec)) return *this;

    UProperty p;
    int32_t v;
    UBool mustNotBeEmpty = FALSE, invert = FALSE;

    if (value.length() > 0) {
        p = u_getPropertyEnum(pname.data());
        if (p == UCHAR_INVALID_CODE) FAIL(ec);

        // Treat gc as gcm
        if (p == UCHAR_GENERAL_CATEGORY) {
            p = UCHAR_GENERAL_CATEGORY_MASK;
        }

        if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) ||
            (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) ||
            (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) {
            v = u_getPropertyValueEnum(p, vname.data());
            if (v == UCHAR_INVALID_CODE) {
                // Handle numeric CCC
                if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
                    p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
                    p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
                    char* end;
                    double num = uprv_strtod(vname.data(), &end);
                    // Range-check BEFORE converting: casting a double that is
                    // NaN or outside the int32_t range is undefined behavior.
                    // NaN fails this test because every comparison with NaN
                    // is false. 2147483647 is the largest int32_t value.
                    if (*end != 0 || !(0 <= num && num <= 2147483647.0)) {
                        // negative or out-of-range value, or trailing junk
                        FAIL(ec);
                    }
                    v = (int32_t) num;
                    if (v != num) {
                        // non-integral value
                        FAIL(ec);
                    }
                    // If the resultant set is empty then the numeric value
                    // was invalid.
                    mustNotBeEmpty = TRUE;
                } else {
                    FAIL(ec);
                }
            }
        }

        else {

            switch (p) {
            case UCHAR_NUMERIC_VALUE:
                {
                    char* end;
                    double num = uprv_strtod(vname.data(), &end);
                    if (*end != 0) {
                        FAIL(ec);
                    }
                    applyFilter(numericValueFilter, &num, UPROPS_SRC_CHAR, ec);
                    return *this;
                }
            case UCHAR_NAME:
                {
                    // Must munge name, since u_charFromName() does not do
                    // 'loose' matching.
                    char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
                    if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
                    UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec);
                    if (U_SUCCESS(ec)) {
                        clear();
                        add(ch);
                        return *this;
                    } else {
                        FAIL(ec);
                    }
                }
            case UCHAR_UNICODE_1_NAME:
                // ICU 49 deprecates the Unicode_1_Name property APIs.
                FAIL(ec);
            case UCHAR_AGE:
                {
                    // Must munge name, since u_versionFromString() does not do
                    // 'loose' matching.
                    char buf[128];
                    if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
                    UVersionInfo version;
                    u_versionFromString(version, buf);
                    applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec);
                    return *this;
                }
            case UCHAR_SCRIPT_EXTENSIONS:
                v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data());
                if (v == UCHAR_INVALID_CODE) {
                    FAIL(ec);
                }
                // fall through to calling applyIntPropertyValue()
                break;
            default:
                // p is a non-binary, non-enumerated property that we
                // don't support (yet).
                FAIL(ec);
            }
        }
    }

    else {
        // value is empty.  Interpret as General Category, Script, or
        // Binary property.
        p = UCHAR_GENERAL_CATEGORY_MASK;
        v = u_getPropertyValueEnum(p, pname.data());
        if (v == UCHAR_INVALID_CODE) {
            p = UCHAR_SCRIPT;
            v = u_getPropertyValueEnum(p, pname.data());
            if (v == UCHAR_INVALID_CODE) {
                p = u_getPropertyEnum(pname.data());
                if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) {
                    v = 1;
                } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) {
                    set(MIN_VALUE, MAX_VALUE);
                    return *this;
                } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) {
                    set(0, 0x7F);
                    return *this;
                } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) {
                    // [:Assigned:]=[:^Cn:]
                    p = UCHAR_GENERAL_CATEGORY_MASK;
                    v = U_GC_CN_MASK;
                    invert = TRUE;
                } else {
                    FAIL(ec);
                }
            }
        }
    }

    applyIntPropertyValue(p, v, ec);
    if(invert) {
        complement();
    }

    if (U_SUCCESS(ec) && (mustNotBeEmpty && isEmpty())) {
        // mustNotBeEmpty is set to true if an empty set indicates
        // invalid input.
        ec = U_ILLEGAL_ARGUMENT_ERROR;
    }

    if (isBogus() && U_SUCCESS(ec)) {
        // We likely ran out of memory. AHHH!
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
    return *this;
}
/** * Implements {@link Transliterator#handleTransliterate}. */ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, UBool isIncremental) const { // The failure mode, here and below, is to behave like Any-Null, // if either there is no name data (max len == 0) or there is no // memory (malloc() => NULL). int32_t maxLen = uprv_getMaxCharNameLength(); if (maxLen == 0) { offsets.start = offsets.limit; return; } // Accomodate the longest possible name ++maxLen; // allow for temporary trailing space char* cbuf = (char*) uprv_malloc(maxLen); if (cbuf == NULL) { offsets.start = offsets.limit; return; } UnicodeString openPat(TRUE, OPEN, -1); UnicodeString str, name; int32_t cursor = offsets.start; int32_t limit = offsets.limit; // Modes: // 0 - looking for open delimiter // 1 - after open delimiter int32_t mode = 0; int32_t openPos = -1; // open delim candidate pos UChar32 c; while (cursor < limit) { c = text.char32At(cursor); switch (mode) { case 0: // looking for open delimiter if (c == OPEN_DELIM) { // quick check first openPos = cursor; int32_t i = ICU_Utility::parsePattern(openPat, text, cursor, limit); if (i >= 0 && i < limit) { mode = 1; name.truncate(0); cursor = i; continue; // *** reprocess char32At(cursor) } } break; case 1: // after open delimiter // Look for legal chars. If \s+ is found, convert it // to a single space. If closeDelimiter is found, exit // the loop. If any other character is found, exit the // loop. If the limit is reached, exit the loop. // Convert \s+ => SPACE. This assumes there are no // runs of >1 space characters in names. if (PatternProps::isWhiteSpace(c)) { // Ignore leading whitespace if (name.length() > 0 && name.charAt(name.length()-1) != SPACE) { name.append(SPACE); // If we are too long then abort. maxLen includes // temporary trailing space, so use '>'. 
if (name.length() > maxLen) { mode = 0; } } break; } if (c == CLOSE_DELIM) { int32_t len = name.length(); // Delete trailing space, if any if (len > 0 && name.charAt(len-1) == SPACE) { --len; } if (uprv_isInvariantUString(name.getBuffer(), len)) { name.extract(0, len, cbuf, maxLen, US_INV); UErrorCode status = U_ZERO_ERROR; c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status); if (U_SUCCESS(status)) { // Lookup succeeded // assert(U16_LENGTH(CLOSE_DELIM) == 1); cursor++; // advance over CLOSE_DELIM str.truncate(0); str.append(c); text.handleReplaceBetween(openPos, cursor, str); // Adjust indices for the change in the length of // the string. Do not assume that str.length() == // 1, in case of surrogates. int32_t delta = cursor - openPos - str.length(); cursor -= delta; limit -= delta; // assert(cursor == openPos + str.length()); } } // If the lookup failed, we leave things as-is and // still switch to mode 0 and continue. mode = 0; openPos = -1; // close off candidate continue; // *** reprocess char32At(cursor) } // Check if c is a legal char. We assume here that // legal.contains(OPEN_DELIM) is FALSE, so when we abort a // name, we don't have to go back to openPos+1. if (legal.contains(c)) { name.append(c); // If we go past the longest possible name then abort. // maxLen includes temporary trailing space, so use '>='. if (name.length() >= maxLen) { mode = 0; } } // Invalid character else { --cursor; // Backup and reprocess this character mode = 0; } break; } cursor += U16_LENGTH(c); } offsets.contextLimit += limit - offsets.limit; offsets.limit = limit; // In incremental mode, only advance the cursor up to the last // open delimiter candidate. offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor; uprv_free(cbuf); }
/*
 * Forwarding shim for ICU's u_charFromName(): passes all three arguments
 * through unchanged and returns whatever the ICU call returns.
 */
UChar32 __hs_u_charFromName(UCharNameChoice nameChoice, const char *name,
                            UErrorCode *pErrorCode)
{
    UChar32 codePoint = u_charFromName(nameChoice, name, pErrorCode);
    return codePoint;
}