/**
 * Adds every script reported by uscript_getScriptExtensions() for the given
 * code point to this ScriptSet.
 *
 * @param codePoint  the code point whose script extensions are looked up
 * @param status     in/out ICU error code. Nothing is done if it already
 *                   indicates failure on entry. Set to
 *                   U_MEMORY_ALLOCATION_ERROR if the scratch buffer cannot be
 *                   grown, to the lookup's error code if the lookup fails for
 *                   any reason other than buffer overflow, or to whatever
 *                   set() reports.
 */
void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 5;
    MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
    UErrorCode internalStatus = U_ZERO_ERROR;
    int32_t script_count = -1;

    for (;;) {
        // Advertise the buffer's *current* capacity, not the initial guess.
        // After a resize the larger buffer has to be reported to the callee;
        // otherwise the lookup overflows again on every retry and this loop
        // never terminates for code points with more than
        // FIRST_GUESS_SCRIPT_CAPACITY script extensions.
        script_count = uscript_getScriptExtensions(
            codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
        if (internalStatus != U_BUFFER_OVERFLOW_ERROR) {
            break;
        }
        // Need to allocate more space; on overflow the returned count is the
        // size that is passed to resize().
        if (scripts.resize(script_count) == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        internalStatus = U_ZERO_ERROR;
    }

    // Check if we failed for some reason other than buffer overflow
    if (U_FAILURE(internalStatus)) {
        status = internalStatus;
        return;
    }

    // Load the scripts into the ScriptSet and return
    for (int32_t i = 0; i < script_count; i++) {
        this->set(scripts[i], status);
        if (U_FAILURE(status)) {
            return;
        }
    }
}
/**
 * Stores a new identifier and recomputes the per-identifier data held by this
 * object: the representative zero digits (fNumerics), the scripts required by
 * single-script characters (fRequiredScripts), the alternative script sets of
 * multi-script characters (fScriptSetSet), and the scripts common to all of
 * those alternatives (fCommonAmongAlternates).
 *
 * @param identifier  the identifier text to analyze
 * @param status      in/out ICU error code. Nothing is done if it already
 *                    indicates failure on entry. On a script lookup failure,
 *                    an allocation failure (U_MEMORY_ALLOCATION_ERROR) or a
 *                    hash insertion failure the function returns early and
 *                    the computed data is incomplete.
 * @return            *this
 */
IdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return *this;
    }
    *fIdentifier = identifier;
    clear();
    ScriptSet scriptsForCP;
    UChar32 cp;
    for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
        cp = identifier.char32At(i);
        // Store a representative character for each kind of decimal digit
        if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
            // Just store the zero character as a representative for comparison.
            // Unicode guarantees it is cp - value
            fNumerics->add(cp - static_cast<UChar32>(u_getNumericValue(cp)));
        }
        UScriptCode extensions[500];
        int32_t extensionsCount =
            uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);
        if (U_FAILURE(status)) {
            return *this;
        }
        // Build the script set for this code point, ignoring Common and Inherited.
        scriptsForCP.resetAll();
        for (int32_t j = 0; j < extensionsCount; j++) {
            scriptsForCP.set(extensions[j], status);
        }
        scriptsForCP.reset(USCRIPT_COMMON, status);
        scriptsForCP.reset(USCRIPT_INHERITED, status);
        switch (scriptsForCP.countMembers()) {
          case 0:
            break;
          case 1:
            // Single script, record it.
            fRequiredScripts->Union(scriptsForCP);
            break;
          default:
            if (!fRequiredScripts->intersects(scriptsForCP)
                    && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
                // If the set hasn't been added already, add it
                // (Add a copy, fScriptSetSet takes ownership of the copy.)
                ScriptSet *ownedCopy = new ScriptSet(scriptsForCP);
                if (ownedCopy == NULL) {
                    // Never hand a NULL key to the hash table.
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return *this;
                }
                uhash_puti(fScriptSetSet, ownedCopy, 1, &status);
                if (U_FAILURE(status)) {
                    return *this;
                }
            }
            break;
        }
    }

    // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
    //   [Kana], [Kana Hira] => [Kana]
    // This is relatively infrequent, so doesn't have to be optimized.
    // We also compute any commonalities among the alternates.
    if (uhash_count(fScriptSetSet) > 0) {
        fCommonAmongAlternates->setAll();
        for (int32_t it = UHASH_FIRST;;) {
            const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
            if (nextHashEl == NULL) {
                break;
            }
            ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
            // [Kana], [Kana Hira] => [Kana]
            if (fRequiredScripts->intersects(*next)) {
                uhash_removeElement(fScriptSetSet, nextHashEl);
            } else {
                fCommonAmongAlternates->intersect(*next);
                // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
                // Drop this alternative if another, different alternative is
                // fully contained in it.
                for (int32_t otherIt = UHASH_FIRST;;) {
                    const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
                    if (otherHashEl == NULL) {
                        break;
                    }
                    ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
                    if (next != other && next->contains(*other)) {
                        uhash_removeElement(fScriptSetSet, nextHashEl);
                        break;
                    }
                }
            }
        }
    }
    // With no alternatives left there is nothing in common among them.
    if (uhash_count(fScriptSetSet) == 0) {
        fCommonAmongAlternates->resetAll();
    }
    return *this;
}
void TestGetScriptExtensions() { UScriptCode scripts[20]; int32_t length; UErrorCode errorCode; /* errors and overflows */ errorCode=U_PARSE_ERROR; length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode); if(errorCode!=U_PARSE_ERROR) { log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode); if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) { log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n", (int)length, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode); if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) { log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n", (int)length, u_errorName(errorCode)); } /* U+063F has only a Script code, no Script_Extensions. 
*/ errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode); if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) { log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n", (int)length, u_errorName(errorCode)); } /* invalid code points */ errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode); if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) { log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n", (int)length, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode); if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) { log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n", (int)length, u_errorName(errorCode)); } /* normal usage */ errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode); if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) { log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n", (int)length, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode); if(U_FAILURE(errorCode) || length<3 || !scriptsContain(scripts, length, USCRIPT_ARABIC) || !scriptsContain(scripts, length, USCRIPT_SYRIAC) || !scriptsContain(scripts, length, USCRIPT_MANDAIC)) { log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n", (int)length, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode); if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) { log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n", (int)length, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; 
length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode); if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) { log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n", (int)length, u_errorName(errorCode)); } }