Exemplo n.º 1
0
U_NAMESPACE_BEGIN

CStr::CStr(const UnicodeString &in) {
    UErrorCode status = U_ZERO_ERROR;
#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8
    int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0));
    int32_t resultCapacity = 0;
    char *buf = s.getAppendBuffer(length, length, resultCapacity, status);
    if (U_SUCCESS(status)) {
        in.extract(0, in.length(), buf, resultCapacity);
        s.append(buf, length, status);
    }
#else
    // No conversion available. Convert any invariant characters; substitute '?' for the rest.
    // Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the 
    //       whole string because they require that the entire input be invariant.
    char buf[2];
    for (int i=0; i<in.length(); i = in.moveIndex32(i, 1)) {
        if (uprv_isInvariantUString(in.getBuffer()+i, 1)) {
            u_UCharsToChars(in.getBuffer()+i, buf, 1);
        } else {
            buf[0] = '?';
        }
        s.append(buf, 1, status);
    }
#endif
}
Exemplo n.º 2
0
/*
 * Check for the alias from the string or alias resource res.
 */
static void
checkAlias(const char *itemName,
           Resource res, const UChar *alias, int32_t length, UBool useResSuffix,
           CheckDependency check, void *context, UErrorCode *pErrorCode) {
    int32_t i;

    if(!uprv_isInvariantUString(alias, length)) {
        fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
                        itemName, res);
        *pErrorCode=U_INVALID_CHAR_FOUND;
        return;
    }

    // extract the locale ID from alias strings like
    // locale_ID/key1/key2/key3
    // locale_ID

    // search for the first slash
    for(i=0; i<length && alias[i]!=SLASH; ++i) {}

    if(res_getPublicType(res)==URES_ALIAS) {
        // ignore aliases with an initial slash:
        // /ICUDATA/... and /pkgname/... go to a different package
        // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
        if(i==0) {
            return; // initial slash ('/')
        }

        // ignore the intra-bundle path starting from the first slash ('/')
        length=i;
    } else /* URES_STRING */ {
        // the whole string should only consist of a locale ID
        if(i!=length) {
            fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
                            itemName, res);
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return;
        }
    }

    // convert the Unicode string to char *
    char localeID[32];
    if(length>=(int32_t)sizeof(localeID)) {
        fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
                        itemName, res, (long)length);
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        return;
    }
    u_UCharsToChars(alias, localeID, length);
    localeID[length]=0;

    checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode);
}
Exemplo n.º 3
0
U_CAPI UResourceBundle * U_EXPORT2
ures_openU(const UChar *myPath, 
           const char *localeID, 
           UErrorCode *status)
{
    char pathBuffer[1024];
    int32_t length;
    char *path = pathBuffer;

    if(status==NULL || U_FAILURE(*status)) {
        return NULL;
    }
    if(myPath==NULL) {
        path = NULL;
    }
    else {
        length=u_strlen(myPath);
        if(length>=sizeof(pathBuffer)) {
            *status=U_ILLEGAL_ARGUMENT_ERROR;
            return NULL;
        } else if(uprv_isInvariantUString(myPath, length)) {
            /*
             * the invariant converter is sufficient for package and tree names
             * and is more efficient
             */
            u_UCharsToChars(myPath, path, length+1); /* length+1 to include the NUL */
        } else {
#if !UCONFIG_NO_CONVERSION
            /* use the default converter to support variant-character paths */
            UConverter *cnv=u_getDefaultConverter(status);
            length=ucnv_fromUChars(cnv, path, (int32_t)sizeof(pathBuffer), myPath, length, status);
            u_releaseDefaultConverter(cnv);
            if(U_FAILURE(*status)) {
                return NULL;
            }
            if(length>=sizeof(pathBuffer)) {
                /* not NUL-terminated - path too long */
                *status=U_ILLEGAL_ARGUMENT_ERROR;
                return NULL;
            }
#else
            /* the default converter is not available */
            *status=U_UNSUPPORTED_ERROR;
            return NULL;
#endif
        }
    }

    return ures_open(path, localeID, status);
}
Exemplo n.º 4
0
/**
 * Return the script code for a given name, or -1 if not found.
 */
static UScriptCode scriptNameToCode(const UnicodeString& name) {
    char buf[128];
    UScriptCode code;
    UErrorCode ec = U_ZERO_ERROR;
    int32_t nameLen = name.length();
    UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen);
    
    if (isInvariant) {
        name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV);
        buf[127] = 0;   // Make sure that we NULL terminate the string.
    }
    if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec))
    {
        code = USCRIPT_INVALID_CODE;
    }
    return code;
}
Exemplo n.º 5
0
/* test invariant-character handling */
static void
TestInvariant() {
    /* all invariant graphic chars and some control codes (not \n!) */
    const char invariantChars[]=
        "\t\r \"%&'()*+,-./"
        "0123456789:;<=>?"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ_"
        "abcdefghijklmnopqrstuvwxyz";

    const UChar invariantUChars[]={
        9, 0xd, 0x20, 0x22, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
        0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
        0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5f,
        0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
        0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0
    };

    const char variantChars[]="\n!#$@[\\]^`{|}~";

    const UChar variantUChars[]={
        0x0a, 0x21, 0x23, 0x24, 0x40, 0x5b, 0x5c, 0x5d, 0x5e, 0x60, 0x7b, 0x7c, 0x7d, 0x7e, 0
    };

    const UChar nonASCIIUChars[]={ 0x80, 0xa0, 0x900, 0xff51 };

    UChar us[120];
    char cs[120];

    int32_t i, length;

    /* make sure that all invariant characters convert both ways */
    length=sizeof(invariantChars);
    u_charsToUChars(invariantChars, us, length);
    if(u_strcmp(us, invariantUChars)!=0) {
        log_err("u_charsToUChars(invariantChars) failed\n");
    }

    u_UCharsToChars(invariantUChars, cs, length);
    if(strcmp(cs, invariantChars)!=0) {
        log_err("u_UCharsToChars(invariantUChars) failed\n");
    }


    /*
     * make sure that variant characters convert from source code literals to Unicode
     * but not back to char *
     */
    length=sizeof(variantChars);
    u_charsToUChars(variantChars, us, length);
    if(u_strcmp(us, variantUChars)!=0) {
        log_err("u_charsToUChars(variantChars) failed\n");
    }

#ifdef NDEBUG
    /*
     * Test u_UCharsToChars(variantUChars) only in release mode because it will
     * cause an assertion failure in debug builds.
     */
    u_UCharsToChars(variantUChars, cs, length);
    for(i=0; i<length; ++i) {
        if(cs[i]!=0) {
            log_err("u_UCharsToChars(variantUChars) converted the %d-th character to %02x instead of 00\n", i, cs[i]);
        }
    }
#endif

    /*
     * Verify that invariant characters roundtrip from Unicode to the
     * default converter and back.
     */
    {
        UConverter *cnv;
        UErrorCode errorCode;

        errorCode=U_ZERO_ERROR;
        cnv=ucnv_open(NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("unable to open the default converter\n");
        } else {
            length=ucnv_fromUChars(cnv, cs, sizeof(cs), invariantUChars, -1, &errorCode);
            if(U_FAILURE(errorCode)) {
                log_err("ucnv_fromUChars(invariantUChars) failed - %s\n", u_errorName(errorCode));
            } else if(length!=sizeof(invariantChars)-1 || strcmp(cs, invariantChars)!=0) {
                log_err("ucnv_fromUChars(invariantUChars) failed\n");
            }

            errorCode=U_ZERO_ERROR;
            length=ucnv_toUChars(cnv, us, LENGTHOF(us), invariantChars, -1, &errorCode);
            if(U_FAILURE(errorCode)) {
                log_err("ucnv_toUChars(invariantChars) failed - %s\n", u_errorName(errorCode));
            } else if(length!=LENGTHOF(invariantUChars)-1 || u_strcmp(us, invariantUChars)!=0) {
                log_err("ucnv_toUChars(invariantChars) failed\n");
            }

            ucnv_close(cnv);
        }
    }

    /* API tests */
    if(!uprv_isInvariantString(invariantChars, -1)) {
        log_err("uprv_isInvariantString(invariantChars) failed\n");
    }
    if(!uprv_isInvariantUString(invariantUChars, -1)) {
        log_err("uprv_isInvariantUString(invariantUChars) failed\n");
    }
    if(!uprv_isInvariantString(invariantChars+strlen(invariantChars), 1)) {
        log_err("uprv_isInvariantString(\"\\0\") failed\n");
    }

    for(i=0; i<(sizeof(variantChars)-1); ++i) {
        if(uprv_isInvariantString(variantChars+i, 1)) {
            log_err("uprv_isInvariantString(variantChars[%d]) failed\n", i);
        }
        if(uprv_isInvariantUString(variantUChars+i, 1)) {
            log_err("uprv_isInvariantUString(variantUChars[%d]) failed\n", i);
        }
    }

    for(i=0; i<LENGTHOF(nonASCIIUChars); ++i) {
        if(uprv_isInvariantUString(nonASCIIUChars+i, 1)) {
            log_err("uprv_isInvariantUString(nonASCIIUChars[%d]) failed\n", i);
        }
    }
}
Exemplo n.º 6
0
U_CAPI int32_t U_EXPORT2
udata_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    char dataFormatChars[4];
    const UDataInfo *pInfo;
    int32_t i, swappedLength;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /*
     * Preflight the header first; checks for illegal arguments, too.
     * Do not swap the header right away because the format-specific swapper
     * will swap it, get the headerSize again, and also use the header
     * information. Otherwise we would have to pass some of the information
     * and not be able to use the UDataSwapFn signature.
     */
    udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);

    /*
     * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
     * then we could check here for further known magic values and structures.
     */
    if(U_FAILURE(*pErrorCode)) {
        return 0; /* the data format was not recognized */
    }

    pInfo=(const UDataInfo *)((const char *)inData+4);

    {
        /* convert the data format from ASCII to Unicode to the system charset */
        UChar u[4]={
             pInfo->dataFormat[0], pInfo->dataFormat[1],
             pInfo->dataFormat[2], pInfo->dataFormat[3]
        };

        if(uprv_isInvariantUString(u, 4)) {
            u_UCharsToChars(u, dataFormatChars, 4);
        } else {
            dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
        }
    }

    /* dispatch to the swap function for the dataFormat */
    for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
        if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
            swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
                                 dataFormatChars[0], dataFormatChars[1],
                                 dataFormatChars[2], dataFormatChars[3],
                                 u_errorName(*pErrorCode));
            } else if(swappedLength<(length-15)) {
                /* swapped less than expected */
                udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
                                 swappedLength, length,
                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
                                 dataFormatChars[0], dataFormatChars[1],
                                 dataFormatChars[2], dataFormatChars[3],
                                 u_errorName(*pErrorCode));
            }

            return swappedLength;
        }
    }

    /* the dataFormat was not recognized */
    udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     dataFormatChars[0], dataFormatChars[1],
                     dataFormatChars[2], dataFormatChars[3]);

    *pErrorCode=U_UNSUPPORTED_ERROR;
    return 0;
}
Exemplo n.º 7
0
/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                    UBool isIncremental) const {
    // The failure mode, here and below, is to behave like Any-Null,
    // if either there is no name data (max len == 0) or there is no
    // memory (malloc() => NULL).

    int32_t maxLen = uprv_getMaxCharNameLength();
    if (maxLen == 0) {
        offsets.start = offsets.limit;
        return;
    }

    // Accomodate the longest possible name
    ++maxLen; // allow for temporary trailing space
    char* cbuf = (char*) uprv_malloc(maxLen);
    if (cbuf == NULL) {
        offsets.start = offsets.limit;
        return;
    }

    UnicodeString openPat(TRUE, OPEN, -1);
    UnicodeString str, name;

    int32_t cursor = offsets.start;
    int32_t limit = offsets.limit;

    // Modes:
    // 0 - looking for open delimiter
    // 1 - after open delimiter
    int32_t mode = 0;
    int32_t openPos = -1; // open delim candidate pos

    UChar32 c;
    while (cursor < limit) {
        c = text.char32At(cursor);

        switch (mode) {
        case 0: // looking for open delimiter
            if (c == OPEN_DELIM) { // quick check first
                openPos = cursor;
                int32_t i =
                    ICU_Utility::parsePattern(openPat, text, cursor, limit);
                if (i >= 0 && i < limit) {
                    mode = 1;
                    name.truncate(0);
                    cursor = i;
                    continue; // *** reprocess char32At(cursor)
                }
            }
            break;

        case 1: // after open delimiter
            // Look for legal chars.  If \s+ is found, convert it
            // to a single space.  If closeDelimiter is found, exit
            // the loop.  If any other character is found, exit the
            // loop.  If the limit is reached, exit the loop.

            // Convert \s+ => SPACE.  This assumes there are no
            // runs of >1 space characters in names.
            if (PatternProps::isWhiteSpace(c)) {
                // Ignore leading whitespace
                if (name.length() > 0 &&
                    name.charAt(name.length()-1) != SPACE) {
                    name.append(SPACE);
                    // If we are too long then abort.  maxLen includes
                    // temporary trailing space, so use '>'.
                    if (name.length() > maxLen) {
                        mode = 0;
                    }
                }
                break;
            }

            if (c == CLOSE_DELIM) {
                int32_t len = name.length();

                // Delete trailing space, if any
                if (len > 0 &&
                    name.charAt(len-1) == SPACE) {
                    --len;
                }

                if (uprv_isInvariantUString(name.getBuffer(), len)) {
                    name.extract(0, len, cbuf, maxLen, US_INV);

                    UErrorCode status = U_ZERO_ERROR;
                    c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status);
                    if (U_SUCCESS(status)) {
                        // Lookup succeeded

                        // assert(U16_LENGTH(CLOSE_DELIM) == 1);
                        cursor++; // advance over CLOSE_DELIM

                        str.truncate(0);
                        str.append(c);
                        text.handleReplaceBetween(openPos, cursor, str);

                        // Adjust indices for the change in the length of
                        // the string.  Do not assume that str.length() ==
                        // 1, in case of surrogates.
                        int32_t delta = cursor - openPos - str.length();
                        cursor -= delta;
                        limit -= delta;
                        // assert(cursor == openPos + str.length());
                    }
                }
                // If the lookup failed, we leave things as-is and
                // still switch to mode 0 and continue.
                mode = 0;
                openPos = -1; // close off candidate
                continue; // *** reprocess char32At(cursor)
            }

            // Check if c is a legal char.  We assume here that
            // legal.contains(OPEN_DELIM) is FALSE, so when we abort a
            // name, we don't have to go back to openPos+1.
            if (legal.contains(c)) {
                name.append(c);
                // If we go past the longest possible name then abort.
                // maxLen includes temporary trailing space, so use '>='.
                if (name.length() >= maxLen) {
                    mode = 0;
                }
            }

            // Invalid character
            else {
                --cursor; // Backup and reprocess this character
                mode = 0;
            }

            break;
        }

        cursor += U16_LENGTH(c);
    }

    offsets.contextLimit += limit - offsets.limit;
    offsets.limit = limit;
    // In incremental mode, only advance the cursor up to the last
    // open delimiter candidate.
    offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;

    uprv_free(cbuf);
}
Exemplo n.º 8
0
UnicodeSet&
UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                               const UnicodeString& value,
                               UErrorCode& ec) {
    if (U_FAILURE(ec) || isFrozen()) return *this;

    // prop and value used to be converted to char * using the default
    // converter instead of the invariant conversion.
    // This should not be necessary because all Unicode property and value
    // names use only invariant characters.
    // If there are any variant characters, then we won't find them anyway.
    // Checking first avoids assertion failures in the conversion.
    if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) ||
        !uprv_isInvariantUString(value.getBuffer(), value.length())
    ) {
        FAIL(ec);
    }
    CharString pname, vname;
    pname.appendInvariantChars(prop, ec);
    vname.appendInvariantChars(value, ec);
    if (U_FAILURE(ec)) return *this;

    UProperty p;
    int32_t v;
    UBool mustNotBeEmpty = FALSE, invert = FALSE;

    if (value.length() > 0) {
        p = u_getPropertyEnum(pname.data());
        if (p == UCHAR_INVALID_CODE) FAIL(ec);

        // Treat gc as gcm
        if (p == UCHAR_GENERAL_CATEGORY) {
            p = UCHAR_GENERAL_CATEGORY_MASK;
        }

        if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) ||
            (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) ||
            (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) {
            v = u_getPropertyValueEnum(p, vname.data());
            if (v == UCHAR_INVALID_CODE) {
                // Handle numeric CCC
                if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
                    p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
                    p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
                    char* end;
                    double value = uprv_strtod(vname.data(), &end);
                    v = (int32_t) value;
                    if (v != value || v < 0 || *end != 0) {
                        // non-integral or negative value, or trailing junk
                        FAIL(ec);
                    }
                    // If the resultant set is empty then the numeric value
                    // was invalid.
                    mustNotBeEmpty = TRUE;
                } else {
                    FAIL(ec);
                }
            }
        }

        else {

            switch (p) {
            case UCHAR_NUMERIC_VALUE:
                {
                    char* end;
                    double value = uprv_strtod(vname.data(), &end);
                    if (*end != 0) {
                        FAIL(ec);
                    }
                    applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec);
                    return *this;
                }
            case UCHAR_NAME:
                {
                    // Must munge name, since u_charFromName() does not do
                    // 'loose' matching.
                    char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
                    if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
                    UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec);
                    if (U_SUCCESS(ec)) {
                        clear();
                        add(ch);
                        return *this;
                    } else {
                        FAIL(ec);
                    }
                }
            case UCHAR_UNICODE_1_NAME:
                // ICU 49 deprecates the Unicode_1_Name property APIs.
                FAIL(ec);
            case UCHAR_AGE:
                {
                    // Must munge name, since u_versionFromString() does not do
                    // 'loose' matching.
                    char buf[128];
                    if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
                    UVersionInfo version;
                    u_versionFromString(version, buf);
                    applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec);
                    return *this;
                }
            case UCHAR_SCRIPT_EXTENSIONS:
                v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data());
                if (v == UCHAR_INVALID_CODE) {
                    FAIL(ec);
                }
                // fall through to calling applyIntPropertyValue()
                break;
            default:
                // p is a non-binary, non-enumerated property that we
                // don't support (yet).
                FAIL(ec);
            }
        }
    }

    else {
        // value is empty.  Interpret as General Category, Script, or
        // Binary property.
        p = UCHAR_GENERAL_CATEGORY_MASK;
        v = u_getPropertyValueEnum(p, pname.data());
        if (v == UCHAR_INVALID_CODE) {
            p = UCHAR_SCRIPT;
            v = u_getPropertyValueEnum(p, pname.data());
            if (v == UCHAR_INVALID_CODE) {
                p = u_getPropertyEnum(pname.data());
                if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) {
                    v = 1;
                } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) {
                    set(MIN_VALUE, MAX_VALUE);
                    return *this;
                } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) {
                    set(0, 0x7F);
                    return *this;
                } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) {
                    // [:Assigned:]=[:^Cn:]
                    p = UCHAR_GENERAL_CATEGORY_MASK;
                    v = U_GC_CN_MASK;
                    invert = TRUE;
                } else {
                    FAIL(ec);
                }
            }
        }
    }

    applyIntPropertyValue(p, v, ec);
    if(invert) {
        complement();
    }

    if (U_SUCCESS(ec) && (mustNotBeEmpty && isEmpty())) {
        // mustNotBeEmpty is set to true if an empty set indicates
        // invalid input.
        ec = U_ILLEGAL_ARGUMENT_ERROR;
    }

    if (isBogus() && U_SUCCESS(ec)) {
        // We likely ran out of memory. AHHH!
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
    return *this;
}
Exemplo n.º 9
0
const UChar* U_EXPORT2
ZoneMeta::getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (tzid.isBogus() || tzid.length() > ZID_KEY_MAX) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Checking the cached results
    umtx_initOnce(gCanonicalIDCacheInitOnce, &initCanonicalIDCache, status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    const UChar *canonicalID = NULL;

    UErrorCode tmpStatus = U_ZERO_ERROR;
    UChar utzid[ZID_KEY_MAX + 1];
    tzid.extract(utzid, ZID_KEY_MAX + 1, tmpStatus);
    U_ASSERT(tmpStatus == U_ZERO_ERROR);    // we checked the length of tzid already

    if (!uprv_isInvariantUString(utzid, -1)) {
        // All of known tz IDs are only containing ASCII invariant characters.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Check if it was already cached
    umtx_lock(gZoneMetaLock());
    {
        canonicalID = (const UChar *)uhash_get(gCanonicalIDCache, utzid);
    }
    umtx_unlock(gZoneMetaLock());

    if (canonicalID != NULL) {
        return canonicalID;
    }

    // If not, resolve CLDR canonical ID with resource data
    UBool isInputCanonical = FALSE;
    char id[ZID_KEY_MAX + 1];
    tzid.extract(0, 0x7fffffff, id, UPRV_LENGTHOF(id), US_INV);

    // replace '/' with ':'
    char *p = id;
    while (*p++) {
        if (*p == '/') {
            *p = ':';
        }
    }

    UResourceBundle *top = ures_openDirect(NULL, gKeyTypeData, &tmpStatus);
    UResourceBundle *rb = ures_getByKey(top, gTypeMapTag, NULL, &tmpStatus);
    ures_getByKey(rb, gTimezoneTag, rb, &tmpStatus);
    ures_getByKey(rb, id, rb, &tmpStatus);
    if (U_SUCCESS(tmpStatus)) {
        // type entry (canonical) found
        // the input is the canonical ID. resolve to const UChar*
        canonicalID = TimeZone::findID(tzid);
        isInputCanonical = TRUE;
    }

    if (canonicalID == NULL) {
        // If a map element not found, then look for an alias
        tmpStatus = U_ZERO_ERROR;
        ures_getByKey(top, gTypeAliasTag, rb, &tmpStatus);
        ures_getByKey(rb, gTimezoneTag, rb, &tmpStatus);
        const UChar *canonical = ures_getStringByKey(rb,id,NULL,&tmpStatus);
        if (U_SUCCESS(tmpStatus)) {
            // canonical map found
            canonicalID = canonical;
        }

        if (canonicalID == NULL) {
            // Dereference the input ID using the tz data
            const UChar *derefer = TimeZone::dereferOlsonLink(tzid);
            if (derefer == NULL) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                int32_t len = u_strlen(derefer);
                u_UCharsToChars(derefer,id,len);
                id[len] = (char) 0; // Make sure it is null terminated.

                // replace '/' with ':'
                char *q = id;
                while (*q++) {
                    if (*q == '/') {
                        *q = ':';
                    }
                }

                // If a dereference turned something up then look for an alias.
                // rb still points to the alias table, so we don't have to go looking
                // for it.
                tmpStatus = U_ZERO_ERROR;
                canonical = ures_getStringByKey(rb,id,NULL,&tmpStatus);
                if (U_SUCCESS(tmpStatus)) {
                    // canonical map for the dereferenced ID found
                    canonicalID = canonical;
                } else {
                    canonicalID = derefer;
                    isInputCanonical = TRUE;
                }
            }
        }
    }
    ures_close(rb);
    ures_close(top);

    if (U_SUCCESS(status)) {
        U_ASSERT(canonicalID != NULL);  // canocanilD must be non-NULL here

        // Put the resolved canonical ID to the cache
        umtx_lock(gZoneMetaLock());
        {
            const UChar* idInCache = (const UChar *)uhash_get(gCanonicalIDCache, utzid);
            if (idInCache == NULL) {
                const UChar* key = ZoneMeta::findTimeZoneID(tzid);
                U_ASSERT(key != NULL);
                if (key != NULL) {
                    idInCache = (const UChar *)uhash_put(gCanonicalIDCache, (void *)key, (void *)canonicalID, &status);
                    U_ASSERT(idInCache == NULL);
                }
            }
            if (U_SUCCESS(status) && isInputCanonical) {
                // Also put canonical ID itself into the cache if not exist
                const UChar *canonicalInCache = (const UChar*)uhash_get(gCanonicalIDCache, canonicalID);
                if (canonicalInCache == NULL) {
                    canonicalInCache = (const UChar *)uhash_put(gCanonicalIDCache, (void *)canonicalID, (void *)canonicalID, &status);
                    U_ASSERT(canonicalInCache == NULL);
                }
            }
        }
        umtx_unlock(gZoneMetaLock());
    }

    return canonicalID;
}
Exemplo n.º 10
0
UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id,
                                              const Locale& inLocale,
                                              UnicodeString& result) {
    UErrorCode status = U_ZERO_ERROR;

    ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);

    // Suspend checking status until later...

    result.truncate(0);

    // Normalize the ID
    UnicodeString source, target, variant;
    UBool sawSource;
    TransliteratorIDParser::IDtoSTV(id, source, target, variant, sawSource);
    if (target.length() < 1) {
        // No target; malformed id
        return result;
    }
    if (variant.length() > 0) { // Change "Foo" to "/Foo"
        variant.insert(0, VARIANT_SEP);
    }
    UnicodeString ID(source);
    ID.append(TARGET_SEP).append(target).append(variant);

    // build the char* key
    if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) {
        char key[200];
        uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
        int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
        ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV);

        // Try to retrieve a UnicodeString from the bundle.
        UnicodeString resString = bundle.getStringEx(key, status);

        if (U_SUCCESS(status) && resString.length() != 0) {
            return result = resString; // [sic] assign & return
        }

#if !UCONFIG_NO_FORMATTING
        // We have failed to get a name from the locale data.  This is
        // typical, since most transliterators will not have localized
        // name data.  The next step is to retrieve the MessageFormat
        // pattern from the locale data and to use it to synthesize the
        // name from the ID.

        status = U_ZERO_ERROR;
        resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);

        if (U_SUCCESS(status) && resString.length() != 0) {
            MessageFormat msg(resString, inLocale, status);
            // Suspend checking status until later...

            // We pass either 2 or 3 Formattable objects to msg.
            Formattable args[3];
            int32_t nargs;
            args[0].setLong(2); // # of args to follow
            args[1].setString(source);
            args[2].setString(target);
            nargs = 3;

            // Use display names for the scripts, if they exist
            UnicodeString s;
            length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
            for (int j=1; j<=2; ++j) {
                status = U_ZERO_ERROR;
                uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
                args[j].getString(s);
                if (uprv_isInvariantUString(s.getBuffer(), s.length())) {
                    s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV);

                    resString = bundle.getStringEx(key, status);

                    if (U_SUCCESS(status)) {
                        args[j] = resString;
                    }
                }
            }

            status = U_ZERO_ERROR;
            FieldPosition pos; // ignored by msg
            msg.format(args, nargs, result, pos, status);
            if (U_SUCCESS(status)) {
                result.append(variant);
                return result;
            }
        }
#endif
    }

    // We should not reach this point unless there is something
    // wrong with the build or the RB_DISPLAY_NAME_PATTERN has
    // been deleted from the root RB_LOCALE_ELEMENTS resource.
    result = ID;
    return result;
}