Exemplo n.º 1
0
Arquivo: icu.c Projeto: IvoNet/calibre
// get_available_transliterators {{{
/*
 * Build a Python list containing the ID of every transliterator that ICU
 * enumerates through utrans_openIDs().
 *
 * self, args: not used by this function.
 *
 * Returns a new reference to the list.  Returns NULL with a Python exception
 * set when the list or the ICU enumerator cannot be created, or when an ID
 * cannot be converted or appended.  If ICU reports an error while iterating,
 * enumeration simply stops and the IDs collected so far are returned.
 */
static PyObject *
icu_get_available_transliterators(PyObject *self, PyObject *args) {
    PyObject *ans = NULL, *l = NULL;
    UErrorCode status = U_ZERO_ERROR;
    const UChar *id = NULL;
    UEnumeration *i = NULL;

    ans = PyList_New(0);
    if (ans == NULL) return PyErr_NoMemory();

    i = utrans_openIDs(&status);
    if (i == NULL || U_FAILURE(status)) {
        /* Do not leak an enumerator that was handed back alongside an error. */
        if (i != NULL) uenum_close(i);
        Py_DECREF(ans);
        PyErr_SetString(PyExc_RuntimeError, "Failed to create enumerator");
        return NULL;
    }

    for (;;) {
        id = uenum_unext(i, NULL, &status);
        /* NULL marks the end of the enumeration; a failed status ends it too. */
        if (id == NULL || U_FAILURE(status)) break;

        l = uchar_to_unicode(id, -1);
        if (l == NULL) goto error;
        if (PyList_Append(ans, l) != 0) {
            Py_DECREF(l);
            goto error;
        }
        /* The list holds its own reference now. */
        Py_DECREF(l);
    }
    uenum_close(i);

    return ans;

error:
    /* Never hand back a partial list while a Python exception is pending. */
    uenum_close(i);
    Py_DECREF(ans);
    /* uchar_to_unicode presumably sets an exception on failure; make sure one is set. */
    if (!PyErr_Occurred()) PyErr_NoMemory();
    return NULL;
}
Exemplo n.º 2
0
// Advances the wrapped UEnumeration by one element and exposes it through the
// member UnicodeString `unistr`.
// Returns a pointer to `unistr` holding the next string, or 0 when
// uenum_unext() yields no string or leaves `status` in a failure state.
const UnicodeString* UStringEnumeration::snext(UErrorCode& status) {
    int32_t charCount;
    const UChar* next = uenum_unext(uenum, &charCount, &status);
    if (next != 0 && U_SUCCESS(status)) {
        return &unistr.setTo(next, charCount);
    }
    return 0;
}
Exemplo n.º 3
0
/*
 * Exercises uenum_count/uenum_next/uenum_unext/uenum_reset against:
 *   1. a heap copy of emptyEnumerator, where every call is expected to
 *      report U_UNSUPPORTED_ERROR;
 *   2. a NULL enumeration, where every call is expected to leave the status
 *      at U_ZERO_ERROR;
 *   3. a heap copy of emptyPartialEnumerator, where uenum_unext is expected
 *      to report U_UNSUPPORTED_ERROR.
 * Each heap copy is released through uenum_close().  Mismatches are reported
 * with log_err().
 */
static void EmptyEnumerationTest(void) {
    UErrorCode status = U_ZERO_ERROR;
    UEnumeration *emptyEnum = uprv_malloc(sizeof(UEnumeration));

    /* Copying into a NULL destination would crash the test run. */
    if (emptyEnum == NULL) {
        log_err("EmptyEnumerationTest: out of memory\n");
        return;
    }

    uprv_memcpy(emptyEnum, &emptyEnumerator, sizeof(UEnumeration));
    if (uenum_count(emptyEnum, &status) != -1 || status != U_UNSUPPORTED_ERROR) {
        log_err("uenum_count failed\n");
    }
    status = U_ZERO_ERROR;
    if (uenum_next(emptyEnum, NULL, &status) != NULL || status != U_UNSUPPORTED_ERROR) {
        log_err("uenum_next failed\n");
    }
    status = U_ZERO_ERROR;
    if (uenum_unext(emptyEnum, NULL, &status) != NULL || status != U_UNSUPPORTED_ERROR) {
        log_err("uenum_unext failed\n");
    }
    status = U_ZERO_ERROR;
    uenum_reset(emptyEnum, &status);
    if (status != U_UNSUPPORTED_ERROR) {
        log_err("uenum_reset failed\n");
    }
    uenum_close(emptyEnum);

    /* NULL enumeration: the calls must neither produce a value nor touch the status. */
    status = U_ZERO_ERROR;
    if (uenum_next(NULL, NULL, &status) != NULL || status != U_ZERO_ERROR) {
        log_err("uenum_next(NULL) failed\n");
    }
    status = U_ZERO_ERROR;
    if (uenum_unext(NULL, NULL, &status) != NULL || status != U_ZERO_ERROR) {
        log_err("uenum_unext(NULL) failed\n");
    }
    status = U_ZERO_ERROR;
    uenum_reset(NULL, &status);
    if (status != U_ZERO_ERROR) {
        log_err("uenum_reset(NULL) failed\n");
    }

    emptyEnum = uprv_malloc(sizeof(UEnumeration));
    if (emptyEnum == NULL) {
        log_err("EmptyEnumerationTest: out of memory\n");
        return;
    }
    uprv_memcpy(emptyEnum, &emptyPartialEnumerator, sizeof(UEnumeration));
    status = U_ZERO_ERROR;
    if (uenum_unext(emptyEnum, NULL, &status) != NULL || status != U_UNSUPPORTED_ERROR) {
        log_err("partial uenum_unext failed\n");
    }
    uenum_close(emptyEnum);
}
Exemplo n.º 4
0
// Maps a caller-supplied time zone name to its canonical form.
//
// The name is compared, ignoring ASCII case, with every ID produced by
// ucal_openTimeZones() (ECMA-402 6.4.1 IsValidTimeZoneName). The first match
// is canonicalized with ucal_getCanonicalTimeZoneID()
// (ECMA-402 6.4.2 CanonicalizeTimeZoneName). "Etc/UTC" and "Etc/GMT" are
// reported as "UTC". If no ID matches, the returned String is still null.
static String canonicalizeTimeZoneName(const String& timeZoneName)
{
    UErrorCode status = U_ZERO_ERROR;
    UEnumeration* zoneEnumeration = ucal_openTimeZones(&status);
    ASSERT(U_SUCCESS(status));

    String canonical;
    do {
        // ICU hands out time zone IDs as UChar[] together with their length.
        int32_t candidateLength;
        status = U_ZERO_ERROR;
        const UChar* candidate = uenum_unext(zoneEnumeration, &candidateLength, &status);
        ASSERT(U_SUCCESS(status));

        // A null ID means the enumeration is exhausted.
        if (!candidate)
            break;

        StringView candidateView(candidate, candidateLength);
        if (equalIgnoringASCIICase(timeZoneName, candidateView)) {
            // Match found: resolve Link names to their Zone name.
            // The first attempt reuses the ID's own length as the capacity;
            // on overflow ICU reports the needed length and we retry once.
            Vector<UChar, 32> canonicalBuffer(candidateLength);
            status = U_ZERO_ERROR;
            auto canonicalLength = ucal_getCanonicalTimeZoneID(candidate, candidateLength, canonicalBuffer.data(), candidateLength, nullptr, &status);
            if (status == U_BUFFER_OVERFLOW_ERROR) {
                canonicalBuffer.grow(canonicalLength);
                status = U_ZERO_ERROR;
                ucal_getCanonicalTimeZoneID(candidate, candidateLength, canonicalBuffer.data(), canonicalLength, nullptr, &status);
            }
            ASSERT(U_SUCCESS(status));
            canonical = String(canonicalBuffer.data(), canonicalLength);
        }
    } while (canonical.isNull());
    uenum_close(zoneEnumeration);

    // ECMA-402 6.4.2 step 3: "Etc/UTC" and "Etc/GMT" are returned as "UTC".
    if (canonical == "Etc/UTC" || canonical == "Etc/GMT")
        canonical = ASCIILiteral("UTC");

    // ECMA-402 6.4.2 step 4.
    return canonical;
}
Exemplo n.º 5
0
/**
 * call-seq:
 *     UCalendar.time_zones
 *
 * Returns an array holding every time zone ID that ICU enumerates,
 * each one wrapped as a UString value.
 */
VALUE icu4r_cal_all_tz (VALUE obj)
{
	UErrorCode  status = U_ZERO_ERROR;
	int32_t name_len;
	UChar * tz_name;
	VALUE result ;
	UEnumeration * tz_enum = ucal_openTimeZones (&status);

	/* raise before allocating the result array if ICU could not open the list */
	ICU_RAISE(status);
	result = rb_ary_new();
	for (;;) {
		tz_name = (UChar*)uenum_unext(tz_enum, &name_len, &status);
		/* a NULL name ends the enumeration */
		if (!tz_name) {
			break;
		}
		rb_ary_push(result, icu_ustr_new(tz_name, name_len));
	}
	uenum_close(tz_enum);
	return result;
}
Exemplo n.º 6
0
/*
 * Walks the char-array enumeration built from test1 twice: first through
 * uenum_next() (char strings), then, after a reset, through uenum_unext()
 * (UChar strings).  Every element is logged verbosely, and an ICU failure
 * left in the status after either pass is reported with log_err().
 */
static void EnumerationTest(void) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = 0;
    UEnumeration *en = getchArrayEnum(test1, sizeof(test1)/sizeof(test1[0]));
    const char *string = NULL;
    const UChar *uString = NULL;
    while ((string = uenum_next(en, &len, &status))) {
        log_verbose("read \"%s\", length %i\n", string, len);
    }
    /* A NULL result also ends the loop on error, so the status must be inspected. */
    if (U_FAILURE(status)) {
        log_err("FAIL: uenum_next => %s\n", u_errorName(status));
    }
    uenum_reset(en, &status);
    while ((uString = uenum_unext(en, &len, &status))) {
        log_verbose("read \"%s\" (UChar), length %i\n", quikU2C(uString, len), len);
    }
    if (U_FAILURE(status)) {
        log_err("FAIL: uenum_unext => %s\n", u_errorName(status));
    }

    uenum_close(en);
}
Exemplo n.º 7
0
/*
 * Checks that ucnv_openStandardNames(name, standard) enumerates exactly the
 * names in `expected`, in order, when read through uenum_unext().  The
 * enumeration is walked three times with a uenum_reset() after each pass.
 *
 * name      converter name handed to ucnv_openStandardNames()
 * standard  standard name handed to ucnv_openStandardNames()
 * expected  `size` expected names as char strings
 * size      expected element count (compared against uenum_count())
 *
 * Returns 0 when the standard names cannot be opened or the count does not
 * match, 1 otherwise.  Per-element mismatches are logged but still return 1.
 * The enumeration is closed on every return path.
 */
static UBool doTestUCharNames(const char *name, const char *standard, const char **expected, int32_t size) {
    UErrorCode err = U_ZERO_ERROR;
    UEnumeration *myEnum = ucnv_openStandardNames(name, standard, &err);
    int32_t enumCount = uenum_count(myEnum, &err);
    int32_t idx, repeatTimes = 3;

    if (err == U_FILE_ACCESS_ERROR) {
        log_data_err("Unable to open standard names for %s of standard: %s\n", name, standard);
        uenum_close(myEnum);
        return 0;
    }

    if (size != enumCount) {
        log_err("FAIL: different size arrays. Got %d. Expected %d\n", enumCount, size);
        uenum_close(myEnum);
        return 0;
    }
    if (size < 0 && myEnum) {
        log_err("FAIL: size < 0, but recieved an actual object\n");
        uenum_close(myEnum);
        return 0;
    }
    log_verbose("\n%s %s\n", name, standard);
    while (repeatTimes-- > 0) {
        for (idx = 0; idx < enumCount; idx++) {
            UChar testName[256];
            char gotName[256];
            int32_t len = 0;
            const UChar *enumName = uenum_unext(myEnum, &len, &err);
            u_uastrncpy(testName, expected[idx], UPRV_LENGTHOF(testName));
            if (enumName == NULL) {
                /* u_strcmp() must not be handed a NULL string. */
                log_err("FAIL: uenum_unext(%d) returned NULL. expected \"%s\", error=%s\n",
                    idx, expected[idx], u_errorName(err));
            } else if (u_strcmp(enumName, testName) != 0 || U_FAILURE(err)
                || len != (int32_t)uprv_strlen(expected[idx]))
            {
                /* "%s" needs char strings: convert the UChar name before printing it. */
                u_austrncpy(gotName, enumName, UPRV_LENGTHOF(gotName) - 1);
                gotName[UPRV_LENGTHOF(gotName) - 1] = 0;
                log_err("FAIL: uenum_next(%d) == \"%s\". expected \"%s\", len=%d, error=%s\n",
                    idx, gotName, expected[idx], len, u_errorName(err));
            }
            log_verbose("%s\n", expected[idx]);
            err = U_ZERO_ERROR;
        }
        log_verbose("\n    reset\n");
        uenum_reset(myEnum, &err);
        if (U_FAILURE(err)) {
            log_err("FAIL: uenum_reset() for %s{%s} failed with %s\n",
                name, standard, u_errorName(err));
            err = U_ZERO_ERROR;
        }
    }
    uenum_close(myEnum);
    return 1;
}
Exemplo n.º 8
0
/**
 * call-seq:
 *     UCalendar.tz_for_country(country)
 *  
 * Returns array with all time zones associated with the given country.
 * Note: <code>country</code> must be value of type String and must not
 * contain NUL bytes (it is handed to ICU as a C string).
 * Returned array content is UString's
 *
 *     UCalendar.tz_for_country("GB") # => [ "Europe/Belfast", "Europe/London", "GB",  "GB-Eire"]
 *
 */
VALUE icu4r_cal_country_tz (VALUE obj, VALUE ctry)
{
	UErrorCode  status = U_ZERO_ERROR;
	UEnumeration * zones ; 
	VALUE ret ;
	UChar * name;
	int32_t len;
	const char * country;
	Check_Type(ctry, T_STRING);
	/* StringValueCStr yields a NUL-terminated pointer and raises on embedded
	 * NULs; RSTRING_PTR alone gives neither guarantee. */
	country = StringValueCStr(ctry);
	zones = ucal_openCountryTimeZones (country, &status) ;
	ICU_RAISE(status);
	ret = rb_ary_new();
	/* a NULL name ends the enumeration */
	while( (name = (UChar*)uenum_unext(zones, &len, &status))) {
		rb_ary_push(ret, icu_ustr_new(name, len));
	}
	uenum_close(zones);
	return ret;
}
Exemplo n.º 9
0
/*
 * Walks the UChar-array enumeration built from test2 twice: first through
 * uenum_unext(), then, after a reset, through uenum_next().  Each element is
 * logged verbosely; a failure status left behind by either pass is reported
 * with log_err().
 */
static void DefaultNextTest(void) {
    UErrorCode ec = U_ZERO_ERROR;
    int32_t length = 0;
    UEnumeration *strings = getuchArrayEnum(test2, sizeof(test2)/sizeof(test2[0]));
    const UChar *wide = NULL;
    const char *narrow = NULL;

    /* Pass 1: UChar strings. */
    for (;;) {
        wide = uenum_unext(strings, &length, &ec);
        if (wide == NULL) {
            break;
        }
        log_verbose("read \"%s\" (UChar), length %i\n", quikU2C(wide, length), length);
    }
    if (U_FAILURE(ec)) {
        log_err("FAIL: uenum_unext => %s\n", u_errorName(ec));
    }

    /* Pass 2: char strings, starting again from the first element. */
    uenum_reset(strings, &ec);
    for (;;) {
        narrow = uenum_next(strings, &length, &ec);
        if (narrow == NULL) {
            break;
        }
        log_verbose("read \"%s\", length %i\n", narrow, length);
    }
    if (U_FAILURE(ec)) {
        log_err("FAIL: uenum_next => %s\n", u_errorName(ec));
    }

    uenum_close(strings);
}
Exemplo n.º 10
0
/*
 * Copies every string produced by an ICU enumeration into a new immutable
 * CFArray of CFStrings.
 *
 * enumer  the enumeration to drain; it is not closed here.
 * icuErr  in/out ICU status.  If it already signals failure on entry nothing
 *         is enumerated.  A U_INDEX_OUTOFBOUNDS_ERROR raised while iterating
 *         is reset to U_ZERO_ERROR (workaround for ICU bug 5220).
 *
 * Returns a new array the caller must release, or NULL when the status is a
 * failure or the working array could not be created.
 */
static CFArrayRef __CFLocaleCopyUEnumerationAsArray(UEnumeration *enumer, UErrorCode *icuErr) {
    const UChar *next = NULL;
    int32_t len = 0;
    CFMutableArrayRef working = NULL;
    if (U_SUCCESS(*icuErr)) {
        working = CFArrayCreateMutable(kCFAllocatorSystemDefault, 0, &kCFTypeArrayCallBacks);
    }
    // Without a working array there is nothing to append to or copy from;
    // bail out instead of handing NULL to the CFArray calls below.
    if (working == NULL) {
        return NULL;
    }
    while ((next = uenum_unext(enumer, &len, icuErr)) && U_SUCCESS(*icuErr)) {
        CFStringRef string = CFStringCreateWithCharacters(kCFAllocatorSystemDefault, (const UniChar *)next, (CFIndex) len);
        if (string == NULL) {
            // Creation failed: skip this entry rather than append/release NULL.
            continue;
        }
        CFArrayAppendValue(working, string);
        CFRelease(string);
    }
    if (*icuErr == U_INDEX_OUTOFBOUNDS_ERROR) {
        *icuErr = U_ZERO_ERROR;      // Temp: Work around bug (ICU 5220) in ucurr enumerator
    }
    CFArrayRef result = NULL;
    if (U_SUCCESS(*icuErr)) {
        result = CFArrayCreateCopy(kCFAllocatorSystemDefault, working);
    }
    CFRelease(working);
    return result;
}
Exemplo n.º 11
0
/*
 * Test for building a UDateTimePatternGenerator from scratch.
 *
 * Part 1 opens an empty generator, adds patterns (including a duplicate and a
 * NULL pattern), replaces field types, then enumerates the skeletons and base
 * skeletons and compares the pattern of each against expected data
 * (testPattern2/redundantPattern and resultBaseSkeletons).
 * Part 2 repeats the User Guide sample: open a generator for locale "fr", get
 * the best pattern for `skeleton`, format `sampleDate` and compare the result
 * with sampleFormatted.
 *
 * Failures are reported with log_err(); most of them return immediately.
 * NOTE(review): those early returns skip udatpg_close(dtpg) and, inside the
 * enumeration loops, uenum_close(en) -- the objects leak on failure paths.
 */
static void TestBuilder() {
    UErrorCode errorCode=U_ZERO_ERROR;
    UDateTimePatternGenerator *dtpg;
    UDateTimePatternConflict conflict;
    UEnumeration *en;
    UChar result[20];
    int32_t length, pLength;  
    const UChar *s, *p;
    const UChar* ptrResult[2]; 
    int32_t count=0;
    UDateTimePatternGenerator *generator;
    int32_t formattedCapacity, resultLen,patternCapacity ;
    UChar   pattern[40], formatted[40];
    UDateFormat *formatter;
    UDate sampleDate = 837039928046.0;
    static const char locale[]= "fr";
    UErrorCode status=U_ZERO_ERROR;
    
    /* test create an empty DateTimePatternGenerator */
    dtpg=udatpg_openEmpty(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_openEmpty() failed - %s\n", u_errorName(errorCode));
        return;
    }
    
    /* Add a pattern */
    conflict = udatpg_addPattern(dtpg, redundantPattern, 5, FALSE, result, 20, 
                                 &length, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_addPattern() failed - %s\n", u_errorName(errorCode));
        return;
    }
    /* Add a redundant pattern */
    conflict = udatpg_addPattern(dtpg, redundantPattern, 5, FALSE, result, 20,
                                 &length, &errorCode);
    if(conflict == UDATPG_NO_CONFLICT) {
        log_err("udatpg_addPattern() failed to find the duplicate pattern.\n");
        return;
    }
    /* Test pattern == NULL */
    /* Note: the return value is stored in `length`, which is also passed as the
       output length argument. */
    s=NULL;
    length = udatpg_addPattern(dtpg, s, 0, FALSE, result, 20,
                               &length, &errorCode);
    if(!U_FAILURE(errorCode)&&(length!=0) ) {
        log_err("udatpg_addPattern failed in illegal argument - pattern is NULL.\n");
        return;
    }

    /* replace field type */
    /* errorCode is cleared first because the NULL-pattern call above may have set it. */
    errorCode=U_ZERO_ERROR;
    conflict = udatpg_addPattern(dtpg, testPattern2, 7, FALSE, result, 20,
                                 &length, &errorCode);
    if((conflict != UDATPG_NO_CONFLICT)||U_FAILURE(errorCode)) {
        log_err("udatpg_addPattern() failed to add HH:mm v. - %s\n", u_errorName(errorCode));
        return;
    }
    length = udatpg_replaceFieldTypes(dtpg, testPattern2, 7, replacedStr, 4,
                                      result, 20, &errorCode);
    if (U_FAILURE(errorCode) || (length==0) ) {
        log_err("udatpg_replaceFieldTypes failed!\n");
        return;
    }
    
    /* Get all skeletons and the corresponding pattern for each skeleton. */
    ptrResult[0] = testPattern2;
    ptrResult[1] = redundantPattern; 
    count=0;
    en = udatpg_openSkeletons(dtpg, &errorCode);  
    /* `length` here is still the value returned by udatpg_replaceFieldTypes above. */
    if (U_FAILURE(errorCode) || (length==0) ) {
        log_err("udatpg_openSkeletons failed!\n");
        return;
    }
    /* NOTE(review): ptrResult has two entries; `count` is not bounds-checked, so
       this assumes the generator reports at most two skeletons -- confirm. */
    while ( (s=uenum_unext(en, &length, &errorCode))!= NULL) {
        p = udatpg_getPatternForSkeleton(dtpg, s, length, &pLength);
        if (U_FAILURE(errorCode) || p==NULL || u_memcmp(p, ptrResult[count], pLength)!=0 ) {
            log_err("udatpg_getPatternForSkeleton failed!\n");
            return;
        }
        count++;
    }
    uenum_close(en);
    
    /* Get all baseSkeletons */
    /* NOTE(review): `count` indexes resultBaseSkeletons without a bounds check -- confirm
       the table is at least as long as the enumeration. */
    en = udatpg_openBaseSkeletons(dtpg, &errorCode);
    count=0;
    while ( (s=uenum_unext(en, &length, &errorCode))!= NULL) {
        p = udatpg_getPatternForSkeleton(dtpg, s, length, &pLength);
        if (U_FAILURE(errorCode) || p==NULL || u_memcmp(p, resultBaseSkeletons[count], pLength)!=0 ) {
            log_err("udatpg_getPatternForSkeleton failed!\n");
            return;
        }
        count++;
    }
    /* NOTE(review): this check runs after the base-skeleton loop but its message
       still names udatpg_openSkeletons. */
    if (U_FAILURE(errorCode) || (length==0) ) {
        log_err("udatpg_openSkeletons failed!\n");
        return;
    }
    uenum_close(en);
    
    udatpg_close(dtpg);
    
    /* sample code in Userguide */
    patternCapacity = (int32_t)(sizeof(pattern)/sizeof((pattern)[0]));
    status=U_ZERO_ERROR;
    generator=udatpg_open(locale, &status);
    if(U_FAILURE(status)) {
        return;
    }

    /* get a pattern for an abbreviated month and day */
    length = udatpg_getBestPattern(generator, skeleton, 4,
                                   pattern, patternCapacity, &status);
    formatter = udat_open(UDAT_IGNORE, UDAT_DEFAULT, locale, timeZoneGMT, -1,
                          pattern, length, &status);
    if (formatter==NULL) {
        log_err("Failed to initialize the UDateFormat of the sample code in Userguide.\n");
        udatpg_close(generator);
        return;
    }

    /* use it to format (or parse) */
    formattedCapacity = (int32_t)(sizeof(formatted)/sizeof((formatted)[0]));
    resultLen=udat_format(formatter, ucal_getNow(), formatted, formattedCapacity,
                          NULL, &status);
    /* for French, the result is "13 sept." */

    /* cannot use the result from ucal_getNow() because the value changes every day. */
    resultLen=udat_format(formatter, sampleDate, formatted, formattedCapacity,
                          NULL, &status);
    if ( u_memcmp(sampleFormatted, formatted, resultLen) != 0 ) {
        log_err("Failed udat_format() of sample code in Userguide.\n");
    }
    udatpg_close(generator);
    udat_close(formatter);
}
Exemplo n.º 12
0
 // Fetches the next string from the wrapped enumeration `e` by forwarding to
 // uenum_unext(); the string's length is written through `charCount` and
 // errors are reported through the captured `status`.
 const UChar* operator()(int32_t* charCount)
 {
     const UChar* nextString = uenum_unext(e, charCount, status);
     return nextString;
 }
Exemplo n.º 13
0
// Returns the sorted, duplicate-free list of charsets that libicu can detect
// and that QTextCodec can also handle. The list is computed on the first call
// and cached in d->_allDetectableCharsets; later calls return the cache.
QStringList QCharsetDetector::getAllDetectableCharsets()
{
    Q_D(QCharsetDetector);

    // Already computed by an earlier call: hand back the cached list.
    if (!d->_allDetectableCharsets.isEmpty())
        return d->_allDetectableCharsets;

    // For reference, the codecs supported by QTextCodec (Qt 4.7):
    //
    // ISO-2022-JP JIS7 EUC-KR GB2312 Big5 Big5-ETen CP950 GB18030
    // EUC-JP Shift_JIS SJIS MS_Kanji System UTF-8 ISO-8859-1 latin1
    // CP819 IBM819 iso-ir-100 csISOLatin1 ISO-8859-15 latin9 UTF-32LE
    // UTF-32BE UTF-32 UTF-16LE UTF-16BE UTF-16 mulelao-1 roman8
    // hp-roman8 csHPRoman8 TIS-620 ISO 8859-11 WINSAMI2 WS2 Apple
    // Roman macintosh MacRoman windows-1258 CP1258 windows-1257
    // CP1257 windows-1256 CP1256 windows-1255 CP1255 windows-1254
    // CP1254 windows-1253 CP1253 windows-1252 CP1252 windows-1251
    // CP1251 windows-1250 CP1250 IBM866 CP866 csIBM866 IBM874 CP874
    // IBM850 CP850 csPC850Multilingual ISO-8859-16 iso-ir-226 latin10
    // ISO-8859-14 iso-ir-199 latin8 iso-celtic ISO-8859-13
    // ISO-8859-10 iso-ir-157 latin6 ISO-8859-10:1992 csISOLatin6
    // ISO-8859-9 iso-ir-148 latin5 csISOLatin5 ISO-8859-8 ISO
    // 8859-8-I iso-ir-138 hebrew csISOLatinHebrew ISO-8859-7 ECMA-118
    // greek iso-ir-126 csISOLatinGreek ISO-8859-6 ISO-8859-6-I
    // ECMA-114 ASMO-708 arabic iso-ir-127 csISOLatinArabic ISO-8859-5
    // cyrillic iso-ir-144 csISOLatinCyrillic ISO-8859-4 latin4
    // iso-ir-110 csISOLatin4 ISO-8859-3 latin3 iso-ir-109 csISOLatin3
    // ISO-8859-2 latin2 iso-ir-101 csISOLatin2 KOI8-U KOI8-RU KOI8-R
    // csKOI8R Iscii-Mlm Iscii-Knd Iscii-Tlg Iscii-Tml Iscii-Ori
    // Iscii-Gjr Iscii-Pnj Iscii-Bng Iscii-Dev TSCII GBK gb2312.1980-0
    // gbk-0 CP936 MS936 windows-936 jisx0201*-0 jisx0208*-0
    // ksc5601.1987-0 cp949 Big5-HKSCS big5-0 big5hkscs-0

    // Names of the codecs this Qt build actually provides.
    QStringList qtCodecNames;
    foreach(const QByteArray &codecName, QTextCodec::availableCodecs())
        qtCodecNames.append(QString::fromLatin1(codecName));

    // Charsets detectable by libicu 4.4.2, hardcoded (see the explanation below).
    static const char * const icuDetectableNames[] = {
        "UTF-8",
        "UTF-16BE",
        "UTF-16LE",
        "UTF-32BE",
        "UTF-32LE",
        "ISO-8859-1",
        "ISO-8859-2",
        "ISO-8859-5",
        "ISO-8859-6",
        "ISO-8859-7",
        "ISO-8859-8-I",
        "ISO-8859-8",
        "ISO-8859-9",
        "KOI8-R",
        "Shift_JIS",
        "GB18030",
        "EUC-JP",
        "EUC-KR",
        "Big5",
        "ISO-2022-JP",
        "ISO-2022-KR",
        "ISO-2022-CN",
        "IBM424_rtl",
        "IBM424_ltr",
        "IBM420_rtl",
        "IBM420_ltr",
        "windows-1250",
        "windows-1251",
        "windows-1252",
        "windows-1253",
        "windows-1255",
        "windows-1256",
        "windows-1254"
    };
    const int icuDetectableCount =
        int(sizeof(icuDetectableNames) / sizeof(icuDetectableNames[0]));

    QStringList icuCharsets;
    for (int k = 0; k < icuDetectableCount; ++k)
        icuCharsets << QLatin1String(icuDetectableNames[k]);

    // Why the list above is hardcoded:
    //
    // The documentation of ucsdet_getAllDetectableCharsets() at
    // http://icu-project.org/apiref/icu4c/ucsdet_8h.html states that the
    // state of the detector passed in does not affect the result, and that
    // the detector parameter only ensures the detection service is
    // initialized and its data available.
    //
    // In practice the result *does* depend on the detector state. For
    // example "windows-1250" is reported if non-ASCII text is in the
    // detector and a detection was attempted before the call, but it is
    // not reported when the call is made on a freshly created, empty
    // detector. Once ucsdet_getAllDetectableCharsets() has been called,
    // the returned list no longer seems to change, even if the text is
    // changed and another detection is attempted.
    //
    // It may also report a charset several times, once per language it can
    // detect with it (e.g. ISO-8859-2 four times for “cs”, “hu”, “pl”
    // and “ro”).
    //
    // This looks like a bug, so the complete set of charsets the function
    // can possibly return is hardcoded above. The query below should
    // therefore not add any extra charsets, at least not for libicu 4.4.2.
    clearError();
    UEnumeration *charsetEnum =
        ucsdet_getAllDetectableCharsets(d->_uCharsetDetector, &(d->_status));
    if (!hasError()) {
        qint32 nameLength;
        for (;;) {
            const UChar *name = uenum_unext(charsetEnum, &nameLength, &(d->_status));
            // NULL ends the enumeration.
            if (name == NULL)
                break;
            // Ignore names delivered together with an error status.
            if (hasError())
                continue;
            icuCharsets << QString::fromUtf16(name, nameLength);
        }
    }
    uenum_close(charsetEnum);

    // Keep only charsets QTextCodec supports, dropping duplicates.
    foreach(const QString &charset, icuCharsets) {
        const bool supportedByQt = qtCodecNames.contains(charset);
        if (supportedByQt && !d->_allDetectableCharsets.contains(charset))
            d->_allDetectableCharsets.append(charset);
    }

    std::sort(d->_allDetectableCharsets.begin(), d->_allDetectableCharsets.end());

    return d->_allDetectableCharsets;
}
Exemplo n.º 14
0
/*
 * Verifies the contents of an enumeration against expected data.
 *
 * line            caller's source line, used as a prefix in every log message
 * u               enumeration under test; it is reset, counted and then walked
 *                 once with uenum_next() (only when compareToChar is given)
 *                 and once with uenum_unext()
 * compareToChar   expected elements as char strings, or NULL to skip the
 *                 char comparisons
 * compareToUChar  expected elements as UChar strings, or NULL to skip the
 *                 UChar comparisons
 * expect_count    expected number of elements
 *
 * Mismatches are reported with log_err().  The function returns early when
 * resetting, counting or advancing the enumeration fails, or when an
 * expected char string does not fit the local conversion buffer.
 */
static void verifyEnumeration(int line, UEnumeration *u, const char * const * compareToChar, const UChar * const * compareToUChar, int32_t expect_count) {
  UErrorCode status = U_ZERO_ERROR;
  int32_t got_count,i,len;
  const char *c;
  UChar buf[1024];

  log_verbose("%s:%d: verifying enumeration..\n", __FILE__, line);

  uenum_reset(u, &status);
  if(U_FAILURE(status)) {
    log_err("%s:%d: FAIL: could not reset char strings enumeration: %s\n", __FILE__, line, u_errorName(status));
    return;
  }

  got_count = uenum_count(u, &status);
  if(U_FAILURE(status)) {
    log_err("%s:%d: FAIL: could not count char strings enumeration: %s\n", __FILE__, line, u_errorName(status));
    return;
  }
  
  if(got_count!=expect_count) {
    log_err("%s:%d: FAIL: expect count %d got %d\n", __FILE__, line, expect_count, got_count);
  } else {
    log_verbose("%s:%d: OK: got count %d\n", __FILE__, line, got_count);
  }

  if(compareToChar!=NULL) { /* else, not invariant */
    for(i=0;i<got_count;i++) {
      int32_t expect_len;
      c = uenum_next(u,&len, &status);
      if(U_FAILURE(status)) {
        log_err("%s:%d: FAIL: could not iterate to next after %d: %s\n", __FILE__, line, i, u_errorName(status));
        return;
      }
      if(c==NULL) {
        log_err("%s:%d: FAIL: got NULL for next after %d: %s\n", __FILE__, line, i, u_errorName(status));
        return;
      }
      
      if(strcmp(c,compareToChar[i])) {
        log_err("%s:%d: FAIL: string #%d expected '%s' got '%s'\n", __FILE__, line, i, compareToChar[i], c);
      } else {
        log_verbose("%s:%d: OK: string #%d got '%s'\n", __FILE__, line, i, c);
      }
      
      /* Compare and print as int32_t: strlen() returns size_t, which matches
         neither the signed len nor the %d conversion. */
      expect_len = (int32_t)strlen(compareToChar[i]);
      if(len!=expect_len) {
        log_err("%s:%d: FAIL: string #%d expected len %d got %d\n", __FILE__, line, i, expect_len, len);
      } else {
        log_verbose("%s:%d: OK: string #%d got len %d\n", __FILE__, line, i, len);
      }
    }
  }

  /* now try U */
  uenum_reset(u, &status);
  if(U_FAILURE(status)) {
    log_err("%s:%d: FAIL: could not reset again char strings enumeration: %s\n", __FILE__, line, u_errorName(status));
    return;
  }

  for(i=0;i<got_count;i++) {
    const UChar *ustr = uenum_unext(u,&len, &status);
    if(U_FAILURE(status)) {
      log_err("%s:%d: FAIL: could not iterate to unext after %d: %s\n", __FILE__, line, i, u_errorName(status));
      return;
    }
    if(ustr==NULL) {
      log_err("%s:%d: FAIL: got NULL for unext after %d: %s\n", __FILE__, line, i, u_errorName(status));
      return;
    }
    if(compareToChar!=NULL) {
      int32_t expect_len = (int32_t)strlen(compareToChar[i]);
      /* The expected string plus its terminator must fit into buf. */
      if(expect_len >= (int32_t)(sizeof(buf)/sizeof(buf[0]))) {
        log_err("%s:%d: FAIL: expected string #%d is too long (%d) for the comparison buffer\n", __FILE__, line, i, expect_len);
        return;
      }
      u_charsToUChars(compareToChar[i], buf, expect_len+1);
      if(u_strncmp(ustr,buf,len)) {
        int j;
        log_err("%s:%d: FAIL: ustring #%d expected '%s' got '%s'\n", __FILE__, line, i, compareToChar[i], austrdup(ustr));
        for(j=0;ustr[j]&&buf[j];j++) {
          log_verbose("  @ %d\t<U+%04X> vs <U+%04X>\n", j, ustr[j],buf[j]);
        }
      } else {
        log_verbose("%s:%d: OK: ustring #%d got '%s'\n", __FILE__, line, i, compareToChar[i]);
      }
      
      if(len!=expect_len) {
        log_err("%s:%d: FAIL: ustring #%d expected len %d got %d\n", __FILE__, line, i, expect_len, len);
      } else {
        log_verbose("%s:%d: OK: ustring #%d got len %d\n", __FILE__, line, i, len);
      }
    }

    if(compareToUChar!=NULL) {
      if(u_strcmp(ustr,compareToUChar[i])) {
        int j;
        log_err("%s:%d: FAIL: ustring #%d expected '%s' got '%s'\n", __FILE__, line, i, austrdup(compareToUChar[i]), austrdup(ustr));
        /* Dump code unit j of expected string i, not the j-th string pointer. */
        for(j=0;ustr[j]&&compareToUChar[i][j];j++) {
          log_verbose("  @ %d\t<U+%04X> vs <U+%04X>\n", j, ustr[j],compareToUChar[i][j]);
        }
      } else {
        log_verbose("%s:%d: OK: ustring #%d got '%s'\n", __FILE__, line, i, austrdup(compareToUChar[i]));
      }
      
      if(len!=u_strlen(compareToUChar[i])) {
        log_err("%s:%d: FAIL: ustring #%d expected len %d got %d\n", __FILE__, line, i, u_strlen(compareToUChar[i]), len);
      } else {
        log_verbose("%s:%d: OK: ustring #%d got len %d\n", __FILE__, line, i, len);
      }
    }
  }
}
Exemplo n.º 15
0
// Looks up the ICU transliterator that corresponds to a CFString transform
// constant and opens it.
//
// transform: the CFStringTransform identifier to resolve.
// dir:       direction handed to utrans_openU().
// error:     in/out ICU status; failures from utrans_openIDs(), uenum_count(),
//            uenum_unext() and utrans_openU() are reported through it.
//
// The ICU transliterator IDs are enumerated until one matches the
// (ICU ID, CF constant) pair table below; that ID is then opened. If nothing
// was opened for one of the known constants, a one-time message is logged.
static inline UTransliterator *utrans_find(CFStringRef transform, UTransDirection dir, UErrorCode *error) {
    UEnumeration *uenum = NULL;
    UTransliterator *trans = NULL;
    // Single-pass do/while(0): `break` is used to jump to the cleanup below.
    do {
        uenum = utrans_openIDs(error);
        if (U_FAILURE(*error)) {
            DEBUG_LOG("%s", u_errorName(*error));
            break;
        }

        int32_t count = uenum_count(uenum, error);
        if (U_FAILURE(*error)) {
            DEBUG_LOG("%s", u_errorName(*error));
            break;
        }
        int32_t trans_idx = 0;
        // Stop as soon as a transliterator has been opened or all IDs were seen.
        while (trans_idx < count && trans == NULL) {
            int32_t idLen = 0;
            const UChar *uid = uenum_unext(uenum, &idLen, error);
            if (U_FAILURE(*error)) {
                DEBUG_LOG("%s", u_errorName(*error));
                break;
            }
            // this seems rather unlikely since we should have already broken
            // by the trans_idx exceeding the count
            if (uid == NULL) {
                break;
            }

            // Wrap the ICU buffer without copying (kCFAllocatorNull: CF does not free it).
            // NOTE(review): `name` is not checked for NULL before CFEqual/CFRelease.
            CFStringRef name = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, uid, idLen, kCFAllocatorNull);
            // It would have been nice if these string constants were actually defined somewhere in icu, but sadly they are runtime metadata...
            if ((CFEqual(name, CFSTR("Any-Remove")) && CFEqual(transform, kCFStringTransformStripCombiningMarks)) ||
                (CFEqual(name, CFSTR("Any-Latin")) && CFEqual(transform, kCFStringTransformToLatin)) ||
                (CFEqual(name, CFSTR("Latin-Katakana")) && CFEqual(transform, kCFStringTransformLatinKatakana)) ||
                (CFEqual(name, CFSTR("Latin-Hiragana")) && CFEqual(transform, kCFStringTransformLatinHiragana)) ||
                (CFEqual(name, CFSTR("Hiragana-Katakana")) && CFEqual(transform, kCFStringTransformHiraganaKatakana)) ||
                (CFEqual(name, CFSTR("Latin-Hangul")) && CFEqual(transform, kCFStringTransformLatinHangul)) ||
                (CFEqual(name, CFSTR("Latin-Arabic")) && CFEqual(transform, kCFStringTransformLatinArabic)) ||
                (CFEqual(name, CFSTR("Latin-Hebrew")) && CFEqual(transform, kCFStringTransformLatinHebrew)) ||
                (CFEqual(name, CFSTR("Latin-Thai")) && CFEqual(transform, kCFStringTransformLatinThai)) ||
                (CFEqual(name, CFSTR("Latin-Cyrillic")) && CFEqual(transform, kCFStringTransformLatinCyrillic)) ||
                (CFEqual(name, CFSTR("Latin-Greek")) && CFEqual(transform, kCFStringTransformLatinGreek)) ||
                (CFEqual(name, CFSTR("Any-Hex/XML")) && CFEqual(transform, kCFStringTransformToXMLHex)) ||
                (CFEqual(name, CFSTR("Any-Name")) && CFEqual(transform, kCFStringTransformToUnicodeName)) ||
                (CFEqual(name, CFSTR("Accents-Any")) && CFEqual(transform, kCFStringTransformStripDiacritics))) {
                trans = utrans_openU(uid, idLen, dir, NULL, 0, NULL, error);
            }
            CFRelease(name);
            trans_idx++;
        }
    } while (0);

    // Cleanup shared by the success path and every `break` above.
    if (uenum != NULL) {
        uenum_reset(uenum, error);
        uenum_close(uenum);
    }

    // Nothing was opened although the request names a known transform: log once.
    // NOTE(review): kCFStringTransformLatinThai is matched in the loop above but
    // is missing from this list -- confirm whether that is intentional.
    if (trans == NULL && (CFEqual(transform, kCFStringTransformStripCombiningMarks) ||
                          CFEqual(transform, kCFStringTransformToLatin) ||
                          CFEqual(transform, kCFStringTransformLatinKatakana) ||
                          CFEqual(transform, kCFStringTransformLatinHiragana) ||
                          CFEqual(transform, kCFStringTransformHiraganaKatakana) ||
                          CFEqual(transform, kCFStringTransformLatinHangul) ||
                          CFEqual(transform, kCFStringTransformLatinArabic) ||
                          CFEqual(transform, kCFStringTransformLatinHebrew) ||
                          CFEqual(transform, kCFStringTransformLatinCyrillic) ||
                          CFEqual(transform, kCFStringTransformLatinGreek) ||
                          CFEqual(transform, kCFStringTransformToXMLHex) ||
                          CFEqual(transform, kCFStringTransformToUnicodeName) ||
                          CFEqual(transform, kCFStringTransformStripDiacritics))) {
        static dispatch_once_t once = 0L;
        dispatch_once(&once, ^{
            RELEASE_LOG("Unable to find transliterators in icu data: likely this is from not including the Transliterators section in building your icu.dat file");
        });
    }
Exemplo n.º 16
0
// Returns the set of locale names available for the resource bundle path
// 'bundleID', as a Hashtable whose keys are the names reported by
// ures_openAvailableLocales(). The values are meaningless non-NULL pointers.
//
// Results are cached per bundleID in LocaleUtility_cache, which owns the
// returned tables; callers must not delete them.
//
// Returns NULL when a table cannot be allocated or ICU reports a failure
// while the names are collected or stored.
const Hashtable *
LocaleUtility::getAvailableLocaleNames(const UnicodeString & bundleID)
{
	// LocaleUtility_cache is a hash-of-hashes.  The top-level keys
	// are path strings ('bundleID') passed to
	// ures_openAvailableLocales.  The top-level values are
	// second-level hashes.  The second-level keys are result strings
	// from ures_openAvailableLocales.  The second-level values are
	// garbage ((void*)1 or other random pointer).

	UErrorCode status = U_ZERO_ERROR;
	Hashtable * cache;
	umtx_lock(NULL);
	cache = LocaleUtility_cache;
	umtx_unlock(NULL);

	if (cache == NULL)
	{
		cache = new Hashtable(status);
		if (cache == NULL || U_FAILURE(status))
		{
			// A table whose constructor reported failure must not leak.
			delete cache;
			return NULL; // catastrophic failure; e.g. out of memory
		}
		cache->setValueDeleter(uhash_deleteHashtable);
		Hashtable * h; // set this to final LocaleUtility_cache value
		umtx_lock(NULL);
		h = LocaleUtility_cache;
		if (h == NULL)
		{
			LocaleUtility_cache = h = cache;
			cache = NULL;
			ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
		}
		umtx_unlock(NULL);
		// Another thread installed its cache first: discard ours.
		if (cache != NULL)
		{
			delete cache;
		}
		cache = h;
	}

	U_ASSERT(cache != NULL);

	Hashtable * htp;
	umtx_lock(NULL);
	htp = (Hashtable *) cache->get(bundleID);
	umtx_unlock(NULL);

	if (htp == NULL)
	{
		htp = new Hashtable(status);
		if (htp == NULL || U_FAILURE(status))
		{
			// Never hand out (or leak) a table that failed to construct.
			delete htp;
			return NULL;
		}

		CharString cbundleID;
		cbundleID.appendInvariantChars(bundleID, status);
		const char * path = cbundleID.isEmpty() ? NULL : cbundleID.data();
		UEnumeration * uenum = ures_openAvailableLocales(path, &status);
		for (;;)
		{
			const UChar * id = uenum_unext(uenum, NULL, &status);
			if (id == NULL)
			{
				break;
			}
			htp->put(UnicodeString(id), (void *)htp, status);
		}
		uenum_close(uenum);
		if (U_FAILURE(status))
		{
			delete htp;
			return NULL;
		}

		umtx_lock(NULL);
		Hashtable * existing = (Hashtable *) cache->get(bundleID);
		if (existing != NULL)
		{
			// Another thread filled this entry while we were building ours.
			// Replacing it would let the cache's value deleter free a table
			// that thread may already have returned, so keep theirs.
			umtx_unlock(NULL);
			delete htp;
			htp = existing;
		}
		else
		{
			cache->put(bundleID, (void *)htp, status);
			umtx_unlock(NULL);
			if (U_FAILURE(status))
			{
				// The cache did not take the entry. Presumably the value
				// deleter has already disposed of htp in that case (TODO
				// confirm); either way it must not be returned.
				htp = NULL;
			}
		}
	}
	return htp;
}
Exemplo n.º 17
0
/*
 * Enumerates all transliterator IDs through utrans_openIDs() and checks that:
 *   - uenum_count() reports at least one ID;
 *   - for the first 10 IDs, a transliterator opened with utrans_openU()
 *     reports the same ID through utrans_getUnicodeID();
 *   - after uenum_reset(), uenum_count() equals the number of IDs that
 *     uenum_unext() actually returned.
 * Problems are reported with log_err().
 */
static void TestUnicodeIDs() {
    UEnumeration *uenum;
    UTransliterator *utrans;
    const UChar *id, *id2;
    int32_t idLength, id2Length, count, count2;

    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    uenum=utrans_openIDs(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
        return;
    }

    count=uenum_count(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
    }

    /* From here on, count is the number of IDs actually returned by uenum_unext(). */
    count=0;
    for(;;) {
        id=uenum_unext(uenum, &idLength, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
            break;
        }
        if(id==NULL) {
            break;
        }

        if(++count>10) {
            /* try to actually open only a few transliterators */
            continue;
        }

        utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
            /* Clear the failure so that the next uenum_unext() call is not
               misreported as an enumeration error. */
            errorCode=U_ZERO_ERROR;
            continue;
        }

        id2=utrans_getUnicodeID(utrans, &id2Length);
        if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
            log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
        }

        utrans_close(utrans);
    }

    uenum_reset(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
    } else {
        count2=uenum_count(uenum, &errorCode);
        /* Validate the value just obtained (count2), not the earlier tally. */
        if(U_FAILURE(errorCode) || count2<1) {
            log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
        } else if(count!=count2) {
            log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
        }
    }

    uenum_close(uenum);
}