static void process_text_file(struct config_t *p_config) { char *line = 0; char linebuf[1024]; xmlDoc *doc = xmlParseFile(p_config->conffile); xmlNode *xml_node = xmlDocGetRootElement(doc); long unsigned int token_count = 0; long unsigned int line_count = 0; UErrorCode status = U_ZERO_ERROR; if (!xml_node) { printf("Could not parse XML config file '%s' \n", p_config->conffile); exit(1); } p_config->chain = icu_chain_xml_config(xml_node, 1, &status); if (!p_config->chain || !U_SUCCESS(status)) { printf("Could not set up ICU chain from config file '%s' \n", p_config->conffile); exit(1); } if (p_config->xmloutput) fprintf(p_config->outfile, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<icu>\n" "<tokens>\n"); /* read input lines for processing */ while ((line=fgets(linebuf, sizeof(linebuf)-1, p_config->infile))) { WRBUF sw = wrbuf_alloc(); WRBUF cdata = wrbuf_alloc(); int success = icu_chain_assign_cstr(p_config->chain, line, &status); line_count++; while (success && icu_chain_next_token(p_config->chain, &status)) { if (U_FAILURE(status)) success = 0; else { size_t start, len; const char *sortkey = icu_chain_token_sortkey(p_config->chain); icu_chain_get_org_info(p_config->chain, &start, &len); wrbuf_rewind(sw); wrbuf_puts_escaped(sw, sortkey); token_count++; if (p_config->xmloutput) { fprintf(p_config->outfile, "<token id=\"%lu\" line=\"%lu\"", token_count, line_count); wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, icu_chain_token_norm(p_config->chain)); fprintf(p_config->outfile, " norm=\"%s\"", wrbuf_cstr(cdata)); wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, icu_chain_token_display(p_config->chain)); fprintf(p_config->outfile, " display=\"%s\"", wrbuf_cstr(cdata)); if (p_config->sortoutput) { wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, wrbuf_cstr(sw)); fprintf(p_config->outfile, " sortkey=\"%s\"", wrbuf_cstr(cdata)); } fprintf(p_config->outfile, "/>\n"); } else { fprintf(p_config->outfile, "%lu %lu '%s' '%s'", token_count, line_count, icu_chain_token_norm(p_config->chain), icu_chain_token_display(p_config->chain)); if (p_config->sortoutput) { fprintf(p_config->outfile, " '%s'", wrbuf_cstr(sw)); } if (p_config->org_output) { fprintf(p_config->outfile, " %ld+%ld", (long) start, (long) len); } fprintf(p_config->outfile, "\n"); } } } wrbuf_destroy(sw); wrbuf_destroy(cdata); } if (p_config->xmloutput) fprintf(p_config->outfile, "</tokens>\n" "</icu>\n"); icu_chain_destroy(p_config->chain); xmlFreeDoc(doc); if (line) free(line); }
static void TestOpenDirect(void) { UResourceBundle *idna_rules, *casing, *te_IN, *ne, *item; UErrorCode errorCode; /* * test that ures_openDirect() opens a resource bundle * where one can look up its own items but not fallback items * from root or similar */ errorCode=U_ZERO_ERROR; idna_rules=ures_openDirect(loadTestData(&errorCode), "idna_rules", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("ures_openDirect(\"idna_rules\") failed: %s\n", u_errorName(errorCode)); return; } if(0!=uprv_strcmp("idna_rules", ures_getLocale(idna_rules, &errorCode))) { log_err("ures_openDirect(\"idna_rules\").getLocale()!=idna_rules\n"); } errorCode=U_ZERO_ERROR; /* try an item in idna_rules, must work */ item=ures_getByKey(idna_rules, "UnassignedSet", NULL, &errorCode); if(U_FAILURE(errorCode)) { log_err("translit_index.getByKey(local key) failed: %s\n", u_errorName(errorCode)); errorCode=U_ZERO_ERROR; } else { ures_close(item); } /* try an item in root, must fail */ item=ures_getByKey(idna_rules, "ShortLanguage", NULL, &errorCode); if(U_FAILURE(errorCode)) { errorCode=U_ZERO_ERROR; } else { log_err("idna_rules.getByKey(root key) succeeded!\n"); ures_close(item); } ures_close(idna_rules); /* now make sure that "idna_rules" will not work with ures_open() */ errorCode=U_ZERO_ERROR; idna_rules=ures_open("testdata", "idna_rules", &errorCode); if(U_FAILURE(errorCode) || errorCode==U_USING_DEFAULT_WARNING || errorCode==U_USING_FALLBACK_WARNING) { /* falling back to default or root is ok */ errorCode=U_ZERO_ERROR; } else if(0!=uprv_strcmp("idna_rules", ures_getLocale(idna_rules, &errorCode))) { /* Opening this file will work in "files mode" on Windows and the Mac, which have case insensitive file systems */ log_err("ures_open(\"idna_rules\") succeeded, should fail! Got: %s\n", u_errorName(errorCode)); } ures_close(idna_rules); /* ures_openDirect("translit_index_WronG") must fail */ idna_rules=ures_openDirect(NULL, "idna_rules_WronG", &errorCode); if(U_FAILURE(errorCode)) { errorCode=U_ZERO_ERROR; } else { log_err("ures_openDirect(\"idna_rules_WronG\") succeeded, should fail!\n"); } ures_close(idna_rules); errorCode = U_USING_FALLBACK_WARNING;; idna_rules=ures_openDirect("testdata", "idna_rules", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("ures_openDirect(\"idna_rules\") failed when U_USING_FALLBACK_WARNING was set prior to call: %s\n", u_errorName(errorCode)); return; } ures_close(idna_rules); /* * ICU 3.6 has new resource bundle syntax and data for bundles that do not * participate in locale fallback. Now, * - ures_open() works like ures_openDirect() on a bundle with a top-level * type of ":table(nofallback)" _if_ the bundle exists * - ures_open() will continue to find a root bundle if the requested one * does not exist, unlike ures_openDirect() * * Test with a different bundle than above to avoid confusion in the cache. */ /* * verify that ures_open("casing"), which now has a nofallback declaration, * does not enable fallbacks */ errorCode=U_ZERO_ERROR; casing=ures_open("testdata", "casing", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("ures_open(\"casing\") failed: %s\n", u_errorName(errorCode)); return; } errorCode=U_ZERO_ERROR; item=ures_getByKey(casing, "Info", NULL, &errorCode); if(U_FAILURE(errorCode)) { log_err("casing.getByKey(Info) failed - %s\n", u_errorName(errorCode)); } else { ures_close(item); } errorCode=U_ZERO_ERROR; item=ures_getByKey(casing, "ShortLanguage", NULL, &errorCode); if(U_SUCCESS(errorCode)) { log_err("casing.getByKey(root key) succeeded despite nofallback declaration - %s\n", u_errorName(errorCode)); ures_close(item); } ures_close(casing); /* * verify that ures_open("ne") finds the root bundle but * ures_openDirect("ne") does not */ errorCode=U_ZERO_ERROR; ne=ures_open("testdata", "ne", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("ures_open(\"ne\") failed (expected to get root): %s\n", u_errorName(errorCode)); } if(errorCode!=U_USING_DEFAULT_WARNING || 0!=uprv_strcmp("root", ures_getLocale(ne, &errorCode))) { log_err("ures_open(\"ne\") found something other than \"root\" - %s\n", u_errorName(errorCode)); } ures_close(ne); errorCode=U_ZERO_ERROR; ne=ures_openDirect("testdata", "ne", &errorCode); if(U_SUCCESS(errorCode)) { log_data_err("ures_openDirect(\"ne\") succeeded unexpectedly\n"); ures_close(ne); } /* verify that ures_openDirect("te_IN") does not enable fallbacks */ errorCode=U_ZERO_ERROR; te_IN=ures_openDirect("testdata", "te_IN", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("ures_open(\"te_IN\") failed: %s\n", u_errorName(errorCode)); return; } errorCode=U_ZERO_ERROR; item=ures_getByKey(te_IN, "ShortLanguage", NULL, &errorCode); if(U_SUCCESS(errorCode)) { log_err("te_IN.getByKey(root key) succeeded despite use of ures_openDirect() - %s\n", u_errorName(errorCode)); ures_close(item); } ures_close(te_IN); }
UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { if (isFrozen() || isBogus()) { return *this; } if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { const UCaseProps *csp = ucase_getSingleton(); { UnicodeSet foldSet(*this); UnicodeString str; USetAdder sa = { foldSet.toUSet(), _set_add, _set_addRange, _set_addString, NULL, // don't need remove() NULL // don't need removeRange() }; // start with input set to guarantee inclusion // USET_CASE: remove strings because the strings will actually be reduced (folded); // therefore, start with no strings and add only those needed if (attribute & USET_CASE_INSENSITIVE) { foldSet.strings->removeAllElements(); } int32_t n = getRangeCount(); UChar32 result; const UChar *full; int32_t locCache = 0; for (int32_t i=0; i<n; ++i) { UChar32 start = getRangeStart(i); UChar32 end = getRangeEnd(i); if (attribute & USET_CASE_INSENSITIVE) { // full case closure for (UChar32 cp=start; cp<=end; ++cp) { ucase_addCaseClosure(csp, cp, &sa); } } else { // add case mappings // (does not add long s for regular s, or Kelvin for k, for example) for (UChar32 cp=start; cp<=end; ++cp) { result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache); addCaseMapping(foldSet, result, full, str); result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache); addCaseMapping(foldSet, result, full, str); result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache); addCaseMapping(foldSet, result, full, str); result = ucase_toFullFolding(csp, cp, &full, 0); addCaseMapping(foldSet, result, full, str); } } } if (strings != NULL && strings->size() > 0) { if (attribute & USET_CASE_INSENSITIVE) { for (int32_t j=0; j<strings->size(); ++j) { str = *(const UnicodeString *) strings->elementAt(j); str.foldCase(); if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { foldSet.add(str); // does not map to code points: add the folded string itself } } } else { Locale root(""); #if !UCONFIG_NO_BREAK_ITERATION UErrorCode status = U_ZERO_ERROR; BreakIterator *bi = BreakIterator::createWordInstance(root, status); if (U_SUCCESS(status)) { #endif const UnicodeString *pStr; for (int32_t j=0; j<strings->size(); ++j) { pStr = (const UnicodeString *) strings->elementAt(j); (str = *pStr).toLower(root); foldSet.add(str); #if !UCONFIG_NO_BREAK_ITERATION (str = *pStr).toTitle(bi, root); foldSet.add(str); #endif (str = *pStr).toUpper(root); foldSet.add(str); (str = *pStr).foldCase(); foldSet.add(str); } #if !UCONFIG_NO_BREAK_ITERATION } delete bi; #endif } } *this = foldSet; } } return *this; }
UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id, const Locale& inLocale, UnicodeString& result) { UErrorCode status = U_ZERO_ERROR; ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status); // Suspend checking status until later... result.truncate(0); // Normalize the ID UnicodeString source, target, variant; UBool sawSource; TransliteratorIDParser::IDtoSTV(id, source, target, variant, sawSource); if (target.length() < 1) { // No target; malformed id return result; } if (variant.length() > 0) { // Change "Foo" to "/Foo" variant.insert(0, VARIANT_SEP); } UnicodeString ID(source); ID.append(TARGET_SEP).append(target).append(variant); // build the char* key if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) { char key[200]; uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX); int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX); ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV); // Try to retrieve a UnicodeString from the bundle. UnicodeString resString = bundle.getStringEx(key, status); if (U_SUCCESS(status) && resString.length() != 0) { return result = resString; // [sic] assign & return } #if !UCONFIG_NO_FORMATTING // We have failed to get a name from the locale data. This is // typical, since most transliterators will not have localized // name data. The next step is to retrieve the MessageFormat // pattern from the locale data and to use it to synthesize the // name from the ID. status = U_ZERO_ERROR; resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status); if (U_SUCCESS(status) && resString.length() != 0) { MessageFormat msg(resString, inLocale, status); // Suspend checking status until later... // We pass either 2 or 3 Formattable objects to msg. Formattable args[3]; int32_t nargs; args[0].setLong(2); // # of args to follow args[1].setString(source); args[2].setString(target); nargs = 3; // Use display names for the scripts, if they exist UnicodeString s; length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX); for (int j=1; j<=2; ++j) { status = U_ZERO_ERROR; uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX); args[j].getString(s); if (uprv_isInvariantUString(s.getBuffer(), s.length())) { s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV); resString = bundle.getStringEx(key, status); if (U_SUCCESS(status)) { args[j] = resString; } } } status = U_ZERO_ERROR; FieldPosition pos; // ignored by msg msg.format(args, nargs, result, pos, status); if (U_SUCCESS(status)) { result.append(variant); return result; } } #endif } // We should not reach this point unless there is something // wrong with the build or the RB_DISPLAY_NAME_PATTERN has // been deleted from the root RB_LOCALE_ELEMENTS resource. result = ID; return result; }
// Returns TRUE for "ok to continue parsing fields". UBool PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, UErrorCode &errorCode) { CharString pBuffer; const char *p=field; const char *v=strchr(p, '='); int binaryValue; if(*p=='-') { if(v!=NULL) { fprintf(stderr, "error in preparsed UCD: mix of binary-property-no and " "enum-property syntax '%s' on line %ld\n", field, (long)lineNumber); errorCode=U_PARSE_ERROR; return FALSE; } binaryValue=0; ++p; } else if(v==NULL) { binaryValue=1; } else { binaryValue=-1; // Copy out the property name rather than modifying the field (writing a NUL). pBuffer.append(p, (int32_t)(v-p), errorCode); p=pBuffer.data(); ++v; } int32_t prop=pnames->getPropertyEnum(p); if(prop<0) { for(int32_t i=0;; ++i) { if(i==UPRV_LENGTHOF(ppucdProperties)) { // Ignore unknown property names. return TRUE; } if(0==uprv_stricmp(p, ppucdProperties[i].name)) { prop=ppucdProperties[i].prop; U_ASSERT(prop>=0); break; } } } if(prop<UCHAR_BINARY_LIMIT) { if(binaryValue>=0) { props.binProps[prop]=(UBool)binaryValue; } else { // No binary value for a binary property. fprintf(stderr, "error in preparsed UCD: enum-property syntax '%s' " "for binary property on line %ld\n", field, (long)lineNumber); errorCode=U_PARSE_ERROR; } } else if(binaryValue>=0) { // Binary value for a non-binary property. fprintf(stderr, "error in preparsed UCD: binary-property syntax '%s' " "for non-binary property on line %ld\n", field, (long)lineNumber); errorCode=U_PARSE_ERROR; } else if (prop < UCHAR_INT_START) { fprintf(stderr, "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n", prop, (long)lineNumber); errorCode=U_PARSE_ERROR; } else if(prop<UCHAR_INT_LIMIT) { int32_t value=pnames->getPropertyValueEnum(prop, v); if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) { // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work. char *end; unsigned long ccc=uprv_strtoul(v, &end, 10); if(v<end && *end==0 && ccc<=254) { value=(int32_t)ccc; } } if(value==UCHAR_INVALID_CODE) { fprintf(stderr, "error in preparsed UCD: '%s' is not a valid value on line %ld\n", field, (long)lineNumber); errorCode=U_PARSE_ERROR; } else { props.intProps[prop-UCHAR_INT_START]=value; } } else if(*v=='<') { // Do not parse default values like <code point>, just set null values. switch(prop) { case UCHAR_BIDI_MIRRORING_GLYPH: props.bmg=U_SENTINEL; break; case UCHAR_BIDI_PAIRED_BRACKET: props.bpb=U_SENTINEL; break; case UCHAR_SIMPLE_CASE_FOLDING: props.scf=U_SENTINEL; break; case UCHAR_SIMPLE_LOWERCASE_MAPPING: props.slc=U_SENTINEL; break; case UCHAR_SIMPLE_TITLECASE_MAPPING: props.stc=U_SENTINEL; break; case UCHAR_SIMPLE_UPPERCASE_MAPPING: props.suc=U_SENTINEL; break; case UCHAR_CASE_FOLDING: props.cf.remove(); break; case UCHAR_LOWERCASE_MAPPING: props.lc.remove(); break; case UCHAR_TITLECASE_MAPPING: props.tc.remove(); break; case UCHAR_UPPERCASE_MAPPING: props.uc.remove(); break; case UCHAR_SCRIPT_EXTENSIONS: props.scx.clear(); break; default: fprintf(stderr, "error in preparsed UCD: '%s' is not a valid default value on line %ld\n", field, (long)lineNumber); errorCode=U_PARSE_ERROR; } } else { char c; switch(prop) { case UCHAR_NUMERIC_VALUE: props.numericValue=v; c=*v; if('0'<=c && c<='9' && v[1]==0) { props.digitValue=c-'0'; } else { props.digitValue=-1; } break; case UCHAR_NAME: props.name=v; break; case UCHAR_AGE: u_versionFromString(props.age, v); // Writes 0.0.0.0 if v is not numeric. break; case UCHAR_BIDI_MIRRORING_GLYPH: props.bmg=parseCodePoint(v, errorCode); break; case UCHAR_BIDI_PAIRED_BRACKET: props.bpb=parseCodePoint(v, errorCode); break; case UCHAR_SIMPLE_CASE_FOLDING: props.scf=parseCodePoint(v, errorCode); break; case UCHAR_SIMPLE_LOWERCASE_MAPPING: props.slc=parseCodePoint(v, errorCode); break; case UCHAR_SIMPLE_TITLECASE_MAPPING: props.stc=parseCodePoint(v, errorCode); break; case UCHAR_SIMPLE_UPPERCASE_MAPPING: props.suc=parseCodePoint(v, errorCode); break; case UCHAR_CASE_FOLDING: parseString(v, props.cf, errorCode); break; case UCHAR_LOWERCASE_MAPPING: parseString(v, props.lc, errorCode); break; case UCHAR_TITLECASE_MAPPING: parseString(v, props.tc, errorCode); break; case UCHAR_UPPERCASE_MAPPING: parseString(v, props.uc, errorCode); break; case PPUCD_NAME_ALIAS: props.nameAlias=v; break; case PPUCD_CONDITIONAL_CASE_MAPPINGS: case PPUCD_TURKIC_CASE_FOLDING: // No need to parse their values: They are hardcoded in the runtime library. break; case UCHAR_SCRIPT_EXTENSIONS: parseScriptExtensions(v, props.scx, errorCode); break; default: // Ignore unhandled properties. return TRUE; } } if(U_SUCCESS(errorCode)) { newValues.add((UChar32)prop); return TRUE; } else { return FALSE; } }
const Normalizer2 * Normalizer2::getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } if(name==NULL || *name==0) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; return NULL; } const Norm2AllModes *allModes=NULL; if(packageName==NULL) { if(0==uprv_strcmp(name, "nfc")) { allModes=Norm2AllModes::getNFCInstance(errorCode); } else if(0==uprv_strcmp(name, "nfkc")) { allModes=Norm2AllModes::getNFKCInstance(errorCode); } else if(0==uprv_strcmp(name, "nfkc_cf")) { allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); } } if(allModes==NULL && U_SUCCESS(errorCode)) { { Mutex lock; if(cache!=NULL) { allModes=(Norm2AllModes *)uhash_get(cache, name); } } if(allModes==NULL) { ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); LocalPointer<Norm2AllModes> localAllModes( Norm2AllModes::createInstance(packageName, name, errorCode)); if(U_SUCCESS(errorCode)) { Mutex lock; if(cache==NULL) { cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); if(U_FAILURE(errorCode)) { return NULL; } uhash_setKeyDeleter(cache, uprv_free); uhash_setValueDeleter(cache, deleteNorm2AllModes); } void *temp=uhash_get(cache, name); if(temp==NULL) { int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1); char *nameCopy=(char *)uprv_malloc(keyLength); if(nameCopy==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; return NULL; } uprv_memcpy(nameCopy, name, keyLength); allModes=localAllModes.getAlias(); uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode); } else { // race condition allModes=(Norm2AllModes *)temp; } } } } if(allModes!=NULL && U_SUCCESS(errorCode)) { switch(mode) { case UNORM2_COMPOSE: return &allModes->comp; case UNORM2_DECOMPOSE: return &allModes->decomp; case UNORM2_FCD: return &allModes->fcd; case UNORM2_COMPOSE_CONTIGUOUS: return &allModes->fcc; default: break; // do nothing } } return NULL; }
PyMODINIT_FUNC initicu(void) { PyObject* m; UVersionInfo ver, uver; UErrorCode status = U_ZERO_ERROR; char version[U_MAX_VERSION_STRING_LENGTH+1] = {0}, uversion[U_MAX_VERSION_STRING_LENGTH+5] = {0}; if (sizeof(Py_UNICODE) != 2 && sizeof(Py_UNICODE) != 4) { PyErr_SetString(PyExc_RuntimeError, "This module only works on python versions <= 3.2"); return; } u_init(&status); if (U_FAILURE(status)) { PyErr_SetString(PyExc_RuntimeError, u_errorName(status)); return; } u_getVersion(ver); u_versionToString(ver, version); u_getUnicodeVersion(uver); u_versionToString(uver, uversion); if (PyType_Ready(&icu_CollatorType) < 0) return; m = Py_InitModule3("icu", icu_methods, "Wrapper for the ICU internationalization library"); Py_INCREF(&icu_CollatorType); PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType); // uint8_t must be the same size as char PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0); PyModule_AddStringConstant(m, "icu_version", version); PyModule_AddStringConstant(m, "unicode_version", uversion); ADDUCONST(USET_SPAN_NOT_CONTAINED); ADDUCONST(USET_SPAN_CONTAINED); ADDUCONST(USET_SPAN_SIMPLE); ADDUCONST(UCOL_DEFAULT); ADDUCONST(UCOL_PRIMARY); ADDUCONST(UCOL_SECONDARY); ADDUCONST(UCOL_TERTIARY); ADDUCONST(UCOL_DEFAULT_STRENGTH); ADDUCONST(UCOL_QUATERNARY); ADDUCONST(UCOL_IDENTICAL); ADDUCONST(UCOL_OFF); ADDUCONST(UCOL_ON); ADDUCONST(UCOL_SHIFTED); ADDUCONST(UCOL_NON_IGNORABLE); ADDUCONST(UCOL_LOWER_FIRST); ADDUCONST(UCOL_UPPER_FIRST); ADDUCONST(UNORM_NONE); ADDUCONST(UNORM_NFD); ADDUCONST(UNORM_NFKD); ADDUCONST(UNORM_NFC); ADDUCONST(UNORM_DEFAULT); ADDUCONST(UNORM_NFKC); ADDUCONST(UNORM_FCD); }
void CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& status) { if (U_FAILURE(status)) { return; } if (fPluralCountToCurrencyUnitPattern) { deleteHash(fPluralCountToCurrencyUnitPattern); } fPluralCountToCurrencyUnitPattern = initHash(status); if (U_FAILURE(status)) { return; } UErrorCode ec = U_ZERO_ERROR; UResourceBundle *rb = ures_open(NULL, loc.getName(), &ec); rb = ures_getByKey(rb, gNumberElementsTag, rb, &ec); rb = ures_getByKey(rb, gLatnTag, rb, &ec); rb = ures_getByKey(rb, gPatternsTag, rb, &ec); int32_t ptnLen; const UChar* numberStylePattern = ures_getStringByKeyWithFallback(rb, gDecimalFormatTag, &ptnLen, &ec); int32_t numberStylePatternLen = ptnLen; const UChar* negNumberStylePattern = NULL; int32_t negNumberStylePatternLen = 0; // TODO: Java // parse to check whether there is ";" separator in the numberStylePattern UBool hasSeparator = false; if (U_SUCCESS(ec)) { for (int32_t styleCharIndex = 0; styleCharIndex < ptnLen; ++styleCharIndex) { if (numberStylePattern[styleCharIndex] == gNumberPatternSeparator) { hasSeparator = true; // split the number style pattern into positive and negative negNumberStylePattern = numberStylePattern + styleCharIndex + 1; negNumberStylePatternLen = ptnLen - styleCharIndex - 1; numberStylePatternLen = styleCharIndex; } } } ures_close(rb); if (U_FAILURE(ec)) { return; } UResourceBundle *currRb = ures_open(U_ICUDATA_CURR, loc.getName(), &ec); UResourceBundle *currencyRes = ures_getByKeyWithFallback(currRb, gCurrUnitPtnTag, NULL, &ec); #ifdef CURRENCY_PLURAL_INFO_DEBUG std::cout << "in set up\n"; #endif StringEnumeration* keywords = fPluralRules->getKeywords(ec); if (U_SUCCESS(ec)) { const char* pluralCount; while ((pluralCount = keywords->next(NULL, ec)) != NULL) { if ( U_SUCCESS(ec) ) { int32_t ptnLen; UErrorCode err = U_ZERO_ERROR; const UChar* patternChars = ures_getStringByKeyWithFallback( currencyRes, pluralCount, &ptnLen, &err); if (U_SUCCESS(err) && ptnLen > 0) { UnicodeString* pattern = new UnicodeString(patternChars, ptnLen); #ifdef CURRENCY_PLURAL_INFO_DEBUG char result_1[1000]; pattern->extract(0, pattern->length(), result_1, "UTF-8"); std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; #endif pattern->findAndReplace(gPart0, UnicodeString(numberStylePattern, numberStylePatternLen)); pattern->findAndReplace(gPart1, gTripleCurrencySign); if (hasSeparator) { UnicodeString negPattern(patternChars, ptnLen); negPattern.findAndReplace(gPart0, UnicodeString(negNumberStylePattern, negNumberStylePatternLen)); negPattern.findAndReplace(gPart1, gTripleCurrencySign); pattern->append(gNumberPatternSeparator); pattern->append(negPattern); } #ifdef CURRENCY_PLURAL_INFO_DEBUG pattern->extract(0, pattern->length(), result_1, "UTF-8"); std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; #endif fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount), pattern, status); } } } } delete keywords; ures_close(currencyRes); ures_close(currRb); }
//------------------------------------------------------------------------------ // // scanSet Construct a UnicodeSet from the text at the current scan // position. Advance the scan position to the first character // after the set. // // A new RBBI setref node referring to the set is pushed onto the node // stack. // // The scan position is normally under the control of the state machine // that controls rule parsing. UnicodeSets, however, are parsed by // the UnicodeSet constructor, not by the RBBI rule parser. // //------------------------------------------------------------------------------ void RBBIRuleScanner::scanSet() { UnicodeSet *uset; ParsePosition pos; int startPos; int i; if (U_FAILURE(*fRB->fStatus)) { return; } pos.setIndex(fScanIndex); startPos = fScanIndex; UErrorCode localStatus = U_ZERO_ERROR; uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE, fSymbolTable, localStatus); if (uset == NULL) { localStatus = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(localStatus)) { // TODO: Get more accurate position of the error from UnicodeSet's return info. // UnicodeSet appears to not be reporting correctly at this time. #ifdef RBBI_DEBUG RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex()); #endif error(localStatus); delete uset; return; } // Verify that the set contains at least one code point. // if (uset->isEmpty()) { // This set is empty. // Make it an error, because it almost certainly is not what the user wanted. // Also, avoids having to think about corner cases in the tree manipulation code // that occurs later on. error(U_BRK_RULE_EMPTY_SET); delete uset; return; } // Advance the RBBI parse postion over the UnicodeSet pattern. // Don't just set fScanIndex because the line/char positions maintained // for error reporting would be thrown off. i = pos.getIndex(); for (;;) { if (fNextIndex >= i) { break; } nextCharLL(); } if (U_SUCCESS(*fRB->fStatus)) { RBBINode *n; n = pushNewNode(RBBINode::setRef); n->fFirstPos = startPos; n->fLastPos = fNextIndex; fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); // findSetFor() serves several purposes here: // - Adopts storage for the UnicodeSet, will be responsible for deleting. // - Mantains collection of all sets in use, needed later for establishing // character categories for run time engine. // - Eliminates mulitiple instances of the same set. // - Creates a new uset node if necessary (if this isn't a duplicate.) findSetFor(n->fText, n, uset); } }
/** * Convert the elements of the 'list' vector, which are SingleID * objects, into actual Transliterator objects. In the course of * this, some (or all) entries may be removed. If all entries * are removed, the NULL transliterator will be added. * * Delete entries with empty basicIDs; these are generated by * elements like "(A)" in the forward direction, or "A()" in * the reverse. THIS MAY RESULT IN AN EMPTY VECTOR. Convert * SingleID entries to actual transliterators. * * @param list vector of SingleID objects. On exit, vector * of one or more Transliterators. * @return new value of insertIndex. The index will shift if * there are empty items, like "(Lower)", with indices less than * insertIndex. */ void TransliteratorIDParser::instantiateList(UVector& list, UErrorCode& ec) { UVector tlist(ec); if (U_FAILURE(ec)) { goto RETURN; } tlist.setDeleter(_deleteTransliteratorTrIDPars); Transliterator* t; int32_t i; for (i=0; i<=list.size(); ++i) { // [sic]: i<=list.size() // We run the loop too long by one, so we can // do an insert after the last element if (i==list.size()) { break; } SingleID* single = (SingleID*) list.elementAt(i); if (single->basicID.length() != 0) { t = single->createInstance(); if (t == NULL) { ec = U_INVALID_ID; goto RETURN; } tlist.addElement(t, ec); if (U_FAILURE(ec)) { delete t; goto RETURN; } } } // An empty list is equivalent to a NULL transliterator. if (tlist.size() == 0) { t = createBasicInstance(UnicodeString(TRUE, ANY_NULL, 8), NULL); if (t == NULL) { // Should never happen ec = U_INTERNAL_TRANSLITERATOR_ERROR; } tlist.addElement(t, ec); if (U_FAILURE(ec)) { delete t; } } RETURN: UObjectDeleter *save = list.setDeleter(_deleteSingleID); list.removeAllElements(); if (U_SUCCESS(ec)) { list.setDeleter(_deleteTransliteratorTrIDPars); while (tlist.size() > 0) { t = (Transliterator*) tlist.orphanElementAt(0); list.addElement(t, ec); if (U_FAILURE(ec)) { delete t; list.removeAllElements(); break; } } } list.setDeleter(save); }
U_CAPI int32_t U_EXPORT2 uspoof_getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *s, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status) { // TODO: this function could be sped up a bit // Skip the input normalization when not needed, work from callers data. // Put the initial skeleton straight into the caller's destination buffer. // It probably won't need normalization. // But these would make the structure more complicated. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); if (U_FAILURE(*status)) { return 0; } if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL) || (type & ~(USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE)) != 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } int32_t tableMask = 0; switch (type) { case 0: tableMask = USPOOF_ML_TABLE_FLAG; break; case USPOOF_SINGLE_SCRIPT_CONFUSABLE: tableMask = USPOOF_SL_TABLE_FLAG; break; case USPOOF_ANY_CASE: tableMask = USPOOF_MA_TABLE_FLAG; break; case USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE: tableMask = USPOOF_SA_TABLE_FLAG; break; default: *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } // NFD transform of the user supplied input UChar nfdStackBuf[USPOOF_STACK_BUFFER_SIZE]; UChar *nfdInput = nfdStackBuf; int32_t normalizedLen = unorm_normalize( s, length, UNORM_NFD, 0, nfdInput, USPOOF_STACK_BUFFER_SIZE, status); if (*status == U_BUFFER_OVERFLOW_ERROR) { nfdInput = (UChar *)uprv_malloc((normalizedLen+1)*sizeof(UChar)); if (nfdInput == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return 0; } *status = U_ZERO_ERROR; normalizedLen = unorm_normalize(s, length, UNORM_NFD, 0, nfdInput, normalizedLen+1, status); } if (U_FAILURE(*status)) { if (nfdInput != nfdStackBuf) { uprv_free(nfdInput); } return 0; } // buffer to hold the Unicode defined skeleton mappings for a single code point UChar buf[USPOOF_MAX_SKELETON_EXPANSION]; // Apply the skeleton mapping to the NFD normalized input string // Accumulate the skeleton, possibly unnormalized, in a UnicodeString. int32_t inputIndex = 0; UnicodeString skelStr; while (inputIndex < normalizedLen) { UChar32 c; U16_NEXT(nfdInput, inputIndex, normalizedLen, c); int32_t replaceLen = This->confusableLookup(c, tableMask, buf); skelStr.append(buf, replaceLen); } if (nfdInput != nfdStackBuf) { uprv_free(nfdInput); } const UChar *result = skelStr.getBuffer(); int32_t resultLen = skelStr.length(); UChar *normedResult = NULL; // Check the skeleton for NFD, normalize it if needed. // Unnormalized results should be very rare. if (!unorm_isNormalized(result, resultLen, UNORM_NFD, status)) { normalizedLen = unorm_normalize(result, resultLen, UNORM_NFD, 0, NULL, 0, status); normedResult = static_cast<UChar *>(uprv_malloc((normalizedLen+1)*sizeof(UChar))); if (normedResult == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return 0; } *status = U_ZERO_ERROR; unorm_normalize(result, resultLen, UNORM_NFD, 0, normedResult, normalizedLen+1, status); result = normedResult; resultLen = normalizedLen; } // Copy the skeleton to the caller's buffer if (U_SUCCESS(*status)) { if (destCapacity == 0 || resultLen > destCapacity) { *status = resultLen>destCapacity ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING; } else { u_memcpy(dest, result, resultLen); if (destCapacity > resultLen) { dest[resultLen] = 0; } else { *status = U_STRING_NOT_TERMINATED_WARNING; } } } uprv_free(normedResult); return resultLen; }
int32_t ThaiBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, UStack &foundBreaks ) const { if ((rangeEnd - rangeStart) < THAI_MIN_WORD_SPAN) { return 0; // Not enough characters for two words } uint32_t wordsFound = 0; int32_t wordLength; int32_t current; UErrorCode status = U_ZERO_ERROR; PossibleWord words[THAI_LOOKAHEAD]; UChar32 uc; utext_setNativeIndex(text, rangeStart); while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { wordLength = 0; // Look for candidate words at the current position int candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); // If we found exactly one, use that if (candidates == 1) { wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(text); wordsFound += 1; } // If there was more than one, see which one can take us forward the most words else if (candidates > 1) { // If we're already at the end of the range, we're done if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { goto foundBest; } do { int wordsMatched = 1; if (words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { if (wordsMatched < 2) { // Followed by another dictionary word; mark first word as a good candidate words[wordsFound%THAI_LOOKAHEAD].markCurrent(); wordsMatched = 2; } // If we're already at the end of the range, we're done if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { goto foundBest; } // See if any of the possible second words is followed by a third word do { // If we find a third word, stop right away if (words[(wordsFound+2)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { words[wordsFound%THAI_LOOKAHEAD].markCurrent(); goto foundBest; } } while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(text)); } } while (words[wordsFound%THAI_LOOKAHEAD].backUp(text)); foundBest: wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(text); wordsFound += 1; } // We come here after having either found a word or not. We look ahead to the // next word. If it's not a dictionary word, we will combine it withe the word we // just found (if there is one), but only if the preceding word does not exceed // the threshold. // The text iterator should now be positioned at the end of the word we found. if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) { // if it is a dictionary word, do nothing. If it isn't, then if there is // no preceding word, or the non-word shares less than the minimum threshold // of characters with a dictionary word, then scan to resynchronize if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 && (wordLength == 0 || words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) { // Look for a plausible word boundary //TODO: This section will need a rework for UText. int32_t remaining = rangeEnd - (current+wordLength); UChar32 pc = utext_current32(text); int32_t chars = 0; for (;;) { utext_next32(text); uc = utext_current32(text); // TODO: Here we're counting on the fact that the SA languages are all // in the BMP. This should get fixed with the UText rework. chars += 1; if (--remaining <= 0) { break; } if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { // Maybe. See if it's in the dictionary. // NOTE: In the original Apple code, checked that the next // two characters after uc were not 0x0E4C THANTHAKHAT before // checking the dictionary. That is just a performance filter, // but it's not clear it's faster than checking the trie. int candidates = words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); utext_setNativeIndex(text, current+wordLength+chars); if (candidates > 0) { break; } } pc = uc; } // Bump the word count if there wasn't already one if (wordLength <= 0) { wordsFound += 1; } // Update the length with the passed-over characters wordLength += chars; } else { // Back up to where we were for next iteration utext_setNativeIndex(text, current+wordLength); } } // Never stop before a combining mark. int32_t currPos; while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { utext_next32(text); wordLength += (int32_t)utext_getNativeIndex(text) - currPos; } // Look ahead for possible suffixes if a dictionary word does not follow. // We do this in code rather than using a rule so that the heuristic // resynch continues to function. For example, one of the suffix characters // could be a typo in the middle of a word. if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) { if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 && fSuffixSet.contains(uc = utext_current32(text))) { if (uc == THAI_PAIYANNOI) { if (!fSuffixSet.contains(utext_previous32(text))) { // Skip over previous end and PAIYANNOI utext_next32(text); utext_next32(text); wordLength += 1; // Add PAIYANNOI to word uc = utext_current32(text); // Fetch next character } else { // Restore prior position utext_next32(text); } } if (uc == THAI_MAIYAMOK) { if (utext_previous32(text) != THAI_MAIYAMOK) { // Skip over previous end and MAIYAMOK utext_next32(text); utext_next32(text); wordLength += 1; // Add MAIYAMOK to word } else { // Restore prior position utext_next32(text); } } } else { utext_setNativeIndex(text, current+wordLength); } } // Did we find a word on this iteration? If so, push it on the break stack if (wordLength > 0) { foundBreaks.push((current+wordLength), status); } } // Don't return a break for the end of the dictionary range if there is one there. if (foundBreaks.peeki() >= rangeEnd) { (void) foundBreaks.popi(); wordsFound -= 1; } return wordsFound; }
UBool FieldPositionIteratorHandler::isRecording(void) const { return U_SUCCESS(status); }
U_CAPI int32_t U_EXPORT2 ucnv_swapAliases(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint16_t *inTable; const uint32_t *inSectionSizes; uint32_t toc[offsetsCount]; uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ uint32_t i, count, tocLength, topOffset; TempRow rows[STACK_ROW_CAPACITY]; uint16_t resort[STACK_ROW_CAPACITY]; TempAliasTable tempTable; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ pInfo->dataFormat[1]==0x76 && pInfo->dataFormat[2]==0x41 && pInfo->dataFormat[3]==0x6c && pInfo->formatVersion[0]==3 )) { udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* an alias table must contain at least the table of contents array */ if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", length-headerSize); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); inTable=(const uint16_t *)inSectionSizes; uprv_memset(toc, 0, sizeof(toc)); toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); if(tocLength<minTocLength || offsetsCount<=tocLength) { udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength); *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } /* read the known part of the table of contents */ for(i=converterListIndex; i<=tocLength; ++i) { toc[i]=ds->readUInt32(inSectionSizes[i]); } /* compute offsets */ uprv_memset(offsets, 0, sizeof(offsets)); offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ for(i=tagListIndex; i<=tocLength; ++i) { offsets[i]=offsets[i-1]+toc[i-1]; } /* compute the overall size of the after-header data, in numbers of 16-bit units */ topOffset=offsets[i-1]+toc[i-1]; if(length>=0) { uint16_t *outTable; const uint16_t *p, *p2; uint16_t *q, *q2; uint16_t oldIndex; if((length-headerSize)<(2*(int32_t)topOffset)) { udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", length-headerSize); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outTable=(uint16_t *)((char *)outData+headerSize); /* swap the entire table of contents */ ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); /* swap unormalized strings & normalized strings */ ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), outTable+offsets[stringTableIndex], pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); return 0; } if(ds->inCharset==ds->outCharset) { /* no need to sort, just swap all 16-bit values together */ ds->swapArray16(ds, inTable+offsets[converterListIndex], 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), outTable+offsets[converterListIndex], pErrorCode); } else { /* allocate the temporary table for sorting */ count=toc[aliasListIndex]; tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ if(count<=STACK_ROW_CAPACITY) { tempTable.rows=rows; tempTable.resort=resort; } else { tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); if(tempTable.rows==NULL) { udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", count); *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } tempTable.resort=(uint16_t *)(tempTable.rows+count); } if(ds->outCharset==U_ASCII_FAMILY) { tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; } else /* U_EBCDIC_FAMILY */ { tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; } /* * Sort unique aliases+mapped names. * * We need to sort the list again by outCharset strings because they * sort differently for different charset families. * First we set up a temporary table with the string indexes and * sorting indexes and sort that. * Then we permutate and copy/swap the actual values. */ p=inTable+offsets[aliasListIndex]; q=outTable+offsets[aliasListIndex]; p2=inTable+offsets[untaggedConvArrayIndex]; q2=outTable+offsets[untaggedConvArrayIndex]; for(i=0; i<count; ++i) { tempTable.rows[i].strIndex=ds->readUInt16(p[i]); tempTable.rows[i].sortIndex=(uint16_t)i; } uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), io_compareRows, &tempTable, FALSE, pErrorCode); if(U_SUCCESS(*pErrorCode)) { /* copy/swap/permutate items */ if(p!=q) { for(i=0; i<count; ++i) { oldIndex=tempTable.rows[i].sortIndex; ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); } } else { /* * If we swap in-place, then the permutation must use another * temporary array (tempTable.resort) * before the results are copied to the outBundle. */ uint16_t *r=tempTable.resort; for(i=0; i<count; ++i) { oldIndex=tempTable.rows[i].sortIndex; ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); } uprv_memcpy(q, r, 2*count); for(i=0; i<count; ++i) { oldIndex=tempTable.rows[i].sortIndex; ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); } uprv_memcpy(q2, r, 2*count); } } if(tempTable.rows!=rows) { uprv_free(tempTable.rows); } if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", count); return 0; } /* swap remaining 16-bit values */ ds->swapArray16(ds, inTable+offsets[converterListIndex], 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), outTable+offsets[converterListIndex], pErrorCode); ds->swapArray16(ds, inTable+offsets[taggedAliasArrayIndex], 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), outTable+offsets[taggedAliasArrayIndex], pErrorCode); } } return headerSize+2*(int32_t)topOffset; }
UnicodeString& LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale, UnicodeString& result) const { UnicodeString resultName; const char* lang = locale.getLanguage(); if (uprv_strlen(lang) == 0) { lang = "root"; } const char* script = locale.getScript(); const char* country = locale.getCountry(); const char* variant = locale.getVariant(); UBool hasScript = uprv_strlen(script) > 0; UBool hasCountry = uprv_strlen(country) > 0; UBool hasVariant = uprv_strlen(variant) > 0; if (dialectHandling == ULDN_DIALECT_NAMES) { char buffer[ULOC_FULLNAME_CAPACITY]; do { // loop construct is so we can break early out of search if (hasScript && hasCountry) { ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0); localeIdName(buffer, resultName); if (!resultName.isBogus()) { hasScript = FALSE; hasCountry = FALSE; break; } } if (hasScript) { ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0); localeIdName(buffer, resultName); if (!resultName.isBogus()) { hasScript = FALSE; break; } } if (hasCountry) { ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0); localeIdName(buffer, resultName); if (!resultName.isBogus()) { hasCountry = FALSE; break; } } } while (FALSE); } if (resultName.isBogus() || resultName.isEmpty()) { localeIdName(lang, resultName); } UnicodeString resultRemainder; UnicodeString temp; StringEnumeration *e = NULL; UErrorCode status = U_ZERO_ERROR; if (hasScript) { resultRemainder.append(scriptDisplayName(script, temp)); } if (hasCountry) { appendWithSep(resultRemainder, regionDisplayName(country, temp)); } if (hasVariant) { appendWithSep(resultRemainder, variantDisplayName(variant, temp)); } e = locale.createKeywords(status); if (e && U_SUCCESS(status)) { UnicodeString temp2; char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY const char* key; while ((key = e->next((int32_t *)0, status)) != NULL) { locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); keyDisplayName(key, temp); keyValueDisplayName(key, value, temp2); if (temp2 != UnicodeString(value, -1, US_INV)) { appendWithSep(resultRemainder, temp2); } else if (temp != UnicodeString(key, -1, US_INV)) { UnicodeString temp3; Formattable data[] = { temp, temp2 }; FieldPosition fpos; status = U_ZERO_ERROR; keyTypeFormat->format(data, 2, temp3, fpos, status); appendWithSep(resultRemainder, temp3); } else { appendWithSep(resultRemainder, temp) .append((UChar)0x3d /* = */) .append(temp2); } } delete e; } if (!resultRemainder.isEmpty()) { Formattable data[] = { resultName, resultRemainder }; FieldPosition fpos; status = U_ZERO_ERROR; format->format(data, 2, result, fpos, status); return result; } return result = resultName; }
static int32_t unorm_iterate(UCharIterator *src, UBool forward, UChar *dest, int32_t destCapacity, UNormalizationMode mode, int32_t options, UBool doNormalize, UBool *pNeededToNormalize, UErrorCode *pErrorCode) { const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); const UnicodeSet *uni32; if(options&UNORM_UNICODE_3_2) { uni32=uniset_getUnicode32Instance(*pErrorCode); } else { uni32=NULL; // unused } FilteredNormalizer2 fn2(*n2, *uni32); if(options&UNORM_UNICODE_3_2) { n2=&fn2; } if(U_FAILURE(*pErrorCode)) { return 0; } if( destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL ) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } if(pNeededToNormalize!=NULL) { *pNeededToNormalize=FALSE; } if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) { return u_terminateUChars(dest, destCapacity, 0, pErrorCode); } UnicodeString buffer; UChar32 c; if(forward) { /* get one character and ignore its properties */ buffer.append(uiter_next32(src)); /* get all following characters until we see a boundary */ while((c=uiter_next32(src))>=0) { if(n2->hasBoundaryBefore(c)) { /* back out the latest movement to stop at the boundary */ src->move(src, -U16_LENGTH(c), UITER_CURRENT); break; } else { buffer.append(c); } } } else { while((c=uiter_previous32(src))>=0) { /* always write this character to the front of the buffer */ buffer.insert(0, c); /* stop if this just-copied character is a boundary */ if(n2->hasBoundaryBefore(c)) { break; } } } UnicodeString destString(dest, 0, destCapacity); if(buffer.length()>0 && doNormalize) { n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { *pNeededToNormalize= destString!=buffer; } return destString.length(); } else { /* just copy the source characters */ return buffer.extract(dest, destCapacity, *pErrorCode); } }
void DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status, UBool useLastResortData) { if (U_FAILURE(status)) { return; } *validLocale = *actualLocale = 0; currPattern = NULL; // First initialize all the symbols to the fallbacks for anything we can't find initialize(); // // Next get the numbering system for this locale and set zero digit // and the digit string based on the numbering system for the locale // LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(loc, status)); const char *nsName; if (U_SUCCESS(status) && ns->getRadix() == 10 && !ns->isAlgorithmic()) { nsName = ns->getName(); UnicodeString digitString(ns->getDescription()); int32_t digitIndex = 0; UChar32 digit = digitString.char32At(0); fSymbols[kZeroDigitSymbol].setTo(digit); for (int32_t i = kOneDigitSymbol; i <= kNineDigitSymbol; ++i) { digitIndex += U16_LENGTH(digit); digit = digitString.char32At(digitIndex); fSymbols[i].setTo(digit); } } else { nsName = gLatn; } // Open resource bundles const char* locStr = loc.getName(); LocalUResourceBundlePointer resource(ures_open(NULL, locStr, &status)); LocalUResourceBundlePointer numberElementsRes( ures_getByKeyWithFallback(resource.getAlias(), gNumberElements, NULL, &status)); if (U_FAILURE(status)) { if ( useLastResortData ) { status = U_USING_DEFAULT_WARNING; initialize(); } return; } // Set locale IDs // TODO: Is there a way to do this without depending on the resource bundle instance? U_LOCALE_BASED(locBased, *this); locBased.setLocaleIDs( ures_getLocaleByType( numberElementsRes.getAlias(), ULOC_VALID_LOCALE, &status), ures_getLocaleByType( numberElementsRes.getAlias(), ULOC_ACTUAL_LOCALE, &status)); // Now load the rest of the data from the data sink. // Start with loading this nsName if it is not Latin. DecFmtSymDataSink sink(*this); if (uprv_strcmp(nsName, gLatn) != 0) { CharString path; path.append(gNumberElements, status) .append('/', status) .append(nsName, status) .append('/', status) .append(gSymbols, status); ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status); // If no symbols exist for the given nsName and resource bundle, silently ignore // and fall back to Latin. if (status == U_MISSING_RESOURCE_ERROR) { status = U_ZERO_ERROR; } else if (U_FAILURE(status)) { return; } } // Continue with Latin if necessary. if (!sink.seenAll()) { ures_getAllItemsWithFallback(resource.getAlias(), gNumberElementsLatnSymbols, sink, status); if (U_FAILURE(status)) { return; } } // Let the monetary number separators equal the default number separators if necessary. sink.resolveMissingMonetarySeparators(fSymbols); // Obtain currency data from the currency API. This is strictly // for backward compatibility; we don't use DecimalFormatSymbols // for currency data anymore. UErrorCode internalStatus = U_ZERO_ERROR; // don't propagate failures out UChar curriso[4]; UnicodeString tempStr; ucurr_forLocale(locStr, curriso, 4, &internalStatus); uprv_getStaticCurrencyName(curriso, locStr, tempStr, internalStatus); if (U_SUCCESS(internalStatus)) { fSymbols[kIntlCurrencySymbol].setTo(curriso, -1); fSymbols[kCurrencySymbol] = tempStr; } /* else use the default values. */ //load the currency data UChar ucc[4]={0}; //Currency Codes are always 3 chars long int32_t uccLen = 4; const char* locName = loc.getName(); UErrorCode localStatus = U_ZERO_ERROR; uccLen = ucurr_forLocale(locName, ucc, uccLen, &localStatus); if(U_SUCCESS(localStatus) && uccLen > 0) { char cc[4]={0}; u_UCharsToChars(ucc, cc, uccLen); /* An explicit currency was requested */ LocalUResourceBundlePointer currencyResource(ures_open(U_ICUDATA_CURR, locStr, &localStatus)); LocalUResourceBundlePointer currency( ures_getByKeyWithFallback(currencyResource.getAlias(), "Currencies", NULL, &localStatus)); ures_getByKeyWithFallback(currency.getAlias(), cc, currency.getAlias(), &localStatus); if(U_SUCCESS(localStatus) && ures_getSize(currency.getAlias())>2) { // the length is 3 if more data is present ures_getByIndex(currency.getAlias(), 2, currency.getAlias(), &localStatus); int32_t currPatternLen = 0; currPattern = ures_getStringByIndex(currency.getAlias(), (int32_t)0, &currPatternLen, &localStatus); UnicodeString decimalSep = ures_getUnicodeStringByIndex(currency.getAlias(), (int32_t)1, &localStatus); UnicodeString groupingSep = ures_getUnicodeStringByIndex(currency.getAlias(), (int32_t)2, &localStatus); if(U_SUCCESS(localStatus)){ fSymbols[kMonetaryGroupingSeparatorSymbol] = groupingSep; fSymbols[kMonetarySeparatorSymbol] = decimalSep; //pattern.setTo(TRUE, currPattern, currPatternLen); status = localStatus; } } /* else An explicit currency was requested and is unknown or locale data is malformed. */ /* ucurr_* API will get the correct value later on. */ } // else ignore the error if no currency // Currency Spacing. LocalUResourceBundlePointer currencyResource(ures_open(U_ICUDATA_CURR, locStr, &status)); CurrencySpacingSink currencySink(*this); ures_getAllItemsWithFallback(currencyResource.getAlias(), gCurrencySpacingTag, currencySink, status); currencySink.resolveMissing(); if (U_FAILURE(status)) { return; } }
static UFILE* finit_owner(FILE *f, const char *locale, const char *codepage, UBool takeOwnership ) { UErrorCode status = U_ZERO_ERROR; UFILE *result; if(f == NULL) { return 0; } result = (UFILE*) uprv_malloc(sizeof(UFILE)); if(result == NULL) { return 0; } uprv_memset(result, 0, sizeof(UFILE)); result->fFileno = fileno(f); #ifdef U_WINDOWS if (0 <= result->fFileno && result->fFileno <= 2) { /* stdin, stdout and stderr need to be special cased for Windows 98 */ #if _MSC_VER >= 1400 result->fFile = &__iob_func()[_fileno(f)]; #else result->fFile = &_iob[_fileno(f)]; #endif } else #endif { result->fFile = f; } result->str.fBuffer = result->fUCBuffer; result->str.fPos = result->fUCBuffer; result->str.fLimit = result->fUCBuffer; #if !UCONFIG_NO_FORMATTING /* if locale is 0, use the default */ if(u_locbund_init(&result->str.fBundle, locale) == 0) { /* DO NOT FCLOSE HERE! */ uprv_free(result); return 0; } #endif /* If the codepage is not "" use the ucnv_open default behavior */ if(codepage == NULL || *codepage != '\0') { result->fConverter = ucnv_open(codepage, &status); } /* else result->fConverter is already memset'd to NULL. */ if(U_SUCCESS(status)) { result->fOwnFile = takeOwnership; } else { #if !UCONFIG_NO_FORMATTING u_locbund_close(&result->str.fBundle); #endif /* DO NOT fclose here!!!!!! */ uprv_free(result); result = NULL; } return result; }
//--------------------------------------- UnicodeString& TimeZone::getDisplayName(UBool daylight, EDisplayType style, const Locale& locale, UnicodeString& result) const { // SRL TODO: cache the SDF, just like java. UErrorCode status = U_ZERO_ERROR; #ifdef U_DEBUG_TZ char buf[128]; fID.extract(0, sizeof(buf)-1, buf, sizeof(buf), ""); #endif SimpleDateFormat format(style == LONG ? ZZZZ_STR : Z_STR, locale, status); U_DEBUG_TZ_MSG(("getDisplayName(%s)\n", buf)); if(!U_SUCCESS(status)) { #ifdef U_DEBUG_TZ char buf2[128]; result.extract(0, sizeof(buf2)-1, buf2, sizeof(buf2), ""); U_DEBUG_TZ_MSG(("getDisplayName(%s) -> %s\n", buf, buf2)); #endif return result.remove(); } UDate d = Calendar::getNow(); int32_t rawOffset; int32_t dstOffset; this->getOffset(d, FALSE, rawOffset, dstOffset, status); if (U_FAILURE(status)) { return result.remove(); } if ((daylight && dstOffset != 0) || (!daylight && dstOffset == 0)) { // Current time and the request (daylight / not daylight) agree. format.setTimeZone(*this); return format.format(d, result); } // Create a new SimpleTimeZone as a stand-in for this zone; the // stand-in will have no DST, or DST during July, but the same ID and offset, // and hence the same display name. // We don't cache these because they're small and cheap to create. UnicodeString tempID; getID(tempID); SimpleTimeZone *tz = NULL; if(daylight && useDaylightTime()){ // The display name for daylight saving time was requested, but currently not in DST // Set a fixed date (July 1) in this Gregorian year GregorianCalendar cal(*this, status); if (U_FAILURE(status)) { return result.remove(); } cal.set(UCAL_MONTH, UCAL_JULY); cal.set(UCAL_DATE, 1); // Get July 1 date d = cal.getTime(status); // Check if it is in DST if (cal.get(UCAL_DST_OFFSET, status) == 0) { // We need to create a fake time zone tz = new SimpleTimeZone(rawOffset, tempID, UCAL_JUNE, 1, 0, 0, UCAL_AUGUST, 1, 0, 0, getDSTSavings(), status); if (U_FAILURE(status) || tz == NULL) { if (U_SUCCESS(status)) { status = U_MEMORY_ALLOCATION_ERROR; } return result.remove(); } format.adoptTimeZone(tz); } else { format.setTimeZone(*this); } } else { // The display name for standard time was requested, but currently in DST // or display name for daylight saving time was requested, but this zone no longer // observes DST. tz = new SimpleTimeZone(rawOffset, tempID); if (U_FAILURE(status) || tz == NULL) { if (U_SUCCESS(status)) { status = U_MEMORY_ALLOCATION_ERROR; } return result.remove(); } format.adoptTimeZone(tz); } format.format(d, result, status); return result; }
void DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status, UBool useLastResortData) { static const char *gNumberElementKeys[kFormatSymbolCount] = { "decimal", "group", "list", "percentSign", NULL, /* Native zero digit is deprecated from CLDR - get it from the numbering system */ NULL, /* Pattern digit character is deprecated from CLDR - use # by default always */ "minusSign", "plusSign", NULL, /* currency symbol - We don't really try to load this directly from CLDR until we know the currency */ NULL, /* intl currency symbol - We don't really try to load this directly from CLDR until we know the currency */ "currencyDecimal", "exponential", "perMille", NULL, /* Escape padding character - not in CLDR */ "infinity", "nan", NULL, /* Significant digit symbol - not in CLDR */ "currencyGroup", NULL, /* one digit - get it from the numbering system */ NULL, /* two digit - get it from the numbering system */ NULL, /* three digit - get it from the numbering system */ NULL, /* four digit - get it from the numbering system */ NULL, /* five digit - get it from the numbering system */ NULL, /* six digit - get it from the numbering system */ NULL, /* seven digit - get it from the numbering system */ NULL, /* eight digit - get it from the numbering system */ NULL, /* nine digit - get it from the numbering system */ }; static const char *gLatn = "latn"; static const char *gSymbols = "symbols"; const char *nsName; const UChar *sym = NULL; int32_t len = 0; *validLocale = *actualLocale = 0; currPattern = NULL; if (U_FAILURE(status)) return; const char* locStr = loc.getName(); UResourceBundle *resource = ures_open((char *)0, locStr, &status); UResourceBundle *numberElementsRes = ures_getByKeyWithFallback(resource, gNumberElements, NULL, &status); if (U_FAILURE(status)) { if ( useLastResortData ) { status = U_USING_FALLBACK_WARNING; initialize(); } return; } else { // First initialize all the symbols to the fallbacks for anything we can't find initialize(); // // Next get the numbering system for this locale and set zero digit // and the digit string based on the numbering system for the locale // NumberingSystem* ns = NumberingSystem::createInstance(loc,status); if (U_SUCCESS(status) && ns->getRadix() == 10 && !ns->isAlgorithmic()) { nsName = ns->getName(); UnicodeString digitString(ns->getDescription()); setSymbol(kZeroDigitSymbol, digitString.tempSubString(0, 1), FALSE); setSymbol(kOneDigitSymbol, digitString.tempSubString(1, 1), FALSE); setSymbol(kTwoDigitSymbol, digitString.tempSubString(2, 1), FALSE); setSymbol(kThreeDigitSymbol, digitString.tempSubString(3, 1), FALSE); setSymbol(kFourDigitSymbol, digitString.tempSubString(4, 1), FALSE); setSymbol(kFiveDigitSymbol, digitString.tempSubString(5, 1), FALSE); setSymbol(kSixDigitSymbol, digitString.tempSubString(6, 1), FALSE); setSymbol(kSevenDigitSymbol, digitString.tempSubString(7, 1), FALSE); setSymbol(kEightDigitSymbol, digitString.tempSubString(8, 1), FALSE); setSymbol(kNineDigitSymbol, digitString.tempSubString(9, 1), FALSE); } else { nsName = gLatn; } UBool isLatn = !uprv_strcmp(nsName,gLatn); UErrorCode nlStatus = U_ZERO_ERROR; UResourceBundle *nonLatnSymbols = NULL; if ( !isLatn ) { nonLatnSymbols = ures_getByKeyWithFallback(numberElementsRes, nsName, NULL, &nlStatus); nonLatnSymbols = ures_getByKeyWithFallback(nonLatnSymbols, gSymbols, nonLatnSymbols, &nlStatus); } UResourceBundle *latnSymbols = ures_getByKeyWithFallback(numberElementsRes, gLatn, NULL, &status); latnSymbols = ures_getByKeyWithFallback(latnSymbols, gSymbols, latnSymbols, &status); UBool kMonetaryDecimalSet = FALSE; UBool kMonetaryGroupingSet = FALSE; for(int32_t i = 0; i<kFormatSymbolCount; i++) { if ( gNumberElementKeys[i] != NULL ) { UErrorCode localStatus = U_ZERO_ERROR; if ( !isLatn ) { sym = ures_getStringByKeyWithFallback(nonLatnSymbols,gNumberElementKeys[i],&len,&localStatus); // If we can't find the symbol in the numbering system specific resources, // use the "latn" numbering system as the fallback. if ( U_FAILURE(localStatus) ) { localStatus = U_ZERO_ERROR; sym = ures_getStringByKeyWithFallback(latnSymbols,gNumberElementKeys[i],&len,&localStatus); } } else { sym = ures_getStringByKeyWithFallback(latnSymbols,gNumberElementKeys[i],&len,&localStatus); } if ( U_SUCCESS(localStatus) ) { setSymbol((ENumberFormatSymbol)i, UnicodeString(TRUE, sym, len)); if ( i == kMonetarySeparatorSymbol ) { kMonetaryDecimalSet = TRUE; } else if ( i == kMonetaryGroupingSeparatorSymbol ) { kMonetaryGroupingSet = TRUE; } } } } ures_close(latnSymbols); if ( !isLatn ) { ures_close(nonLatnSymbols); } // If monetary decimal or grouping were not explicitly set, then set them to be the // same as their non-monetary counterparts. if ( !kMonetaryDecimalSet ) { setSymbol(kMonetarySeparatorSymbol,fSymbols[kDecimalSeparatorSymbol]); } if ( !kMonetaryGroupingSet ) { setSymbol(kMonetaryGroupingSeparatorSymbol,fSymbols[kGroupingSeparatorSymbol]); } if (ns) { delete ns; } // Obtain currency data from the currency API. This is strictly // for backward compatibility; we don't use DecimalFormatSymbols // for currency data anymore. UErrorCode internalStatus = U_ZERO_ERROR; // don't propagate failures out UChar curriso[4]; UnicodeString tempStr; ucurr_forLocale(locStr, curriso, 4, &internalStatus); // Reuse numberElements[0] as a temporary buffer uprv_getStaticCurrencyName(curriso, locStr, tempStr, internalStatus); if (U_SUCCESS(internalStatus)) { fSymbols[kIntlCurrencySymbol].setTo(curriso, -1); fSymbols[kCurrencySymbol] = tempStr; } /* else use the default values. */ U_LOCALE_BASED(locBased, *this); locBased.setLocaleIDs(ures_getLocaleByType(numberElementsRes, ULOC_VALID_LOCALE, &status), ures_getLocaleByType(numberElementsRes, ULOC_ACTUAL_LOCALE, &status)); //load the currency data UChar ucc[4]={0}; //Currency Codes are always 3 chars long int32_t uccLen = 4; const char* locName = loc.getName(); UErrorCode localStatus = U_ZERO_ERROR; uccLen = ucurr_forLocale(locName, ucc, uccLen, &localStatus); if(U_SUCCESS(localStatus) && uccLen > 0) { char cc[4]={0}; u_UCharsToChars(ucc, cc, uccLen); /* An explicit currency was requested */ UResourceBundle *currencyResource = ures_open(U_ICUDATA_CURR, locStr, &localStatus); UResourceBundle *currency = ures_getByKeyWithFallback(currencyResource, "Currencies", NULL, &localStatus); currency = ures_getByKeyWithFallback(currency, cc, currency, &localStatus); if(U_SUCCESS(localStatus) && ures_getSize(currency)>2) { // the length is 3 if more data is present currency = ures_getByIndex(currency, 2, currency, &localStatus); int32_t currPatternLen = 0; currPattern = ures_getStringByIndex(currency, (int32_t)0, &currPatternLen, &localStatus); UnicodeString decimalSep = ures_getUnicodeStringByIndex(currency, (int32_t)1, &localStatus); UnicodeString groupingSep = ures_getUnicodeStringByIndex(currency, (int32_t)2, &localStatus); if(U_SUCCESS(localStatus)){ fSymbols[kMonetaryGroupingSeparatorSymbol] = groupingSep; fSymbols[kMonetarySeparatorSymbol] = decimalSep; //pattern.setTo(TRUE, currPattern, currPatternLen); status = localStatus; } } ures_close(currency); ures_close(currencyResource); /* else An explicit currency was requested and is unknown or locale data is malformed. */ /* ucurr_* API will get the correct value later on. */ } // else ignore the error if no currency // Currency Spacing. localStatus = U_ZERO_ERROR; UResourceBundle *currencyResource = ures_open(U_ICUDATA_CURR, locStr, &localStatus); UResourceBundle *currencySpcRes = ures_getByKeyWithFallback(currencyResource, gCurrencySpacingTag, NULL, &localStatus); if (localStatus == U_USING_FALLBACK_WARNING || U_SUCCESS(localStatus)) { const char* keywords[UNUM_CURRENCY_SPACING_COUNT] = { gCurrencyMatchTag, gCurrencySudMatchTag, gCurrencyInsertBtnTag }; localStatus = U_ZERO_ERROR; UResourceBundle *dataRes = ures_getByKeyWithFallback(currencySpcRes, gBeforeCurrencyTag, NULL, &localStatus); if (localStatus == U_USING_FALLBACK_WARNING || U_SUCCESS(localStatus)) { localStatus = U_ZERO_ERROR; for (int32_t i = 0; i < UNUM_CURRENCY_SPACING_COUNT; i++) { currencySpcBeforeSym[i] = ures_getUnicodeStringByKey(dataRes, keywords[i], &localStatus); } ures_close(dataRes); } dataRes = ures_getByKeyWithFallback(currencySpcRes, gAfterCurrencyTag, NULL, &localStatus); if (localStatus == U_USING_FALLBACK_WARNING || U_SUCCESS(localStatus)) { localStatus = U_ZERO_ERROR; for (int32_t i = 0; i < UNUM_CURRENCY_SPACING_COUNT; i++) { currencySpcAfterSym[i] = ures_getUnicodeStringByKey(dataRes, keywords[i], &localStatus); } ures_close(dataRes); } ures_close(currencySpcRes); ures_close(currencyResource); } } ures_close(resource); ures_close(numberElementsRes); }
UBool Transliterator::initializeRegistry(UErrorCode &status) { if (registry != 0) { return TRUE; } registry = new TransliteratorRegistry(status); if (registry == 0 || U_FAILURE(status)) { delete registry; registry = 0; return FALSE; // can't create registry, no recovery } /* The following code parses the index table located in * icu/data/translit/root.txt. The index is an n x 4 table * that follows this format: * <id>{ * file{ * resource{"<resource>"} * direction{"<direction>"} * } * } * <id>{ * internal{ * resource{"<resource>"} * direction{"<direction"} * } * } * <id>{ * alias{"<getInstanceArg"} * } * <id> is the ID of the system transliterator being defined. These * are public IDs enumerated by Transliterator.getAvailableIDs(), * unless the second field is "internal". * * <resource> is a ResourceReader resource name. Currently these refer * to file names under com/ibm/text/resources. This string is passed * directly to ResourceReader, together with <encoding>. * * <direction> is either "FORWARD" or "REVERSE". * * <getInstanceArg> is a string to be passed directly to * Transliterator.getInstance(). The returned Transliterator object * then has its ID changed to <id> and is returned. * * The extra blank field on "alias" lines is to make the array square. */ //static const char translit_index[] = "translit_index"; UResourceBundle *bundle, *transIDs, *colBund; bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); int32_t row, maxRows; if (U_SUCCESS(status)) { maxRows = ures_getSize(transIDs); for (row = 0; row < maxRows; row++) { colBund = ures_getByIndex(transIDs, row, 0, &status); if (U_SUCCESS(status)) { UnicodeString id(ures_getKey(colBund), -1, US_INV); UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); const char* typeStr = ures_getKey(res); UChar type; u_charsToUChars(typeStr, &type, 1); if (U_SUCCESS(status)) { int32_t len = 0; const UChar *resString; switch (type) { case 0x66: // 'f' case 0x69: // 'i' // 'file' or 'internal'; // row[2]=resource, row[3]=direction { resString = ures_getStringByKey(res, "resource", &len, &status); UBool visible = (type == 0x0066 /*f*/); UTransDirection dir = (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == 0x0046 /*F*/) ? UTRANS_FORWARD : UTRANS_REVERSE; registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); } break; case 0x61: // 'a' // 'alias'; row[2]=createInstance argument resString = ures_getString(res, &len, &status); registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); break; } } ures_close(res); } ures_close(colBund); } } ures_close(transIDs); ures_close(bundle); // Manually add prototypes that the system knows about to the // cache. This is how new non-rule-based transliterators are // added to the system. // This is to allow for null pointer check NullTransliterator* tempNullTranslit = new NullTransliterator(); LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator(); UppercaseTransliterator* tempUppercaseTranslit = new UppercaseTransliterator(); TitlecaseTransliterator* tempTitlecaseTranslit = new TitlecaseTransliterator(); UnicodeNameTransliterator* tempUnicodeTranslit = new UnicodeNameTransliterator(); NameUnicodeTransliterator* tempNameUnicodeTranslit = new NameUnicodeTransliterator(); #if !UCONFIG_NO_BREAK_ITERATION // TODO: could or should these transliterators be referenced polymorphically once constructed? BreakTransliterator* tempBreakTranslit = new BreakTransliterator(); #endif // Check for null pointers if (tempNullTranslit == NULL || tempLowercaseTranslit == NULL || tempUppercaseTranslit == NULL || tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL || #if !UCONFIG_NO_BREAK_ITERATION tempBreakTranslit == NULL || #endif tempNameUnicodeTranslit == NULL ) { delete tempNullTranslit; delete tempLowercaseTranslit; delete tempUppercaseTranslit; delete tempTitlecaseTranslit; delete tempUnicodeTranslit; delete tempNameUnicodeTranslit; #if !UCONFIG_NO_BREAK_ITERATION delete tempBreakTranslit; #endif // Since there was an error, remove registry delete registry; registry = NULL; status = U_MEMORY_ALLOCATION_ERROR; return 0; } registry->put(tempNullTranslit, TRUE, status); registry->put(tempLowercaseTranslit, TRUE, status); registry->put(tempUppercaseTranslit, TRUE, status); registry->put(tempTitlecaseTranslit, TRUE, status); registry->put(tempUnicodeTranslit, TRUE, status); registry->put(tempNameUnicodeTranslit, TRUE, status); #if !UCONFIG_NO_BREAK_ITERATION registry->put(tempBreakTranslit, FALSE, status); // FALSE means invisible. #endif RemoveTransliterator::registerIDs(); // Must be within mutex EscapeTransliterator::registerIDs(); UnescapeTransliterator::registerIDs(); NormalizationTransliterator::registerIDs(); AnyTransliterator::registerIDs(); _registerSpecialInverse(UNICODE_STRING_SIMPLE("Null"), UNICODE_STRING_SIMPLE("Null"), FALSE); _registerSpecialInverse(UNICODE_STRING_SIMPLE("Upper"), UNICODE_STRING_SIMPLE("Lower"), TRUE); _registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"), UNICODE_STRING_SIMPLE("Lower"), FALSE); ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup); return TRUE; }
void DataDrivenCalendarTest::testConvert(TestData *testData, const DataMap *settings, UBool forward) { UErrorCode status = U_ZERO_ERROR; Calendar *toCalendar= NULL; const DataMap *currentCase= NULL; char toCalLoc[256] = ""; char fromCalLoc[256] = ""; // build to calendar UnicodeString testSetting = settings->getString("ToCalendar", status); if (U_SUCCESS(status)) { testSetting.extract(0, testSetting.length(), toCalLoc, (const char*)0); toCalendar = Calendar::createInstance(toCalLoc, status); if (U_FAILURE(status)) { dataerrln(UnicodeString("Unable to instantiate ToCalendar for ")+testSetting); return; } } CalendarFieldsSet fromSet, toSet, diffSet; SimpleDateFormat fmt(UnicodeString("EEE MMM dd yyyy / YYYY'-W'ww-ee"), status); if (U_FAILURE(status)) { errcheckln(status, "FAIL: Couldn't create SimpleDateFormat: %s", u_errorName(status)); return; } // Start the processing int n = 0; while (testData->nextCase(currentCase, status)) { ++n; Calendar *fromCalendar= NULL; UnicodeString locale = currentCase->getString("locale", status); if (U_SUCCESS(status)) { locale.extract(0, locale.length(), fromCalLoc, (const char*)0); // default codepage. Invariant codepage doesn't have '@'! fromCalendar = Calendar::createInstance(fromCalLoc, status); if (U_FAILURE(status)) { errln("Unable to instantiate fromCalendar for "+locale); return; } } else { errln("No 'locale' line."); continue; } fromSet.clear(); toSet.clear(); UnicodeString from = currentCase->getString("from", status); if (U_FAILURE(status)) { errln("No 'from' line."); continue; } fromSet.parseFrom(from, status); if (U_FAILURE(status)) { errln("Failed to parse 'from' parameter: "+from); continue; } UnicodeString to = currentCase->getString("to", status); if (U_FAILURE(status)) { errln("No 'to' line."); continue; } toSet.parseFrom(to, &fromSet, status); if (U_FAILURE(status)) { errln("Failed to parse 'to' parameter: "+to); continue; } // now, do it. if (forward) { logln((UnicodeString)"#"+n+" "+locale+"/"+from+" >>> "+toCalLoc+"/" +to); testConvert(n, fromSet, fromCalendar, toSet, toCalendar, forward); } else { logln((UnicodeString)"#"+n+" "+locale+"/"+from+" <<< "+toCalLoc+"/" +to); testConvert(n, toSet, toCalendar, fromSet, fromCalendar, forward); } delete fromCalendar; } delete toCalendar; }
Transliterator* U_EXPORT2 Transliterator::createFromRules(const UnicodeString& ID, const UnicodeString& rules, UTransDirection dir, UParseError& parseError, UErrorCode& status) { Transliterator* t = NULL; TransliteratorParser parser(status); parser.parse(rules, dir, parseError, status); if (U_FAILURE(status)) { return 0; } // NOTE: The logic here matches that in TransliteratorRegistry. if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) { t = new NullTransliterator(); } else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) { t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector.orphanElementAt(0), TRUE); } else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) { // idBlock, no data -- this is an alias. The ID has // been munged from reverse into forward mode, if // necessary, so instantiate the ID in the forward // direction. if (parser.compoundFilter != NULL) { UnicodeString filterPattern; parser.compoundFilter->toPattern(filterPattern, FALSE); t = createInstance(filterPattern + UnicodeString(ID_DELIM) + *((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status); } else t = createInstance(*((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status); if (t != NULL) { t->setID(ID); } } else { UVector transliterators(status); int32_t passNumber = 1; int32_t limit = parser.idBlockVector.size(); if (parser.dataVector.size() > limit) limit = parser.dataVector.size(); for (int32_t i = 0; i < limit; i++) { if (i < parser.idBlockVector.size()) { UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i); if (!idBlock->isEmpty()) { Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status); if (temp != NULL && temp->getDynamicClassID() != NullTransliterator::getStaticClassID()) transliterators.addElement(temp, status); else delete temp; } } if (!parser.dataVector.isEmpty()) { TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + (passNumber++), data, TRUE); // Check if NULL before adding it to transliterators to avoid future usage of NULL pointer. if (temprbt == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return t; } transliterators.addElement(temprbt, status); } } t = new CompoundTransliterator(transliterators, passNumber - 1, parseError, status); // Null pointer check if (t != NULL) { t->setID(ID); t->adoptFilter(parser.orphanCompoundFilter()); } } if (U_SUCCESS(status) && t == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } return t; }
/** * Implements {@link Transliterator#handleTransliterate}. */ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, UBool isIncremental) const { // The failure mode, here and below, is to behave like Any-Null, // if either there is no name data (max len == 0) or there is no // memory (malloc() => NULL). int32_t maxLen = uprv_getMaxCharNameLength(); if (maxLen == 0) { offsets.start = offsets.limit; return; } // Accomodate the longest possible name ++maxLen; // allow for temporary trailing space char* cbuf = (char*) uprv_malloc(maxLen); if (cbuf == NULL) { offsets.start = offsets.limit; return; } UnicodeString openPat(TRUE, OPEN, -1); UnicodeString str, name; int32_t cursor = offsets.start; int32_t limit = offsets.limit; // Modes: // 0 - looking for open delimiter // 1 - after open delimiter int32_t mode = 0; int32_t openPos = -1; // open delim candidate pos UChar32 c; while (cursor < limit) { c = text.char32At(cursor); switch (mode) { case 0: // looking for open delimiter if (c == OPEN_DELIM) { // quick check first openPos = cursor; int32_t i = ICU_Utility::parsePattern(openPat, text, cursor, limit); if (i >= 0 && i < limit) { mode = 1; name.truncate(0); cursor = i; continue; // *** reprocess char32At(cursor) } } break; case 1: // after open delimiter // Look for legal chars. If \s+ is found, convert it // to a single space. If closeDelimiter is found, exit // the loop. If any other character is found, exit the // loop. If the limit is reached, exit the loop. // Convert \s+ => SPACE. This assumes there are no // runs of >1 space characters in names. if (uprv_isRuleWhiteSpace(c)) { // Ignore leading whitespace if (name.length() > 0 && name.charAt(name.length()-1) != SPACE) { name.append(SPACE); // If we are too long then abort. maxLen includes // temporary trailing space, so use '>'. if (name.length() > maxLen) { mode = 0; } } break; } if (c == CLOSE_DELIM) { int32_t len = name.length(); // Delete trailing space, if any if (len > 0 && name.charAt(len-1) == SPACE) { --len; } if (uprv_isInvariantUString(name.getBuffer(), len)) { name.extract(0, len, cbuf, maxLen, US_INV); UErrorCode status = U_ZERO_ERROR; c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status); if (U_SUCCESS(status)) { // Lookup succeeded // assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1); cursor++; // advance over CLOSE_DELIM str.truncate(0); str.append(c); text.handleReplaceBetween(openPos, cursor, str); // Adjust indices for the change in the length of // the string. Do not assume that str.length() == // 1, in case of surrogates. int32_t delta = cursor - openPos - str.length(); cursor -= delta; limit -= delta; // assert(cursor == openPos + str.length()); } } // If the lookup failed, we leave things as-is and // still switch to mode 0 and continue. mode = 0; openPos = -1; // close off candidate continue; // *** reprocess char32At(cursor) } // Check if c is a legal char. We assume here that // legal.contains(OPEN_DELIM) is FALSE, so when we abort a // name, we don't have to go back to openPos+1. if (legal.contains(c)) { name.append(c); // If we go past the longest possible name then abort. // maxLen includes temporary trailing space, so use '>='. if (name.length() >= maxLen) { mode = 0; } } // Invalid character else { --cursor; // Backup and reprocess this character mode = 0; } break; } cursor += UTF_CHAR_LENGTH(c); } offsets.contextLimit += limit - offsets.limit; offsets.limit = limit; // In incremental mode, only advance the cursor up to the last // open delimiter candidate. offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor; uprv_free(cbuf); }
UBool testTag(const char* frag, UBool in_Root, UBool in_te, UBool in_te_IN) { int32_t passNum=pass; /* Make array from input params */ UBool is_in[3]; const char *NAME[] = { "ROOT", "TE", "TE_IN" }; /* Now try to load the desired items */ UResourceBundle* theBundle = NULL; char tag[99]; char action[256]; UErrorCode status = U_ZERO_ERROR,expected_resource_status = U_ZERO_ERROR; UChar* base = NULL; UChar* expected_string = NULL; const UChar* string = NULL; char item_tag[10]; int32_t i,j; int32_t actual_bundle; int32_t resultLen; const char *testdatapath = loadTestData(&status); is_in[0] = in_Root; is_in[1] = in_te; is_in[2] = in_te_IN; strcpy(item_tag, "tag"); status = U_ZERO_ERROR; theBundle = ures_open(testdatapath, "root", &status); if(U_FAILURE(status)) { ures_close(theBundle); log_err("Couldn't open root bundle in %s", testdatapath); return FALSE; } ures_close(theBundle); theBundle = NULL; for (i=0; i<bundles_count; ++i) { strcpy(action,"construction for"); strcat(action, param[i].name); status = U_ZERO_ERROR; theBundle = ures_open(testdatapath, param[i].name, &status); /*theBundle = ures_open("c:\\icu\\icu\\source\\test\\testdata\\testdata", param[i].name, &status);*/ CONFIRM_ErrorCode(status,param[i].expected_constructor_status); if(i == 5) actual_bundle = 0; /* ne -> default */ else if(i == 3) actual_bundle = 1; /* te_NE -> te */ else if(i == 4) actual_bundle = 2; /* te_IN_NE -> te_IN */ else actual_bundle = i; expected_resource_status = U_MISSING_RESOURCE_ERROR; for (j=e_te_IN; j>=e_Root; --j) { if (is_in[j] && param[i].inherits[j]) { if(j == actual_bundle) /* it's in the same bundle OR it's a nonexistent=default bundle (5) */ expected_resource_status = U_ZERO_ERROR; else if(j == 0) expected_resource_status = U_USING_DEFAULT_WARNING; else expected_resource_status = U_USING_FALLBACK_WARNING; log_verbose("%s[%d]::%s: in<%d:%s> inherits<%d:%s>. actual_bundle=%s\n", param[i].name, i, frag, j, is_in[j]?"Yes":"No", j, param[i].inherits[j]?"Yes":"No", param[actual_bundle].name); break; } } for (j=param[i].where; j>=0; --j) { if (is_in[j]) { if(base != NULL) { free(base); base = NULL; } base=(UChar*)malloc(sizeof(UChar)*(strlen(NAME[j]) + 1)); u_uastrcpy(base,NAME[j]); break; } else { if(base != NULL) { free(base); base = NULL; } base = (UChar*) malloc(sizeof(UChar) * 1); *base = 0x0000; } } /*-------------------------------------------------------------------- */ /* string */ strcpy(tag,"string_"); strcat(tag,frag); strcpy(action,param[i].name); strcat(action, ".ures_get(" ); strcat(action,tag); strcat(action, ")"); string= kERROR; status = U_ZERO_ERROR; ures_getStringByKey(theBundle, tag, &resultLen, &status); if(U_SUCCESS(status)) { status = U_ZERO_ERROR; string=ures_getStringByKey(theBundle, tag, &resultLen, &status); } log_verbose("%s got %d, expected %d\n", action, status, expected_resource_status); CONFIRM_ErrorCode(status, expected_resource_status); if(U_SUCCESS(status)){ expected_string=(UChar*)malloc(sizeof(UChar)*(u_strlen(base) + 3)); u_strcpy(expected_string,base); } else { expected_string = (UChar*)malloc(sizeof(UChar)*(u_strlen(kERROR) + 1)); u_strcpy(expected_string,kERROR); } CONFIRM_EQ(string, expected_string); free(expected_string); ures_close(theBundle); } free(base); return (UBool)(passNum == pass); }
static void TestRelativeCrash(void) { static const UChar tzName[] = { 0x0055, 0x0053, 0x002F, 0x0050, 0x0061, 0x0063, 0x0069, 0x0066, 0x0069, 0x0063, 0 }; static const UDate aDate = -631152000000.0; UErrorCode status = U_ZERO_ERROR; UErrorCode expectStatus = U_ILLEGAL_ARGUMENT_ERROR; UDateFormat icudf; icudf = udat_open(UDAT_NONE, UDAT_SHORT_RELATIVE, "en", tzName, -1, NULL, 0, &status); if ( U_SUCCESS(status) ) { const char *what = "???"; { UErrorCode subStatus = U_ZERO_ERROR; what = "udat_set2DigitYearStart"; log_verbose("Trying %s on a relative date..\n", what); udat_set2DigitYearStart(icudf, aDate, &subStatus); if(subStatus == expectStatus) { log_verbose("Success: did not crash on %s, but got %s.\n", what, u_errorName(subStatus)); } else { log_err("FAIL: didn't crash on %s, but got success %s instead of %s. \n", what, u_errorName(subStatus), u_errorName(expectStatus)); } } { /* clone works polymorphically. try it anyways */ UErrorCode subStatus = U_ZERO_ERROR; UDateFormat *oth; what = "clone"; log_verbose("Trying %s on a relative date..\n", what); oth = udat_clone(icudf, &subStatus); if(subStatus == U_ZERO_ERROR) { log_verbose("Success: did not crash on %s, but got %s.\n", what, u_errorName(subStatus)); udat_close(oth); /* ? */ } else { log_err("FAIL: didn't crash on %s, but got %s instead of %s. \n", what, u_errorName(subStatus), u_errorName(expectStatus)); } } { UErrorCode subStatus = U_ZERO_ERROR; what = "udat_get2DigitYearStart"; log_verbose("Trying %s on a relative date..\n", what); udat_get2DigitYearStart(icudf, &subStatus); if(subStatus == expectStatus) { log_verbose("Success: did not crash on %s, but got %s.\n", what, u_errorName(subStatus)); } else { log_err("FAIL: didn't crash on %s, but got success %s instead of %s. \n", what, u_errorName(subStatus), u_errorName(expectStatus)); } } { /* Now udat_toPattern works for relative date formatters, unless localized is TRUE */ UErrorCode subStatus = U_ZERO_ERROR; what = "udat_toPattern"; log_verbose("Trying %s on a relative date..\n", what); udat_toPattern(icudf, TRUE,NULL,0, &subStatus); if(subStatus == expectStatus) { log_verbose("Success: did not crash on %s, but got %s.\n", what, u_errorName(subStatus)); } else { log_err("FAIL: didn't crash on %s, but got success %s instead of %s. \n", what, u_errorName(subStatus), u_errorName(expectStatus)); } } { UErrorCode subStatus = U_ZERO_ERROR; what = "udat_applyPattern"; log_verbose("Trying %s on a relative date..\n", what); udat_applyPattern(icudf, FALSE,tzName,-1); subStatus = U_ILLEGAL_ARGUMENT_ERROR; /* what it should be, if this took an errorcode. */ if(subStatus == expectStatus) { log_verbose("Success: did not crash on %s, but got %s.\n", what, u_errorName(subStatus)); } else { log_err("FAIL: didn't crash on %s, but got success %s instead of %s. \n", what, u_errorName(subStatus), u_errorName(expectStatus)); } } { UChar erabuf[32]; UErrorCode subStatus = U_ZERO_ERROR; what = "udat_getSymbols"; log_verbose("Trying %s on a relative date..\n", what); udat_getSymbols(icudf, UDAT_ERAS,0,erabuf,sizeof(erabuf)/sizeof(erabuf[0]), &subStatus); if(subStatus == U_ZERO_ERROR) { log_verbose("Success: %s returned %s.\n", what, u_errorName(subStatus)); } else { log_err("FAIL: didn't crash on %s, but got %s instead of U_ZERO_ERROR.\n", what, u_errorName(subStatus)); } } { UErrorCode subStatus = U_ZERO_ERROR; UChar symbolValue = 0x0041; what = "udat_setSymbols"; log_verbose("Trying %s on a relative date..\n", what); udat_setSymbols(icudf, UDAT_ERAS,0,&symbolValue,1, &subStatus); /* bogus values */ if(subStatus == expectStatus) { log_verbose("Success: did not crash on %s, but got %s.\n", what, u_errorName(subStatus)); } else { log_err("FAIL: didn't crash on %s, but got success %s instead of %s. \n", what, u_errorName(subStatus), u_errorName(expectStatus)); } } { UErrorCode subStatus = U_ZERO_ERROR; what = "udat_countSymbols"; log_verbose("Trying %s on a relative date..\n", what); udat_countSymbols(icudf, UDAT_ERAS); subStatus = U_ILLEGAL_ARGUMENT_ERROR; /* should have an errorcode. */ if(subStatus == expectStatus) { log_verbose("Success: did not crash on %s, but got %s.\n", what, u_errorName(subStatus)); } else { log_err("FAIL: didn't crash on %s, but got success %s instead of %s. \n", what, u_errorName(subStatus), u_errorName(expectStatus)); } } udat_close(icudf); } else { log_data_err("FAIL: err calling udat_open() ->%s (Are you missing data?)\n", u_errorName(status)); } }
/** * Reads the segments[] array (see applyPattern()) and parses the * segments[1..3] into a Format* object. Stores the format object in * the subformats[] array. Updates the argTypes[] array type * information for the corresponding argument. * * @param formatNumber index into subformats[] for this format * @param segments array of strings with the parsed pattern segments * @param parseError parse error data (output param) * @param ec error code */ void MessageFormat::makeFormat(int32_t formatNumber, UnicodeString* segments, UParseError& parseError, UErrorCode& ec) { if (U_FAILURE(ec)) { return; } // Parse the argument number int32_t argumentNumber = stou(segments[1]); // always unlocalized! if (argumentNumber < 0) { ec = U_INVALID_FORMAT_ERROR; return; } // Parse the format, recording the argument type and creating a // new Format object (except for string arguments). Formattable::Type argType; Format *fmt = NULL; int32_t typeID, styleID; DateFormat::EStyle style; switch (typeID = findKeyword(segments[2], TYPE_IDS)) { case 0: // string argType = Formattable::kString; break; case 1: // number argType = Formattable::kDouble; switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) { case 0: // default fmt = NumberFormat::createInstance(fLocale, ec); break; case 1: // currency fmt = NumberFormat::createCurrencyInstance(fLocale, ec); break; case 2: // percent fmt = NumberFormat::createPercentInstance(fLocale, ec); break; case 3: // integer argType = Formattable::kLong; fmt = createIntegerFormat(fLocale, ec); break; default: // pattern fmt = NumberFormat::createInstance(fLocale, ec); if (fmt && fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) { ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec); } break; } break; case 2: // date case 3: // time argType = Formattable::kDate; styleID = findKeyword(segments[3], DATE_STYLE_IDS); style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; if (typeID == 2) { fmt = DateFormat::createDateInstance(style, fLocale); } else { fmt = DateFormat::createTimeInstance(style, fLocale); } if (styleID < 0 && fmt != NULL && fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) { ((SimpleDateFormat*)fmt)->applyPattern(segments[3]); } break; case 4: // choice argType = Formattable::kDouble; fmt = new ChoiceFormat(segments[3], parseError, ec); break; case 5: // spellout argType = Formattable::kDouble; fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec); break; case 6: // ordinal argType = Formattable::kDouble; fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec); break; case 7: // duration argType = Formattable::kDouble; fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec); break; default: argType = Formattable::kString; ec = U_ILLEGAL_ARGUMENT_ERROR; break; } if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) { ec = U_MEMORY_ALLOCATION_ERROR; } if (!allocateSubformats(formatNumber+1) || !allocateArgTypes(argumentNumber+1)) { ec = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(ec)) { delete fmt; return; } // Parse succeeded; record results in our arrays subformats[formatNumber].format = fmt; subformats[formatNumber].offset = segments[0].length(); subformats[formatNumber].arg = argumentNumber; subformatCount = formatNumber+1; // Careful here: argumentNumber may in general arrive out of // sequence, e.g., "There was {2} on {0,date} (see {1,number})." argTypes[argumentNumber] = argType; if (argumentNumber+1 > argTypeCount) { argTypeCount = argumentNumber+1; } }
static void TestRelativeDateFormat() { UDate today = 0.0; const UDateFormatStyle * stylePtr; const UChar ** monthPtnPtr; UErrorCode status = U_ZERO_ERROR; UCalendar * ucal = ucal_open(trdfZone, -1, trdfLocale, UCAL_GREGORIAN, &status); if ( U_SUCCESS(status) ) { int32_t year, month, day; ucal_setMillis(ucal, ucal_getNow(), &status); year = ucal_get(ucal, UCAL_YEAR, &status); month = ucal_get(ucal, UCAL_MONTH, &status); day = ucal_get(ucal, UCAL_DATE, &status); ucal_setDateTime(ucal, year, month, day, 18, 49, 0, &status); /* set to today at 18:49:00 */ today = ucal_getMillis(ucal, &status); ucal_close(ucal); } if ( U_FAILURE(status) || today == 0.0 ) { log_data_err("Generate UDate for a specified time today fails, error %s - (Are you missing data?)\n", myErrorName(status) ); return; } for (stylePtr = dateStylesList, monthPtnPtr = monthPatnsList; *stylePtr != UDAT_NONE; ++stylePtr, ++monthPtnPtr) { UDateFormat* fmtRelDateTime; UDateFormat* fmtRelDate; UDateFormat* fmtTime; int32_t dayOffset, limit; UFieldPosition fp; UChar strDateTime[kDateAndTimeOutMax]; UChar strDate[kDateOrTimeOutMax]; UChar strTime[kDateOrTimeOutMax]; UChar * strPtr; int32_t dtpatLen; fmtRelDateTime = udat_open(UDAT_SHORT, *stylePtr | UDAT_RELATIVE, trdfLocale, trdfZone, -1, NULL, 0, &status); if ( U_FAILURE(status) ) { log_data_err("udat_open timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) fails, error %s (Are you missing data?)\n", *stylePtr, myErrorName(status) ); continue; } fmtRelDate = udat_open(UDAT_NONE, *stylePtr | UDAT_RELATIVE, trdfLocale, trdfZone, -1, NULL, 0, &status); if ( U_FAILURE(status) ) { log_err("udat_open timeStyle NONE dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); udat_close(fmtRelDateTime); continue; } fmtTime = udat_open(UDAT_SHORT, UDAT_NONE, trdfLocale, trdfZone, -1, NULL, 0, &status); if ( U_FAILURE(status) ) { log_err("udat_open timeStyle SHORT dateStyle NONE fails, error %s\n", myErrorName(status) ); udat_close(fmtRelDateTime); udat_close(fmtRelDate); continue; } dtpatLen = udat_toPatternRelativeDate(fmtRelDateTime, strDate, kDateAndTimeOutMax, &status); if ( U_FAILURE(status) ) { log_err("udat_toPatternRelativeDate timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); status = U_ZERO_ERROR; } else if ( u_strstr(strDate, *monthPtnPtr) == NULL || dtpatLen != u_strlen(strDate) ) { log_err("udat_toPatternRelativeDate timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) date pattern incorrect\n", *stylePtr ); } dtpatLen = udat_toPatternRelativeTime(fmtRelDateTime, strTime, kDateAndTimeOutMax, &status); if ( U_FAILURE(status) ) { log_err("udat_toPatternRelativeTime timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); status = U_ZERO_ERROR; } else if ( u_strstr(strTime, minutesPatn) == NULL || dtpatLen != u_strlen(strTime) ) { log_err("udat_toPatternRelativeTime timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) time pattern incorrect\n", *stylePtr ); } dtpatLen = udat_toPattern(fmtRelDateTime, FALSE, strDateTime, kDateAndTimeOutMax, &status); if ( U_FAILURE(status) ) { log_err("udat_toPattern timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); status = U_ZERO_ERROR; } else if ( u_strstr(strDateTime, strDate) == NULL || u_strstr(strDateTime, strTime) == NULL || dtpatLen != u_strlen(strDateTime) ) { log_err("udat_toPattern timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) dateTime pattern incorrect\n", *stylePtr ); } udat_applyPatternRelative(fmtRelDateTime, strDate, u_strlen(strDate), newTimePatn, u_strlen(newTimePatn), &status); if ( U_FAILURE(status) ) { log_err("udat_applyPatternRelative timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); status = U_ZERO_ERROR; } else { udat_toPattern(fmtRelDateTime, FALSE, strDateTime, kDateAndTimeOutMax, &status); if ( U_FAILURE(status) ) { log_err("udat_toPattern timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); status = U_ZERO_ERROR; } else if ( u_strstr(strDateTime, newTimePatn) == NULL ) { log_err("udat_applyPatternRelative timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) didn't update time pattern\n", *stylePtr ); } } udat_applyPatternRelative(fmtRelDateTime, strDate, u_strlen(strDate), strTime, u_strlen(strTime), &status); /* restore original */ fp.field = UDAT_MINUTE_FIELD; for (dayOffset = -2, limit = 2; dayOffset <= limit; ++dayOffset) { UDate dateToUse = today + (float)dayOffset*dayInterval; udat_format(fmtRelDateTime, dateToUse, strDateTime, kDateAndTimeOutMax, &fp, &status); if ( U_FAILURE(status) ) { log_err("udat_format timeStyle SHORT dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); status = U_ZERO_ERROR; } else { udat_format(fmtRelDate, dateToUse, strDate, kDateOrTimeOutMax, NULL, &status); if ( U_FAILURE(status) ) { log_err("udat_format timeStyle NONE dateStyle (%d | UDAT_RELATIVE) fails, error %s\n", *stylePtr, myErrorName(status) ); status = U_ZERO_ERROR; } else if ( u_strstr(strDateTime, strDate) == NULL ) { log_err("relative date string not found in udat_format timeStyle SHORT dateStyle (%d | UDAT_RELATIVE)\n", *stylePtr ); } udat_format(fmtTime, dateToUse, strTime, kDateOrTimeOutMax, NULL, &status); if ( U_FAILURE(status) ) { log_err("udat_format timeStyle SHORT dateStyle NONE fails, error %s\n", myErrorName(status) ); status = U_ZERO_ERROR; } else if ( u_strstr(strDateTime, strTime) == NULL ) { log_err("time string not found in udat_format timeStyle SHORT dateStyle (%d | UDAT_RELATIVE)\n", *stylePtr ); } strPtr = u_strstr(strDateTime, minutesStr); if ( strPtr != NULL ) { int32_t beginIndex = strPtr - strDateTime; if ( fp.beginIndex != beginIndex ) { log_err("UFieldPosition beginIndex %d, expected %d, in udat_format timeStyle SHORT dateStyle (%d | UDAT_RELATIVE)\n", fp.beginIndex, beginIndex, *stylePtr ); } } else { log_err("minutes string not found in udat_format timeStyle SHORT dateStyle (%d | UDAT_RELATIVE)\n", *stylePtr ); } } } udat_close(fmtRelDateTime); udat_close(fmtRelDate); udat_close(fmtTime); } }
U_CAPI void U_EXPORT2 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); if(indexMap==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; } /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ ubidi_countRuns(pBiDi, pErrorCode); if(U_SUCCESS(*pErrorCode)) { /* fill a visual-to-logical index map using the runs[] */ Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; if (pBiDi->resultLength<=0) { return; } visualStart=0; for(; runs<runsLimit; ++runs) { logicalStart=runs->logicalStart; visualLimit=runs->visualLimit; if(IS_EVEN_RUN(logicalStart)) { do { /* LTR */ *pi++ = logicalStart++; } while(++visualStart<visualLimit); } else { REMOVE_ODD_BIT(logicalStart); logicalStart+=visualLimit-visualStart; /* logicalLimit */ do { /* RTL */ *pi++ = --logicalStart; } while(++visualStart<visualLimit); } /* visualStart==visualLimit; */ } if(pBiDi->insertPoints.size>0) { int32_t markFound=0, runCount=pBiDi->runCount; int32_t insertRemove, i, j, k; runs=pBiDi->runs; /* count all inserted marks */ for(i=0; i<runCount; i++) { insertRemove=runs[i].insertRemove; if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { markFound++; } if(insertRemove&(LRM_AFTER|RLM_AFTER)) { markFound++; } } /* move back indexes by number of preceding marks */ k=pBiDi->resultLength; for(i=runCount-1; i>=0 && markFound>0; i--) { insertRemove=runs[i].insertRemove; if(insertRemove&(LRM_AFTER|RLM_AFTER)) { indexMap[--k]= UBIDI_MAP_NOWHERE; markFound--; } visualStart= i>0 ? runs[i-1].visualLimit : 0; for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) { indexMap[--k]=indexMap[j]; } if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { indexMap[--k]= UBIDI_MAP_NOWHERE; markFound--; } } } else if(pBiDi->controlCount>0) { int32_t runCount=pBiDi->runCount, logicalEnd; int32_t insertRemove, length, i, j, k, m; UChar uchar; UBool evenRun; runs=pBiDi->runs; visualStart=0; /* move forward indexes by number of preceding controls */ k=0; for(i=0; i<runCount; i++, visualStart+=length) { length=runs[i].visualLimit-visualStart; insertRemove=runs[i].insertRemove; /* if no control found yet, nothing to do in this run */ if((insertRemove==0)&&(k==visualStart)) { k+=length; continue; } /* if no control in this run */ if(insertRemove==0) { visualLimit=runs[i].visualLimit; for(j=visualStart; j<visualLimit; j++) { indexMap[k++]=indexMap[j]; } continue; } logicalStart=runs[i].logicalStart; evenRun=IS_EVEN_RUN(logicalStart); REMOVE_ODD_BIT(logicalStart); logicalEnd=logicalStart+length-1; for(j=0; j<length; j++) { m= evenRun ? logicalStart+j : logicalEnd-j; uchar=pBiDi->text[m]; if(!IS_BIDI_CONTROL_CHAR(uchar)) { indexMap[k++]=m; } } } } } }
bool hasKey(const char* key) { UErrorCode status = U_ZERO_ERROR; ures_getStringByKey(bundle_, key, NULL, &status); return U_SUCCESS(status); }