void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); if (U_FAILURE(status)) { return; } UnicodeSet exemplars; ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); if (U_SUCCESS(status)) { initialLabels_->addAll(exemplars); return; } status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR // The locale data did not include explicit Index characters. // Synthesize a set of them from the locale's standard exemplar characters. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); if (U_FAILURE(status)) { return; } // question: should we add auxiliary exemplars? if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { exemplars.add(0x61, 0x7A); } if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables // cut down to small list exemplars.remove(0xAC00, 0xD7A3). add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). add(0xD30C).add(0xD558); } if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. UnicodeSet ethiopic( UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); UnicodeSetIterator it(ethiopic); while (it.next() && !it.isString()) { if ((it.getCodepoint() & 0x7) != 0) { exemplars.remove(it.getCodepoint()); } } } // Upper-case any that aren't already so. // (We only do this for synthesized index characters.) UnicodeSetIterator it(exemplars); UnicodeString upperC; while (it.next()) { const UnicodeString &exemplarC = it.getString(); upperC = exemplarC; upperC.toUpper(locale); initialLabels_->add(upperC); } }
std::string kiwix::ucFirst (const std::string &word) { if (word.empty()) return ""; std::string ucFirstWord; #ifdef __ANDROID__ ucFirstWord = word; ucFirstWord[0] = toupper(ucFirstWord[0]); #else UnicodeString firstLetter = UnicodeString(word.substr(0, 1).c_str()); UnicodeString ucFirstLetter = firstLetter.toUpper(); ucFirstLetter.toUTF8String(ucFirstWord); ucFirstWord += word.substr(1); #endif return ucFirstWord; }
/**
 * Exercises UnicodeString::toLower(), toUpper() and foldCase() against
 * hard-coded expected strings and reports every mismatch through errln().
 *
 * The function runs, in order: Turkish/German/Greek whole-string checks,
 * array-based checks with the root and Turkish locales, supplementary
 * (Deseret) characters, a few mixed problem cases, context-sensitive
 * special casing (only when normalization is compiled in) and case folding.
 * test3/test4 are reused from step to step, so the order of the statements
 * in the first section matters.
 */
void StringCaseTest::TestCaseConversion() {
    static const UChar uppercaseGreek[] =
        { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
          0x39f, 0x3a3, 0 };
        // "IESUS CHRISTOS"

    static const UChar lowercaseGreek[] =
        { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
          0x3bf, 0x3c2, 0 };
        // "iesus christos"

    static const UChar lowercaseTurkish[] =
        { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20,
          0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65,
          0x21, 0 };

    static const UChar uppercaseTurkish[] =
        { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45,
          0x2c, 0x20, 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };

    UnicodeString expectedResult;
    UnicodeString test3;

    // Input starts with U+0130 followed by ASCII capitals.
    test3 += (UChar32)0x0130;
    test3 += "STANBUL, NOT CONSTANTINOPLE!";

    // 1. Lowercase with Locale(""): U+0130 is expected to become "i" + U+0307.
    UnicodeString test4(test3);
    test4.toLower(Locale(""));
    expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
    if (test4 != expectedResult)
        errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // 2. Lowercase with the Turkish locale; expected text is lowercaseTurkish.
    test4 = test3;
    test4.toLower(Locale("tr", "TR"));
    expectedResult = lowercaseTurkish;
    if (test4 != expectedResult)
        errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // Input containing U+0131, uppercased with Locale("") and then with tr_TR.
    test3 = "topkap";
    test3 += (UChar32)0x0131;
    test3 += " palace, istanbul";
    test4 = test3;

    test4.toUpper(Locale(""));
    expectedResult = "TOPKAPI PALACE, ISTANBUL";
    if (test4 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    test4 = test3;
    test4.toUpper(Locale("tr", "TR"));
    expectedResult = uppercaseTurkish;
    if (test4 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // German: each U+00DF in the input is expected to become "SS".
    test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");

    test3.toUpper(Locale("de", "DE"));
    expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
    if (test3 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");

    // Greek: lowercase the capitals with el_GR, then uppercase the lowercase
    // text with the no-argument toUpper().
    test4.replace(0, test4.length(), uppercaseGreek);

    test4.toLower(Locale("el", "GR"));
    expectedResult = lowercaseGreek;
    if (test4 != expectedResult)
        errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    test4.replace(0, test4.length(), lowercaseGreek);

    test4.toUpper();
    expectedResult = uppercaseGreek;
    if (test4 != expectedResult)
        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");

    // more string case mapping tests with the new implementation
    {
        // Input/expected pairs as UChar arrays (not NUL-terminated); lengths
        // are derived from sizeof below.
        static const UChar

        beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
        lowerRoot[]=   { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
        lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },

        beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
        upperRoot[]=   { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
        upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },

        beforeMiniUpper[]= { 0xdf, 0x61 },
        miniUpper[]=       { 0x53, 0x53, 0x41 };

        UnicodeString s;

        /* lowercase with root locale */
        s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR));
        s.toLower("");
        if( s.length()!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) ||
            s!=UnicodeString(FALSE, lowerRoot, s.length())
        ) {
            errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, (int32_t)(sizeof(lowerRoot)/U_SIZEOF_UCHAR)) + "\"");
        }

        /* lowercase with turkish locale */
        s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR));
        // setCharAt() stores the value that is already at index 0, so the text
        // is unchanged. NOTE(review): presumably done to exercise the call on
        // a string that was modified after construction - confirm.
        s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
        if( s.length()!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) ||
            s!=UnicodeString(FALSE, lowerTurkish, s.length())
        ) {
            errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, (int32_t)(sizeof(lowerTurkish)/U_SIZEOF_UCHAR)) + "\"");
        }

        /* uppercase with root locale */
        s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR));
        s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
        if( s.length()!=(sizeof(upperRoot)/U_SIZEOF_UCHAR) ||
            s!=UnicodeString(FALSE, upperRoot, s.length())
        ) {
            errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, (int32_t)(sizeof(upperRoot)/U_SIZEOF_UCHAR)) + "\"");
        }

        /* uppercase with turkish locale */
        s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR));
        s.toUpper(Locale("tr"));
        if( s.length()!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) ||
            s!=UnicodeString(FALSE, upperTurkish, s.length())
        ) {
            errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, (int32_t)(sizeof(upperTurkish)/U_SIZEOF_UCHAR)) + "\"");
        }

        /* uppercase a short string with root locale */
        s=UnicodeString(FALSE, beforeMiniUpper, (int32_t)(sizeof(beforeMiniUpper)/U_SIZEOF_UCHAR));
        s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
        if( s.length()!=(sizeof(miniUpper)/U_SIZEOF_UCHAR) ||
            s!=UnicodeString(FALSE, miniUpper, s.length())
        ) {
            errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, (int32_t)(sizeof(miniUpper)/U_SIZEOF_UCHAR)) + "\"");
        }
    }

    // test some supplementary characters (>= Unicode 3.1)
    {
        UnicodeString t;

        // Each string holds two code points written with \U escapes and
        // expanded by unescape().
        UnicodeString
            deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
            deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
            deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
        (t=deseretInput).toLower();
        if(t!=deseretLower) {
            errln("error lowercasing Deseret (plane 1) characters");
        }
        (t=deseretInput).toUpper();
        if(t!=deseretUpper) {
            errln("error uppercasing Deseret (plane 1) characters");
        }
    }

    // test some more cases that looked like problems
    {
        UnicodeString t;

        UnicodeString
            ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
            ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
            ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
        (t=ljInput).toLower("en");
        if(t!=ljLower) {
            errln("error lowercasing LJ characters");
        }
        (t=ljInput).toUpper("en");
        if(t!=ljUpper) {
            errln("error uppercasing LJ characters");
        }
    }

#if !UCONFIG_NO_NORMALIZATION
    // some context-sensitive casing depends on normalization data being present

    // Unicode 3.1.1 SpecialCasing tests
    {
        UnicodeString t;

        // sigmas preceded and/or followed by cased letters
        UnicodeString
            sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
            sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
            sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();

        (t=sigmas).toLower();
        if(t!=sigmasLower) {
            errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
        }

        (t=sigmas).toUpper(Locale(""));
        if(t!=sigmasUpper) {
            errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
        }

        // turkish & azerbaijani dotless i & dotted I
        // remove dot above if there was a capital I before and there are no more accents above
        UnicodeString
            dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
            dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
            dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();

        (t=dots).toLower("tr");
        if(t!=dotsTurkish) {
            errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
        }

        (t=dots).toLower("de");
        if(t!=dotsDefault) {
            errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
        }
    }

    // more Unicode 3.1.1 tests
    {
        UnicodeString t;

        // lithuanian dot above in uppercasing
        UnicodeString
            dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
            dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
            dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();

        (t=dots).toUpper("lt");
        if(t!=dotsLithuanian) {
            errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
        }

        (t=dots).toUpper("de");
        if(t!=dotsDefault) {
            errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
        }

        // lithuanian adds dot above to i in lowercasing if there are more above accents
        UnicodeString
            i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
            iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
            iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();

        (t=i).toLower("lt");
        if(t!=iLithuanian) {
            errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
        }

        (t=i).toLower("de");
        if(t!=iDefault) {
            errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
        }
    }

#endif

    // test case folding
    {
        // s is the input; f is the expected default folding and g the expected
        // folding with U_FOLD_CASE_EXCLUDE_SPECIAL_I. They differ only in how
        // U+0130 is folded ("i" + U+0307 versus plain "i").
        UnicodeString
            s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
            f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
            g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
            t;

        (t=s).foldCase();
        if(f!=t) {
            errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
        }

        // alternate handling for dotted I/dotless i (U+0130, U+0131)
        (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
        if(g!=t) {
            errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
        }
    }
}
void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; IcuTestErrorCode errorCode(*this, "TestCasingImpl"); LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. 
UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); if(errorCode.isFailure()) { errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); errorCode.reset(); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); errln("expected \"" + output + "\" got \"" + result + "\"" ); } }
void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { errln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; UCaseMap *csm; UErrorCode errorCode; errorCode=U_ZERO_ERROR; csm=ucasemap_open(localeID, options, &errorCode); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. int32_t size=1; // Not 0 because that only gives preflighting. 
UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, &size, &errorCode); ucasemap_setBreakIterator(csm, clone, &errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), &errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? resultLength : 0); if(U_FAILURE(errorCode)) { errln("error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); } ucasemap_close(csm); }
/**
 * Parses a custom time zone ID: the GMT_ID prefix (matched against an
 * upper-cased copy of id), a mandatory '+' or '-', and then either
 * colon-separated fields (h[h]:mm[:ss]) or a run of 1 to 6 digits
 * (H, HH, Hmm, HHmm, Hmmss, HHmmss).
 *
 * @param id    the ID to parse.
 * @param sign  receives 1 for '+' and -1 for '-'.
 * @param hour  receives the hour field.
 * @param min   receives the minute field (0 when absent).
 * @param sec   receives the second field (0 when absent).
 * @return TRUE when the ID was parsed and no field exceeds kMAX_CUSTOM_HOUR /
 *         kMAX_CUSTOM_MIN / kMAX_CUSTOM_SEC; FALSE otherwise. The output
 *         parameters are left untouched when the prefix does not match, but
 *         may already be modified when FALSE is returned later.
 */
UBool TimeZone::parseCustomID(const UnicodeString& id, int32_t& sign, int32_t& hour, int32_t& min, int32_t& sec) {
    // Placeholder stored in the Formattable before each parse() call.
    static const int32_t kParseFailed = -99999;

    NumberFormat* numberFormat = 0;
    UnicodeString idUppercase = id;
    idUppercase.toUpper();

    if (id.length() > GMT_ID_LENGTH &&
        idUppercase.startsWith(GMT_ID))
    {
        // Parsing starts right after the prefix.
        ParsePosition pos(GMT_ID_LENGTH);
        sign = 1;
        hour = 0;
        min = 0;
        sec = 0;

        if (id[pos.getIndex()] == MINUS /*'-'*/) {
            sign = -1;
        } else if (id[pos.getIndex()] != PLUS /*'+'*/) {
            return FALSE;
        }
        pos.setIndex(pos.getIndex() + 1);

        UErrorCode success = U_ZERO_ERROR;
        numberFormat = NumberFormat::createInstance(success);
        if(U_FAILURE(success)){
            return FALSE;
        }
        numberFormat->setParseIntegerOnly(TRUE);

        // Look for either hh:mm, hhmm, or hh
        int32_t start = pos.getIndex();
        Formattable n(kParseFailed);
        numberFormat->parse(id, n, pos);
        if (pos.getIndex() == start) {
            // parse() did not advance: nothing numeric after the sign.
            delete numberFormat;
            return FALSE;
        }
        hour = n.getLong();

        if (pos.getIndex() < id.length()) {
            // Text remains after the first number: it must be at most two
            // characters of hour followed by a colon.
            if (pos.getIndex() - start > 2
                || id[pos.getIndex()] != COLON) {
                delete numberFormat;
                return FALSE;
            }
            // hh:mm
            pos.setIndex(pos.getIndex() + 1);
            int32_t oldPos = pos.getIndex();
            n.setLong(kParseFailed);
            numberFormat->parse(id, n, pos);
            if ((pos.getIndex() - oldPos) != 2) {
                // must be 2 digits
                delete numberFormat;
                return FALSE;
            }
            min = n.getLong();
            if (pos.getIndex() < id.length()) {
                if (id[pos.getIndex()] != COLON) {
                    delete numberFormat;
                    return FALSE;
                }
                // [:ss]
                pos.setIndex(pos.getIndex() + 1);
                oldPos = pos.getIndex();
                n.setLong(kParseFailed);
                numberFormat->parse(id, n, pos);
                // Seconds must be exactly two characters and end the ID.
                if (pos.getIndex() != id.length()
                        || (pos.getIndex() - oldPos) != 2) {
                    delete numberFormat;
                    return FALSE;
                }
                sec = n.getLong();
            }
        } else {
            // Supported formats are below -
            //
            // HHmmss
            // Hmmss
            // HHmm
            // Hmm
            // HH
            // H

            int32_t length = pos.getIndex() - start;
            if (length <= 0 || 6 < length) {
                // invalid length
                delete numberFormat;
                return FALSE;
            }
            // The whole run was parsed into hour; split it by decimal position
            // according to how many characters were consumed.
            switch (length) {
                case 1:
                case 2:
                    // already set to hour
                    break;
                case 3:
                case 4:
                    min = hour % 100;
                    hour /= 100;
                    break;
                case 5:
                case 6:
                    sec = hour % 100;
                    min = (hour/100) % 100;
                    hour /= 10000;
                    break;
            }
        }

        delete numberFormat;

        // Only upper bounds are checked here.
        if (hour > kMAX_CUSTOM_HOUR || min > kMAX_CUSTOM_MIN || sec > kMAX_CUSTOM_SEC) {
            return FALSE;
        }
        return TRUE;
    }
    return FALSE;
}
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { if (U_FAILURE(status)) { return; } // Chinese index characters, which are specific to each of the several Chinese tailorings, // take precedence over the single locale data exemplar set per language. const char *language = locale.getLanguage(); if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 || uprv_strcmp(language, "ko") == 0) { // TODO: This should be done regardless of the language, but it's expensive. // We should add a Collator function (can be @internal) // to enumerate just the contractions that start with a given code point or string. if (addChineseIndexCharacters(status) || U_FAILURE(status)) { return; } } LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); if (U_FAILURE(status)) { return; } UnicodeSet exemplars; ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); if (U_SUCCESS(status)) { initialLabels_->addAll(exemplars); return; } status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR // The locale data did not include explicit Index characters. // Synthesize a set of them from the locale's standard exemplar characters. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); if (U_FAILURE(status)) { return; } // question: should we add auxiliary exemplars? if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { exemplars.add(0x61, 0x7A); } if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables // cut down to small list exemplars.remove(0xAC00, 0xD7A3). add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). add(0xD30C).add(0xD558); } if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. 
UnicodeSet ethiopic( UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); UnicodeSetIterator it(ethiopic); while (it.next() && !it.isString()) { if ((it.getCodepoint() & 0x7) != 0) { exemplars.remove(it.getCodepoint()); } } } // Upper-case any that aren't already so. // (We only do this for synthesized index characters.) UnicodeSetIterator it(exemplars); UnicodeString upperC; while (it.next()) { const UnicodeString &exemplarC = it.getString(); upperC = exemplarC; upperC.toUpper(locale); initialLabels_->add(upperC); } }
void AlphabeticIndex::getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status) { if (U_FAILURE(status)) { return; } LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); UnicodeSet exemplars; ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); if (U_SUCCESS(status)) { dest.addAll(exemplars); return; } status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR // Locale data did not include explicit Index characters. // Synthesize a set of them from the locale's standard exemplar characters. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); if (U_FAILURE(status)) { return; } // Upper-case any that aren't already so. // (We only do this for synthesized index characters.) UnicodeSetIterator it(exemplars); UnicodeString upperC; UnicodeSet lowersToRemove; UnicodeSet uppersToAdd; while (it.next()) { const UnicodeString &exemplarC = it.getString(); upperC = exemplarC; upperC.toUpper(locale); if (exemplarC != upperC) { lowersToRemove.add(exemplarC); uppersToAdd.add(upperC); } } exemplars.removeAll(lowersToRemove); exemplars.addAll(uppersToAdd); // get the exemplars, and handle special cases // question: should we add auxiliary exemplars? if (exemplars.containsSome(*CORE_LATIN)) { exemplars.addAll(*CORE_LATIN); } if (exemplars.containsSome(*HANGUL)) { // cut down to small list UnicodeSet BLOCK_HANGUL_SYLLABLES(UNICODE_STRING_SIMPLE("[:block=hangul_syllables:]"), status); exemplars.removeAll(BLOCK_HANGUL_SYLLABLES); exemplars.addAll(*HANGUL); } if (exemplars.containsSome(*ETHIOPIC)) { // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. UnicodeSetIterator it(*ETHIOPIC); while (it.next() && !it.isString()) { if ((it.getCodepoint() & 0x7) != 0) { exemplars.remove(it.getCodepoint()); } } } dest.addAll(exemplars); }
// Renders the label of a text symbolizer into the grid.
//
// Placement options are tried one after another until one yields at least
// one placement for some geometry of the feature. For each option the label
// text is evaluated from the symbolizer's name expression, transformed
// (upper/lower/title case) and laid out with the configured font faces;
// glyphs are then rendered with the feature id. The feature is registered
// with the pixmap only if something was placed.
//
// Returns early when the symbolizer has no name expression; throws
// config_error when no usable font face or stroker is available.
void grid_renderer<T>::process(text_symbolizer const& sym, Feature const& feature, proj_transform const& prj_trans)
{
    typedef coord_transform2<CoordTransform,geometry_type> path_type;

    bool placed = false;
    text_placement_info_ptr options = sym.get_placement_options()->get_placement_info();

    while (!placed && options->next())
    {
        expression_ptr name_expr = sym.get_name();
        if (!name_expr)
        {
            return;
        }

        value_type name_value = boost::apply_visitor(evaluate<Feature,value_type>(feature),*name_expr);
        UnicodeString text = name_value.to_unicode();

        // The ICU case functions modify the string in place.
        if (sym.get_text_transform() == UPPERCASE)
        {
            text.toUpper();
        }
        else if (sym.get_text_transform() == LOWERCASE)
        {
            text.toLower();
        }
        else if (sym.get_text_transform() == CAPITALIZE)
        {
            text.toTitle(NULL);
        }

        if (text.length() < 1)
        {
            continue;
        }

        // Prefer the fontset when one is configured, else the single face name.
        face_set_ptr faces;
        if (sym.get_fontset().size() == 0)
        {
            faces = font_manager_.get_face_set(sym.get_face_name());
        }
        else
        {
            faces = font_manager_.get_face_set(sym.get_fontset());
        }

        stroker_ptr strk = font_manager_.get_stroker();
        if (faces->size() == 0 || !strk)
        {
            throw config_error("Unable to find specified font face '" + sym.get_face_name() + "'");
        }

        text_renderer<T> ren(pixmap_, faces, *strk);
        ren.set_pixel_size(options->text_size * (scale_factor_ * (1.0/pixmap_.get_resolution())));
        ren.set_fill(sym.get_fill());
        ren.set_halo_fill(sym.get_halo_fill());
        ren.set_halo_radius(sym.get_halo_radius() * scale_factor_);
        ren.set_opacity(sym.get_text_opacity());

        // /pixmap_.get_resolution() ?
        box2d<double> dims(0,0,width_,height_);
        placement_finder<label_collision_detector4> finder(detector_,dims);

        string_info info(text);
        faces->get_string_info(info);

        const unsigned geometry_count = feature.num_geometries();
        for (unsigned g = 0; g < geometry_count; ++g)
        {
            geometry_type const& geom = feature.get_geometry(g);
            if (geom.num_points() == 0)
            {
                continue; // empty geometries are not worth the effort
            }

            while (!placed && options->next_position_only())
            {
                placement text_placement(info, sym, scale_factor_);
                text_placement.avoid_edges = sym.get_avoid_edges();

                if (sym.get_label_placement() == POINT_PLACEMENT ||
                    sym.get_label_placement() == INTERIOR_PLACEMENT)
                {
                    double label_x, label_y, z=0.0;
                    if (sym.get_label_placement() == POINT_PLACEMENT)
                    {
                        geom.label_position(&label_x, &label_y);
                    }
                    else
                    {
                        geom.label_interior_position(&label_x, &label_y);
                    }
                    prj_trans.backward(label_x,label_y, z);
                    t_.forward(&label_x,&label_y);

                    double angle = 0.0;
                    expression_ptr angle_expr = sym.get_orientation();
                    if (angle_expr)
                    {
                        // a rotation is requested
                        value_type angle_value = boost::apply_visitor(evaluate<Feature,value_type>(feature),*angle_expr);
                        angle = angle_value.to_double();
                    }

                    finder.find_point_placement(text_placement, options,
                                                label_x, label_y,
                                                angle, sym.get_line_spacing(),
                                                sym.get_character_spacing());
                    finder.update_detector(text_placement);
                }
                else if (geom.num_points() > 1 && sym.get_label_placement() == LINE_PLACEMENT)
                {
                    path_type path(t_,geom,prj_trans);
                    finder.find_line_placements<path_type>(text_placement, options, path);
                }

                if (text_placement.placements.size() == 0)
                {
                    continue;
                }
                placed = true;

                const unsigned int placement_count = text_placement.placements.size();
                for (unsigned int p = 0; p < placement_count; ++p)
                {
                    double x = text_placement.placements[p].starting_x;
                    double y = text_placement.placements[p].starting_y;
                    ren.prepare_glyphs(&text_placement.placements[p]);
                    ren.render_id(feature.id(),x,y,2);
                }
            }
        }
    }

    if (placed)
    {
        pixmap_.add_feature(feature);
    }
}
/**
 * Parse a custom time zone identifier and return a corresponding zone.
 * @param id a string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or
 * GMT[+-]hh. The GMT prefix is matched against an upper-cased copy of id.
 * @return a newly created SimpleTimeZone with the given offset and
 * no Daylight Savings Time, or null if the id cannot be parsed or the
 * number parser cannot be created.
 */
TimeZone* TimeZone::createCustomTimeZone(const UnicodeString& id)
{
    // Placeholder stored in the Formattable before each parse() call.
    static const int32_t kParseFailed = -99999;

    NumberFormat* numberFormat = 0;

    UnicodeString idUppercase = id;
    idUppercase.toUpper();

    if (id.length() > GMT_ID_LENGTH &&
        idUppercase.startsWith(GMT_ID))
    {
        ParsePosition pos(GMT_ID_LENGTH);
        UBool negative = FALSE;
        int32_t offset;  // in minutes until converted below

        if (id[pos.getIndex()] == 0x002D /*'-'*/)
            negative = TRUE;
        else if (id[pos.getIndex()] != 0x002B /*'+'*/)
            return 0;
        pos.setIndex(pos.getIndex() + 1);

        UErrorCode success = U_ZERO_ERROR;
        numberFormat = NumberFormat::createInstance(success);
        if (U_FAILURE(success) || numberFormat == 0) {
            // Without a parser the id cannot be interpreted; do not
            // dereference a missing NumberFormat.
            delete numberFormat;
            return 0;
        }
        numberFormat->setParseIntegerOnly(TRUE);

        // Look for either hh:mm, hhmm, or hh
        int32_t start = pos.getIndex();

        Formattable n(kParseFailed);

        numberFormat->parse(id, n, pos);
        if (pos.getIndex() == start) {
            // parse() did not advance: nothing numeric after the sign.
            delete numberFormat;
            return 0;
        }
        offset = n.getLong();

        if (pos.getIndex() < id.length() &&
            id[pos.getIndex()] == 0x003A /*':'*/)
        {
            // hh:mm
            offset *= 60;
            pos.setIndex(pos.getIndex() + 1);
            int32_t oldPos = pos.getIndex();
            n.setLong(kParseFailed);
            numberFormat->parse(id, n, pos);
            if (pos.getIndex() == oldPos) {
                delete numberFormat;
                return 0;
            }
            offset += n.getLong();
        }
        else
        {
            // hhmm or hh

            // Be strict about interpreting something as hh; it must be
            // an offset < 30, and it must be one or two digits. Thus
            // 0010 is interpreted as 00:10, but 10 is interpreted as
            // 10:00.
            if (offset < 30 && (pos.getIndex() - start) <= 2)
                offset *= 60; // hh, from 00 to 29; 30 is 00:30
            else
                offset = offset % 100 + offset / 100 * 60; // hhmm
        }

        if(negative)
            offset = -offset;

        delete numberFormat;
        // SimpleTimeZone takes the raw offset in milliseconds.
        return new SimpleTimeZone(offset * 60000, CUSTOM_ID);
    }
    return 0;
}
// Renders a shield symbolizer for one feature: a marker image with the
// feature's label text drawn over it.  Depending on the symbolizer's label
// placement mode the shield is placed per point/vertex/interior position of
// each geometry, or repeatedly along geometries with more than one point.
// Returns without drawing when there is no name expression, no text, no
// marker, no stroker, or an empty face set.
void agg_renderer<T>::process(shield_symbolizer const& sym,
                              Feature const& feature,
                              proj_transform const& prj_trans)
{
    typedef coord_transform2<CoordTransform,geometry_type> path_type;

    text_placement_info_ptr placement_options =
        sym.get_placement_options()->get_placement_info();
    placement_options->next();
    placement_options->next_position_only();

    // --- label text -------------------------------------------------------
    UnicodeString text;
    if (!sym.get_no_text())
    {
        expression_ptr name_expr = sym.get_name();
        if (!name_expr) return;
        value_type name_value =
            boost::apply_visitor(evaluate<Feature,value_type>(feature), *name_expr);
        text = name_value.to_unicode();
    }
    else
    {
        text = UnicodeString( " " );  // TODO: fix->use 'space' as the text to render
    }

    // The ICU case-mapping calls modify the string in place.
    if (sym.get_text_transform() == UPPERCASE)
        text.toUpper();
    else if (sym.get_text_transform() == LOWERCASE)
        text.toLower();
    else if (sym.get_text_transform() == CAPITALIZE)
        text.toTitle(NULL);

    // --- marker image and its transform -----------------------------------
    agg::trans_affine image_tr;
    boost::array<double,6> const& matrix = sym.get_transform();
    image_tr.load_from(&matrix[0]);

    std::string filename =
        path_processor_type::evaluate(*sym.get_filename(), feature);
    boost::optional<mapnik::marker_ptr> marker;
    if (filename.empty())
        marker.reset(boost::make_shared<mapnik::marker>());
    else
        marker = marker_cache::instance()->find(filename, true);

    if (!(text.length() > 0) || !marker) return;

    int w = (*marker)->width();
    int h = (*marker)->height();
    double const half_w = 0.5 * w;
    double const half_h = 0.5 * h;

    // Transform the four corners of the marker (centred on the origin) and
    // take their bounding box as the shield extent.
    double corner_x[4] = { -half_w,  half_w,  half_w, -half_w };
    double corner_y[4] = { -half_h,  half_h, -half_h,  half_h };
    for (int c = 0; c < 4; ++c)
    {
        image_tr.transform(&corner_x[c], &corner_y[c]);
    }
    box2d<double> label_ext(corner_x[0], corner_y[0], corner_x[1], corner_y[1]);
    label_ext.expand_to_include(corner_x[2], corner_y[2]);
    label_ext.expand_to_include(corner_x[3], corner_y[3]);

    // --- fonts ------------------------------------------------------------
    face_set_ptr faces;
    if (sym.get_fontset().size() == 0)
        faces = font_manager_.get_face_set(sym.get_face_name());
    else
        faces = font_manager_.get_face_set(sym.get_fontset());

    stroker_ptr stroker = font_manager_.get_stroker();
    if (!stroker || faces->size() == 0) return;

    text_renderer<T> ren(pixmap_, faces, *stroker);
    ren.set_pixel_size(sym.get_text_size() * scale_factor_);
    ren.set_fill(sym.get_fill());
    ren.set_halo_fill(sym.get_halo_fill());
    ren.set_halo_radius(sym.get_halo_radius() * scale_factor_);
    ren.set_opacity(sym.get_text_opacity());

    placement_finder<label_collision_detector4> finder(detector_);

    string_info info(text);
    faces->get_string_info(info);

    metawriter_with_properties writer = sym.get_metawriter();

    // --- placement and rendering, geometry by geometry --------------------
    for (unsigned geom_idx = 0; geom_idx < feature.num_geometries(); ++geom_idx)
    {
        geometry_type const& geom = feature.get_geometry(geom_idx);
        if (geom.num_points() == 0) continue;

        path_type path(t_, geom, prj_trans);
        label_placement_enum how_placed = sym.get_label_placement();

        if (how_placed == POINT_PLACEMENT ||
            how_placed == VERTEX_PLACEMENT ||
            how_placed == INTERIOR_PLACEMENT)
        {
            // for every vertex, try and place a shield/text
            geom.rewind(0);
            placement text_placement(info, sym, scale_factor_, w, h, false);
            text_placement.avoid_edges = sym.get_avoid_edges();
            text_placement.allow_overlap = sym.get_allow_overlap();
            if (writer.first)
                text_placement.collect_extents = true; // needed for inmem metawriter
            position const& text_disp = sym.get_displacement();
            position const& shield_disp = sym.get_shield_displacement();

            for (unsigned vtx = 0; vtx < geom.num_points(); ++vtx)
            {
                double label_x;
                double label_y;
                double z = 0.0;

                if (how_placed == VERTEX_PLACEMENT)
                    geom.vertex(&label_x, &label_y);                  // by vertex
                else if (how_placed == INTERIOR_PLACEMENT)
                    geom.label_interior_position(&label_x, &label_y);
                else
                    geom.label_position(&label_x, &label_y);          // by middle of line or by point

                prj_trans.backward(label_x, label_y, z);
                t_.forward(&label_x, &label_y);
                label_x += boost::get<0>(shield_disp);
                label_y += boost::get<1>(shield_disp);

                finder.find_point_placement(text_placement, placement_options,
                                            label_x, label_y, 0.0,
                                            sym.get_line_spacing(),
                                            sym.get_character_spacing());

                // check to see if image overlaps anything too, there is only
                // ever 1 placement found for points and vertices
                if (text_placement.placements.size() == 0) continue;

                double x = floor(text_placement.placements[0].starting_x);
                double y = floor(text_placement.placements[0].starting_y);
                int px;
                int py;

                if (sym.get_unlock_image())
                {
                    // center image at reference location
                    px = int(floor(label_x - half_w));
                    py = int(floor(label_y - half_h));
                    label_ext.re_center(label_x, label_y);
                }
                else
                {
                    // center image at text center position
                    // remove displacement from image label
                    double lx = x - boost::get<0>(text_disp);
                    double ly = y - boost::get<1>(text_disp);
                    px = int(floor(lx - half_w)) + 1;
                    py = int(floor(ly - half_h)) + 1;
                    label_ext.re_center(lx, ly);
                }

                // Skip this position unless overlap is allowed or the
                // detector accepts the shield extent.
                if (!sym.get_allow_overlap() && !detector_.has_placement(label_ext))
                    continue;

                render_marker(px, py, **marker, image_tr, sym.get_opacity());

                // The extent returned by prepare_glyphs is not needed here.
                ren.prepare_glyphs(&text_placement.placements[0]);
                ren.render(x, y);
                detector_.insert(label_ext);
                finder.update_detector(text_placement);
                if (writer.first)
                {
                    writer.first->add_box(label_ext, feature, t_, writer.second);
                    writer.first->add_text(text_placement, faces, feature, t_, writer.second);
                }
            }
        }
        else if (geom.num_points() > 1 && how_placed == LINE_PLACEMENT)
        {
            placement text_placement(info, sym, scale_factor_, w, h, false);
            position const& text_disp = sym.get_displacement();

            text_placement.avoid_edges = sym.get_avoid_edges();

            // Register the shield extent, shifted back by the text
            // displacement, as an additional box on the placement.
            double const ext_half_w = 0.5 * label_ext.width();
            double const ext_half_h = 0.5 * label_ext.height();
            text_placement.additional_boxes.push_back(
                box2d<double>(-ext_half_w - boost::get<0>(text_disp),
                              -ext_half_h - boost::get<1>(text_disp),
                               ext_half_w - boost::get<0>(text_disp),
                               ext_half_h - boost::get<1>(text_disp)));

            finder.find_point_placements<path_type>(text_placement, placement_options, path);

            for (unsigned int p = 0; p < text_placement.placements.size(); ++p)
            {
                double x = floor(text_placement.placements[p].starting_x);
                double y = floor(text_placement.placements[p].starting_y);

                double lx = x - boost::get<0>(text_disp);
                double ly = y - boost::get<1>(text_disp);
                int px = int(floor(lx - half_w)) + 1;
                int py = int(floor(ly - half_h)) + 1;
                label_ext.re_center(lx, ly);

                render_marker(px, py, **marker, image_tr, sym.get_opacity());

                // The extent returned by prepare_glyphs is not needed here.
                ren.prepare_glyphs(&text_placement.placements[p]);
                ren.render(x, y);
                if (writer.first)
                    writer.first->add_box(label_ext, feature, t_, writer.second);
            }

            finder.update_detector(text_placement);
            if (writer.first)
                writer.first->add_text(text_placement, faces, feature, t_, writer.second);
        }
    }
}