static void normalizeCharacters(const TextRun& run, UChar* destination, int length) { int position = 0; bool error = false; const UChar* source; String stringFor8BitRun; if (run.is8Bit()) { stringFor8BitRun = String::make16BitFrom8BitSource(run.characters8(), run.length()); source = stringFor8BitRun.characters16(); } else source = run.characters16(); while (position < length) { UChar32 character; int nextPosition = position; U16_NEXT(source, nextPosition, length, character); // Don't normalize tabs as they are not treated as spaces for word-end. if (Font::treatAsSpace(character) && character != '\t') character = ' '; else if (Font::treatAsZeroWidthSpaceInComplexScript(character)) character = zeroWidthSpace; U16_APPEND(destination, position, length, character, error); ASSERT_UNUSED(error, !error); position = nextPosition; } }
static void normalizeCharacters(const TextRun& run, unsigned length, UChar* destination, unsigned* destinationLength) { unsigned position = 0; bool error = false; const UChar* source; String stringFor8BitRun; if (run.is8Bit()) { stringFor8BitRun = String::make16BitFrom8BitSource(run.characters8(), run.length()); source = stringFor8BitRun.characters16(); } else { source = run.characters16(); } *destinationLength = 0; while (position < length) { UChar32 character; U16_NEXT(source, position, length, character); // Don't normalize tabs as they are not treated as spaces for word-end. if (run.normalizeSpace() && Character::isNormalizedCanvasSpaceCharacter(character)) character = spaceCharacter; else if (Character::treatAsSpace(character) && character != noBreakSpaceCharacter) character = spaceCharacter; else if (Character::treatAsZeroWidthSpaceInComplexScript(character)) character = zeroWidthSpaceCharacter; U16_APPEND(destination, *destinationLength, length, character, error); ASSERT_UNUSED(error, !error); } }
static void TestAppend() { static const UChar32 codePoints[]={ 0x61, 0xdf, 0x901, 0x3040, 0xac00, 0xd800, 0xdbff, 0xdcde, 0xdffd, 0xe000, 0xffff, 0x10000, 0x12345, 0xe0021, 0x10ffff, 0x110000, 0x234567, 0x7fffffff, -1, -1000, 0, 0x400 }; static const UChar expectUnsafe[]={ 0x61, 0xdf, 0x901, 0x3040, 0xac00, 0xd800, 0xdbff, 0xdcde, 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00, 0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */ /* none from this line */ 0, 0x400 }, expectSafe[]={ 0x61, 0xdf, 0x901, 0x3040, 0xac00, 0xd800, 0xdbff, 0xdcde, 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00, 0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */ /* none from this line */ 0, 0x400 }; UChar buffer[100]; UChar32 c; int32_t i, length; UBool isError, expectIsError, wrongIsError; length=0; for(i=0; i<LENGTHOF(codePoints); ++i) { c=codePoints[i]; if(c<0 || 0x10ffff<c) { continue; /* skip non-code points for U16_APPEND_UNSAFE */ } U16_APPEND_UNSAFE(buffer, length, c); } if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) { log_err("U16_APPEND_UNSAFE did not generate the expected output\n"); } length=0; wrongIsError=FALSE; for(i=0; i<LENGTHOF(codePoints); ++i) { c=codePoints[i]; expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c); isError=FALSE; U16_APPEND(buffer, length, LENGTHOF(buffer), c, isError); wrongIsError|= isError!=expectIsError; } if(wrongIsError) { log_err("U16_APPEND did not set isError correctly\n"); } if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) { log_err("U16_APPEND did not generate the expected output\n"); } }
U_NAMESPACE_USE

/* string casing ------------------------------------------------------------ */

/*
 * Appends one full case mapping result to dest and returns the new destIndex.
 * See UCASE_MAX_STRING_LENGTH for the encoding of result:
 *   - result < 0:                          single code point ~result
 *   - 0 <= result <= UCASE_MAX_STRING_LENGTH: string of result code units at s
 *   - result > UCASE_MAX_STRING_LENGTH:    single code point result
 *
 * When the output does not fit (or destIndex is already at/past destCapacity,
 * i.e. preflighting), nothing is written and destIndex is only advanced by the
 * length the output would have needed.
 */
static inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
             int32_t result, const UChar *s) {
    if(0<=result && result<=UCASE_MAX_STRING_LENGTH) {
        /* string result */
        int32_t remaining=result;

        if(destIndex<destCapacity && (destIndex+remaining)<=destCapacity) {
            for(; remaining>0; --remaining) {
                dest[destIndex++]=*s++;
            }
            return destIndex;
        }
        /* overflow or preflight: count only */
        return destIndex+remaining;
    } else {
        /* single code point result */
        UChar32 c= result<0 ? ~result : result;

        if(destIndex<destCapacity) {
            UBool isError=FALSE;

            U16_APPEND(dest, destIndex, destCapacity, c, isError);
            if(!isError) {
                return destIndex;
            }
            /* overflow, nothing written: fall through and count only */
        }
        return destIndex+U16_LENGTH(c);
    }
}
/* Function: ChangeCase Performs upper or lower casing of a string into a new buffer. No special casing is performed beyond that provided by ICU. */ extern "C" void ChangeCase(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) { // Iterate through the string, decoding the next one or two UTF-16 code units // into a codepoint and updating srcIdx to point to the next UTF-16 code unit // to decode. Then upper or lower case it, write dstCodepoint into lpDst at // offset dstIdx, and update dstIdx. // (The loop here has been manually cloned for each of the four cases, rather // than having a single loop that internally branched based on bToUpper as the // compiler wasn't doing that optimization, and it results in an ~15-20% perf // improvement on longer strings.) UBool isError = FALSE; int32_t srcIdx = 0, dstIdx = 0; UChar32 srcCodepoint, dstCodepoint; if (bToUpper) { while (srcIdx < cwSrcLength) { U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); dstCodepoint = u_toupper(srcCodepoint); U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); assert(isError == FALSE && srcIdx == dstIdx); } } else { while (srcIdx < cwSrcLength) { U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); dstCodepoint = u_tolower(srcCodepoint); U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); assert(isError == FALSE && srcIdx == dstIdx); } } }
/* Function: ChangeCaseInvariant Performs upper or lower casing of a string into a new buffer. Special casing is performed to ensure that invariant casing matches that of Windows in certain situations, e.g. Turkish i's. */ extern "C" void ChangeCaseInvariant(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) { // See algorithmic comment in ChangeCase. UBool isError = FALSE; int32_t srcIdx = 0, dstIdx = 0; UChar32 srcCodepoint, dstCodepoint; if (bToUpper) { while (srcIdx < cwSrcLength) { // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131) // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049). // We special case it to match the Windows invariant behavior. U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); dstCodepoint = ((srcCodepoint == (UChar32)0x0131) ? (UChar32)0x0131 : u_toupper(srcCodepoint)); U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); assert(isError == FALSE && srcIdx == dstIdx); } } else { while (srcIdx < cwSrcLength) { // On Windows with InvariantCulture, the LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130) // lower cases to itself, whereas with ICU it lower cases to LATIN SMALL LETTER I (U+0069). // We special case it to match the Windows invariant behavior. U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); dstCodepoint = ((srcCodepoint == (UChar32)0x0130) ? (UChar32)0x0130 : u_tolower(srcCodepoint)); U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); assert(isError == FALSE && srcIdx == dstIdx); } } }
/*
 * Writes the single code point uc to f as UTF-16.
 * Returns uc on success. Returns U_EOF if uc cannot be encoded into at most
 * two code units or if u_file_write() does not write all of them.
 */
U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32 uc, UFILE *f)
{
    UChar units[2];
    int32_t unitCount = 0;
    UBool isError = FALSE;

    U16_APPEND(units, unitCount, sizeof(units)/sizeof(*units), uc, isError);
    if (!isError && u_file_write(units, unitCount, f) == unitCount) {
        return uc;
    }
    return U_EOF;
}
/* Function: ChangeCaseTurkish Performs upper or lower casing of a string into a new buffer, performing special casing for Turkish. */ extern "C" void ChangeCaseTurkish(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) { // See algorithmic comment in ChangeCase. UBool isError = FALSE; int32_t srcIdx = 0, dstIdx = 0; UChar32 srcCodepoint, dstCodepoint; if (bToUpper) { while (srcIdx < cwSrcLength) { // In turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to LATIN // CAPITAL LETTER I WITH DOT ABOVE (U+0130). U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint)); U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); assert(isError == FALSE && srcIdx == dstIdx); } } else { while (srcIdx < cwSrcLength) { // In turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to // LATIN SMALL LETTER DOTLESS I (U+0131). U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint)); U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); assert(isError == FALSE && srcIdx == dstIdx); } } }
// Copies |length| UTF-16 code units from |source| to |destination|, passing
// every code point through u_charMirror() on the way.
void mirrorCharacters(UChar* destination, const UChar* source, int length) const
{
    bool error = false;
    int readOffset = 0;
    // Each code point is written at the offset it was read from; the write
    // offset is re-synchronised with the read offset after every iteration.
    for (int writeOffset = 0; writeOffset < length; writeOffset = readOffset) {
        UChar32 character;
        U16_NEXT(source, readOffset, length, character);

        const UChar32 mirrored = u_charMirror(character);
        U16_APPEND(destination, writeOffset, length, mirrored, error);
        ASSERT(!error);
    }
}
static void normalizeCharacters(const UChar* source, UChar* destination, int length) { int position = 0; bool error = false; while (position < length) { UChar32 character; int nextPosition = position; U16_NEXT(source, nextPosition, length, character); // Don't normalize tabs as they are not treated as spaces for word-end. if (Font::treatAsSpace(character) && character != '\t') character = ' '; else if (Font::treatAsZeroWidthSpaceInComplexScript(character)) character = zeroWidthSpace; U16_APPEND(destination, position, length, character, error); ASSERT_UNUSED(error, !error); position = nextPosition; } }
void ComplexTextController::normalizeSpacesAndMirrorChars(const UChar* source, bool rtl, UChar* destination, int length) { int position = 0; bool error = false; // Iterate characters in source and mirror character if needed. while (position < length) { UChar32 character; int nextPosition = position; U16_NEXT(source, nextPosition, length, character); if (Font::treatAsSpace(character)) character = ' '; else if (Font::treatAsZeroWidthSpace(character)) character = zeroWidthSpace; else if (rtl) character = u_charMirror(character); U16_APPEND(destination, position, length, character, error); ASSERT(!error); position = nextPosition; } }
static void normalizeSpacesAndMirrorChars(const UChar* source, UChar* destination, int length, HarfBuzzShaperBase::NormalizeMode normalizeMode) { int position = 0; bool error = false; // Iterate characters in source and mirror character if needed. while (position < length) { UChar32 character; int nextPosition = position; U16_NEXT(source, nextPosition, length, character); if (Font::treatAsSpace(character)) character = ' '; else if (Font::treatAsZeroWidthSpace(character)) character = zeroWidthSpace; else if (normalizeMode == HarfBuzzShaperBase::NormalizeMirrorChars) character = u_charMirror(character); U16_APPEND(destination, position, length, character, error); ASSERT_UNUSED(error, !error); position = nextPosition; } }
static void demo_utf_h_macros() { static UChar input[]={ 0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062 }; UChar32 c; int32_t i; UBool isError; printf("\n* demo_utf_h_macros() -------------- ***\n\n"); printUString("iterate forward through: ", input, UPRV_LENGTHOF(input)); for(i=0; i<UPRV_LENGTHOF(input); /* U16_NEXT post-increments */) { /* Iterating forwards Codepoint at offset 0: U+0061 Codepoint at offset 1: U+10000 Codepoint at offset 3: U+10ffff Codepoint at offset 5: U+0062 */ printf("Codepoint at offset %d: U+", i); U16_NEXT(input, i, UPRV_LENGTHOF(input), c); printf("%04x\n", c); } puts(""); isError=FALSE; i=1; /* write position, gets post-incremented so needs to be in an l-value */ U16_APPEND(input, i, UPRV_LENGTHOF(input), 0x0062, isError); printUString("iterate backward through: ", input, UPRV_LENGTHOF(input)); for(i=UPRV_LENGTHOF(input); i>0; /* U16_PREV pre-decrements */) { U16_PREV(input, 0, i, c); /* Iterating backwards Codepoint at offset 5: U+0062 Codepoint at offset 3: U+10ffff Codepoint at offset 2: U+dc00 -- unpaired surrogate because lead surr. overwritten Codepoint at offset 1: U+0062 -- by this BMP code point Codepoint at offset 0: U+0061 */ printf("Codepoint at offset %d: U+%04x\n", i, c); } }
/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is zInput[0..nInput-1] (or the whole
** nul-terminated string when nInput is negative).  A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
**
** The input is decoded from UTF-8, case-folded one code point at a time and
** stored as UTF-16 in IcuCursor.aChar[].  IcuCursor.aOffset[] records, for
** each UTF-16 index written, the byte offset in zInput that follows the
** corresponding input character.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the cursor cannot be
** allocated, or SQLITE_ERROR if appending to aChar[] or opening the ICU
** break iterator fails.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;                             /* Capacity of aChar[] in UChars */
  int nCharAligned;                      /* nChar rounded up to a multiple of 4 */

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  /* Treat a NULL input as an empty string instead of dereferencing it. */
  if( zInput==0 ){
    nInput = 0;
    zInput = "";
  }else if( nInput<0 ){
    nInput = (int)strlen(zInput);
  }
  nChar = nInput+1;

  /* aOffset[] (an int array) is carved out of the same allocation right
  ** after aChar[] (a UChar array).  Round the number of UChar slots up to a
  ** multiple of 4 so that aOffset[] does not start on an odd UChar boundary,
  ** which would leave the int array misaligned.
  ** NOTE(review): this assumes sizeof(IcuCursor) is itself a multiple of the
  ** alignment of int - confirm against the struct definition.
  */
  nCharAligned = (nChar+3)&~3;
  pCsr = (IcuCursor *)sqlite3_malloc(
      sizeof(IcuCursor) +                /* IcuCursor */
      nCharAligned * sizeof(UChar) +     /* IcuCursor.aChar[] (padded) */
      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
  );
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aChar = (UChar *)&pCsr[1];
  pCsr->aOffset = (int *)&pCsr->aChar[nCharAligned];

  pCsr->aOffset[iOut] = iInput;
  /* U8_NEXT reads zInput[iInput] unconditionally, so only call it while
  ** there is input left; an empty input yields no characters at all.
  */
  if( iInput<nInput ){
    U8_NEXT(zInput, iInput, nInput, c);
  }else{
    c = 0;
  }
  while( c>0 ){
    int isError = 0;
    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    pCsr->aOffset[iOut] = iInput;

    if( iInput<nInput ){
      U8_NEXT(zInput, iInput, nInput, c);
    }else{
      c = 0;
    }
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}
static void demoCaseMapInC() { /* * input= * "aB<capital sigma>" * "iI<small dotless i><capital dotted I> " * "<sharp s> <small lig. ffi>" * "<small final sigma><small sigma><capital sigma>" */ static const UChar input[]={ 0x61, 0x42, 0x3a3, 0x69, 0x49, 0x131, 0x130, 0x20, 0xdf, 0x20, 0xfb03, 0x3c2, 0x3c3, 0x3a3, 0 }; UChar buffer[32]; UErrorCode errorCode; UChar32 c; int32_t i, j, length; UBool isError; printf("\n* demoCaseMapInC() ----------------- ***\n\n"); /* * First, use simple case mapping functions which provide * 1:1 code point mappings without context/locale ID. * * Note that some mappings will not be "right" because some "real" * case mappings require context, depend on the locale ID, * and/or result in a change in the number of code points. */ printUString("input string: ", input, -1); /* uppercase */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_toupper(c); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-uppercased: ", buffer, j); /* lowercase */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_tolower(c); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-lowercased: ", buffer, j); /* titlecase */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_totitle(c); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-titlecased: ", buffer, j); /* 
case-fold/default */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_foldCase(c, U_FOLD_CASE_DEFAULT); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-case-folded/default: ", buffer, j); /* case-fold/Turkic */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-case-folded/Turkic: ", buffer, j); /* * Second, use full case mapping functions which provide * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID. * * Note that lower/upper/titlecasing take a locale ID while case-folding * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file. * * Also, string titlecasing requires a BreakIterator to find starts of words. * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default * titlecasing BreakIterator automatically. * For production code where many strings are titlecased it would be more efficient * to open a BreakIterator externally and pass it in. 
*/ printUString("\ninput string: ", input, -1); /* lowercase/English */ errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-lowercased/en: ", buffer, length); } else { printf("error in u_strToLower(en)=%ld error=%s\n", length, u_errorName(errorCode)); } /* lowercase/Turkish */ errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-lowercased/tr: ", buffer, length); } else { printf("error in u_strToLower(tr)=%ld error=%s\n", length, u_errorName(errorCode)); } /* uppercase/English */ errorCode=U_ZERO_ERROR; length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-uppercased/en: ", buffer, length); } else { printf("error in u_strToUpper(en)=%ld error=%s\n", length, u_errorName(errorCode)); } /* uppercase/Turkish */ errorCode=U_ZERO_ERROR; length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-uppercased/tr: ", buffer, length); } else { printf("error in u_strToUpper(tr)=%ld error=%s\n", length, u_errorName(errorCode)); } /* titlecase/English */ errorCode=U_ZERO_ERROR; length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "en", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-titlecased/en: ", buffer, length); } else { printf("error in u_strToTitle(en)=%ld error=%s\n", length, u_errorName(errorCode)); } /* titlecase/Turkish */ errorCode=U_ZERO_ERROR; length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-titlecased/tr: ", buffer, length); } else { printf("error in u_strToTitle(tr)=%ld error=%s\n", length, u_errorName(errorCode)); } /* case-fold/default */ errorCode=U_ZERO_ERROR; length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, 
-1, U_FOLD_CASE_DEFAULT, &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-case-folded/default: ", buffer, length); } else { printf("error in u_strFoldCase(default)=%ld error=%s\n", length, u_errorName(errorCode)); } /* case-fold/Turkic */ errorCode=U_ZERO_ERROR; length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-case-folded/Turkic: ", buffer, length); } else { printf("error in u_strFoldCase(Turkic)=%ld error=%s\n", length, u_errorName(errorCode)); } }
static int32_t u_scanf_scanset_handler(UFILE *input, u_scanf_spec_info *info, ufmt_args *args, const UChar *fmt, int32_t *fmtConsumed, int32_t *argConverted) { USet *scanset; UErrorCode status = U_ZERO_ERROR; int32_t chLeft = INT32_MAX; UChar32 c; UChar *alias = (UChar*) (args[0].ptrValue); UBool isNotEOF = FALSE; UBool readCharacter = FALSE; /* Create an empty set */ scanset = uset_open(0, -1); /* Back up one to get the [ */ fmt--; /* truncate to the width, if specified and alias the target */ if(info->fWidth >= 0) { chLeft = info->fWidth; } /* parse the scanset from the fmt string */ *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); /* verify that the parse was successful */ if (U_SUCCESS(status)) { c=0; /* grab characters one at a time and make sure they are in the scanset */ while(chLeft > 0) { if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { readCharacter = TRUE; if (!info->fSkipArg) { int32_t idx = 0; UBool isError = FALSE; U16_APPEND(alias, idx, chLeft, c, isError); if (isError) { break; } alias += idx; } chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); } else { /* if the character's not in the scanset, break out */ break; } } /* put the final character we read back on the input */ if(isNotEOF && chLeft > 0) { u_fungetc(c, input); } } uset_close(scanset); /* if we didn't match at least 1 character, fail */ if(!readCharacter) return -1; /* otherwise, add the terminator */ else if (!info->fSkipArg) { *alias = 0x00; } /* we converted 1 arg */ *argConverted = !info->fSkipArg; return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; }
/** @bug 4108762 * Test for getMaxExpansion() */ static void TestMaxExpansion() { UErrorCode status = U_ZERO_ERROR; UCollator *coll ;/*= ucol_open("en_US", &status);*/ UChar ch = 0; UChar32 unassigned = 0xEFFFD; UChar supplementary[2]; uint32_t stringOffset = 0; UBool isError = FALSE; uint32_t sorder = 0; UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/ uint32_t temporder = 0; UChar rule[256]; u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch"); coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status); if(U_SUCCESS(status) && coll) { iter = ucol_openElements(coll, &ch, 1, &status); while (ch < 0xFFFF && U_SUCCESS(status)) { int count = 1; uint32_t order; int32_t size = 0; ch ++; ucol_setText(iter, &ch, 1, &status); order = ucol_previous(iter, &status); /* thai management */ if (order == 0) order = ucol_previous(iter, &status); while (U_SUCCESS(status) && ucol_previous(iter, &status) != UCOL_NULLORDER) { count ++; } size = ucol_getMaxExpansion(iter, order); if (U_FAILURE(status) || size < count) { log_err("Failure at codepoint %d, maximum expansion count < %d\n", ch, count); } } /* testing for exact max expansion */ ch = 0; while (ch < 0x61) { uint32_t order; int32_t size; ucol_setText(iter, &ch, 1, &status); order = ucol_previous(iter, &status); size = ucol_getMaxExpansion(iter, order); if (U_FAILURE(status) || size != 1) { log_err("Failure at codepoint %d, maximum expansion count < %d\n", ch, 1); } ch ++; } ch = 0x63; ucol_setText(iter, &ch, 1, &status); temporder = ucol_previous(iter, &status); if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) { log_err("Failure at codepoint %d, maximum expansion count != %d\n", ch, 3); } ch = 0x64; ucol_setText(iter, &ch, 1, &status); temporder = ucol_previous(iter, &status); if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) { log_err("Failure at codepoint %d, maximum expansion count != %d\n", ch, 3); } U16_APPEND(supplementary, 
stringOffset, 2, unassigned, isError); (void)isError; /* Suppress set but not used warning. */ ucol_setText(iter, supplementary, 2, &status); sorder = ucol_previous(iter, &status); if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) { log_err("Failure at codepoint %d, maximum expansion count < %d\n", ch, 2); } /* testing jamo */ ch = 0x1165; ucol_setText(iter, &ch, 1, &status); temporder = ucol_previous(iter, &status); if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) { log_err("Failure at codepoint %d, maximum expansion count > %d\n", ch, 3); } ucol_closeElements(iter); ucol_close(coll); /* testing special jamo &a<\u1160 */ rule[0] = 0x26; rule[1] = 0x71; rule[2] = 0x3c; rule[3] = 0x1165; rule[4] = 0x2f; rule[5] = 0x71; rule[6] = 0x71; rule[7] = 0x71; rule[8] = 0x71; rule[9] = 0; coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status); iter = ucol_openElements(coll, &ch, 1, &status); temporder = ucol_previous(iter, &status); if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) { log_err("Failure at codepoint %d, maximum expansion count > %d\n", ch, 5); } ucol_closeElements(iter); ucol_close(coll); } else { log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); } }