static void normalizeCharacters(const TextRun& run, UChar* destination, int length)
{
    int position = 0;
    bool error = false;
    const UChar* source;
    String stringFor8BitRun;
    if (run.is8Bit()) {
        stringFor8BitRun = String::make16BitFrom8BitSource(run.characters8(), run.length());
        source = stringFor8BitRun.characters16();
    } else
        source = run.characters16();

    while (position < length) {
        UChar32 character;
        int nextPosition = position;
        U16_NEXT(source, nextPosition, length, character);
        // Don't normalize tabs as they are not treated as spaces for word-end.
        if (Font::treatAsSpace(character) && character != '\t')
            character = ' ';
        else if (Font::treatAsZeroWidthSpaceInComplexScript(character))
            character = zeroWidthSpace;
        U16_APPEND(destination, position, length, character, error);
        ASSERT_UNUSED(error, !error);
        position = nextPosition;
    }
}
Exemple #2
0
static void normalizeCharacters(const TextRun& run, unsigned length, UChar* destination, unsigned* destinationLength)
{
    unsigned position = 0;
    bool error = false;
    const UChar* source;
    String stringFor8BitRun;
    if (run.is8Bit()) {
        stringFor8BitRun = String::make16BitFrom8BitSource(run.characters8(), run.length());
        source = stringFor8BitRun.characters16();
    } else {
        source = run.characters16();
    }

    *destinationLength = 0;
    while (position < length) {
        UChar32 character;
        U16_NEXT(source, position, length, character);
        // Don't normalize tabs as they are not treated as spaces for word-end.
        if (run.normalizeSpace() && Character::isNormalizedCanvasSpaceCharacter(character))
            character = spaceCharacter;
        else if (Character::treatAsSpace(character) && character != noBreakSpaceCharacter)
            character = spaceCharacter;
        else if (Character::treatAsZeroWidthSpaceInComplexScript(character))
            character = zeroWidthSpaceCharacter;

        U16_APPEND(destination, *destinationLength, length, character, error);
        ASSERT_UNUSED(error, !error);
    }
}
Exemple #3
0
static void TestAppend() {
    static const UChar32 codePoints[]={
        0x61, 0xdf, 0x901, 0x3040,
        0xac00, 0xd800, 0xdbff, 0xdcde,
        0xdffd, 0xe000, 0xffff, 0x10000,
        0x12345, 0xe0021, 0x10ffff, 0x110000,
        0x234567, 0x7fffffff, -1, -1000,
        0, 0x400
    };
    static const UChar expectUnsafe[]={
        0x61, 0xdf, 0x901, 0x3040,
        0xac00, 0xd800, 0xdbff, 0xdcde,
        0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
        0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
        /* none from this line */
        0, 0x400
    }, expectSafe[]={
        0x61, 0xdf, 0x901, 0x3040,
        0xac00, 0xd800, 0xdbff, 0xdcde,
        0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
        0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
        /* none from this line */
        0, 0x400
    };

    UChar buffer[100];
    UChar32 c;
    int32_t i, length;
    UBool isError, expectIsError, wrongIsError;

    length=0;
    for(i=0; i<LENGTHOF(codePoints); ++i) {
        c=codePoints[i];
        if(c<0 || 0x10ffff<c) {
            continue; /* skip non-code points for U16_APPEND_UNSAFE */
        }

        U16_APPEND_UNSAFE(buffer, length, c);
    }
    if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) {
        log_err("U16_APPEND_UNSAFE did not generate the expected output\n");
    }

    length=0;
    wrongIsError=FALSE;
    for(i=0; i<LENGTHOF(codePoints); ++i) {
        c=codePoints[i];
        expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
        isError=FALSE;

        U16_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
        wrongIsError|= isError!=expectIsError;
    }
    if(wrongIsError) {
        log_err("U16_APPEND did not set isError correctly\n");
    }
    if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) {
        log_err("U16_APPEND did not generate the expected output\n");
    }
}
Exemple #4
0
U_NAMESPACE_USE

/* string casing ------------------------------------------------------------ */

/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
static inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
             int32_t result, const UChar *s) {
    UChar32 c;
    int32_t length;

    /* decode the result */
    if(result<0) {
        /* (not) original code point */
        c=~result;
        length=-1;
    } else if(result<=UCASE_MAX_STRING_LENGTH) {
        c=U_SENTINEL;
        length=result;
    } else {
        c=result;
        length=-1;
    }

    if(destIndex<destCapacity) {
        /* append the result */
        if(length<0) {
            /* code point */
            UBool isError=FALSE;
            U16_APPEND(dest, destIndex, destCapacity, c, isError);
            if(isError) {
                /* overflow, nothing written */
                destIndex+=U16_LENGTH(c);
            }
        } else {
            /* string */
            if((destIndex+length)<=destCapacity) {
                while(length>0) {
                    dest[destIndex++]=*s++;
                    --length;
                }
            } else {
                /* overflow */
                destIndex+=length;
            }
        }
    } else {
        /* preflight */
        if(length<0) {
            destIndex+=U16_LENGTH(c);
        } else {
            destIndex+=length;
        }
    }
    return destIndex;
}
Exemple #5
0
/*
Function:
ChangeCase

Performs upper or lower casing of a string into a new buffer.
No special casing is performed beyond that provided by ICU.
*/
extern "C" void ChangeCase(const UChar* lpSrc,
                           int32_t cwSrcLength,
                           UChar* lpDst,
                           int32_t cwDstLength,
                           int32_t bToUpper)
{
	// Iterate through the string, decoding the next one or two UTF-16 code units
	// into a codepoint and updating srcIdx to point to the next UTF-16 code unit 
	// to decode.  Then upper or lower case it, write dstCodepoint into lpDst at 
	// offset dstIdx, and update dstIdx.

	// (The loop here has been manually cloned for each of the four cases, rather
	// than having a single loop that internally branched based on bToUpper as the 
	// compiler wasn't doing that optimization, and it results in an ~15-20% perf
	// improvement on longer strings.)

	UBool isError = FALSE;
	int32_t srcIdx = 0, dstIdx = 0;
	UChar32 srcCodepoint, dstCodepoint;

	if (bToUpper)
	{
		while (srcIdx < cwSrcLength)
		{
			U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
			dstCodepoint = u_toupper(srcCodepoint);
			U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
			assert(isError == FALSE && srcIdx == dstIdx);
		}
	}
	else
	{
		while (srcIdx < cwSrcLength)
		{
			U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
			dstCodepoint = u_tolower(srcCodepoint);
			U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
			assert(isError == FALSE && srcIdx == dstIdx);
		}
	}
}
Exemple #6
0
/*
Function:
ChangeCaseInvariant

Performs upper or lower casing of a string into a new buffer.
Special casing is performed to ensure that invariant casing 
matches that of Windows in certain situations, e.g. Turkish i's.
*/
extern "C" void ChangeCaseInvariant(const UChar* lpSrc,
                                    int32_t cwSrcLength,
                                    UChar* lpDst,
                                    int32_t cwDstLength,
                                    int32_t bToUpper)
{
	// See algorithmic comment in ChangeCase.

	UBool isError = FALSE;
	int32_t srcIdx = 0, dstIdx = 0;
	UChar32 srcCodepoint, dstCodepoint;

	if (bToUpper)
	{
		while (srcIdx < cwSrcLength)
		{
			// On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
			// capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
			// We special case it to match the Windows invariant behavior.
			U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
			dstCodepoint = ((srcCodepoint == (UChar32)0x0131) ? (UChar32)0x0131 : u_toupper(srcCodepoint));
			U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
			assert(isError == FALSE && srcIdx == dstIdx);
		}
	}
	else
	{
		while (srcIdx < cwSrcLength)
		{
			// On Windows with InvariantCulture, the LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130)
			// lower cases to itself, whereas with ICU it lower cases to LATIN SMALL LETTER I (U+0069).
			// We special case it to match the Windows invariant behavior.
			U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
			dstCodepoint = ((srcCodepoint == (UChar32)0x0130) ? (UChar32)0x0130 : u_tolower(srcCodepoint));
			U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
			assert(isError == FALSE && srcIdx == dstIdx);
		}
	}
}
Exemple #7
0
U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32      uc,
        UFILE        *f)
{
    UChar buf[2];
    int32_t idx = 0;
    UBool isError = FALSE;

    U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
    if (isError) {
        return U_EOF;
    }
    return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
}
Exemple #8
0
/*
Function:
ChangeCaseTurkish

Performs upper or lower casing of a string into a new buffer, performing special
casing for Turkish.
*/
extern "C" void ChangeCaseTurkish(const UChar* lpSrc,
								  int32_t cwSrcLength,
								  UChar* lpDst,
								  int32_t cwDstLength,
								  int32_t bToUpper)
{
	// See algorithmic comment in ChangeCase.

	UBool isError = FALSE;
	int32_t srcIdx = 0, dstIdx = 0;
	UChar32 srcCodepoint, dstCodepoint;

	if (bToUpper)
	{
		while (srcIdx < cwSrcLength)
		{
			// In turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to LATIN
			// CAPITAL LETTER I WITH DOT ABOVE (U+0130).
			U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
			dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint));
			U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
			assert(isError == FALSE && srcIdx == dstIdx);
		}
	}
	else
	{
		while (srcIdx < cwSrcLength)
		{
			// In turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to
			// LATIN SMALL LETTER DOTLESS I (U+0131).
			U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
			dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint));
			U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
			assert(isError == FALSE && srcIdx == dstIdx);
		}
	}
}
 void mirrorCharacters(UChar* destination, const UChar* source, int length) const
 {
     int position = 0;
     bool error = false;
     // Iterate characters in source and mirror character if needed.
     while (position < length) {
         UChar32 character;
         int nextPosition = position;
         U16_NEXT(source, nextPosition, length, character);
         character = u_charMirror(character);
         U16_APPEND(destination, position, length, character, error);
         ASSERT(!error);
         position = nextPosition;
     }
 }
Exemple #10
0
static void normalizeCharacters(const UChar* source, UChar* destination, int length)
{
    int position = 0;
    bool error = false;
    while (position < length) {
        UChar32 character;
        int nextPosition = position;
        U16_NEXT(source, nextPosition, length, character);
        // Don't normalize tabs as they are not treated as spaces for word-end.
        if (Font::treatAsSpace(character) && character != '\t')
            character = ' ';
        else if (Font::treatAsZeroWidthSpaceInComplexScript(character))
            character = zeroWidthSpace;
        U16_APPEND(destination, position, length, character, error);
        ASSERT_UNUSED(error, !error);
        position = nextPosition;
    }
}
void ComplexTextController::normalizeSpacesAndMirrorChars(const UChar* source, bool rtl, UChar* destination, int length)
{
    int position = 0;
    bool error = false;
    // Iterate characters in source and mirror character if needed.
    while (position < length) {
        UChar32 character;
        int nextPosition = position;
        U16_NEXT(source, nextPosition, length, character);
        if (Font::treatAsSpace(character))
            character = ' ';
        else if (Font::treatAsZeroWidthSpace(character))
            character = zeroWidthSpace;
        else if (rtl)
            character = u_charMirror(character);
        U16_APPEND(destination, position, length, character, error);
        ASSERT(!error);
        position = nextPosition;
    }
}
Exemple #12
0
static void normalizeSpacesAndMirrorChars(const UChar* source, UChar* destination, int length, HarfBuzzShaperBase::NormalizeMode normalizeMode)
{
    int position = 0;
    bool error = false;
    // Iterate characters in source and mirror character if needed.
    while (position < length) {
        UChar32 character;
        int nextPosition = position;
        U16_NEXT(source, nextPosition, length, character);
        if (Font::treatAsSpace(character))
            character = ' ';
        else if (Font::treatAsZeroWidthSpace(character))
            character = zeroWidthSpace;
        else if (normalizeMode == HarfBuzzShaperBase::NormalizeMirrorChars)
            character = u_charMirror(character);
        U16_APPEND(destination, position, length, character, error);
        ASSERT_UNUSED(error, !error);
        position = nextPosition;
    }
}
Exemple #13
0
static void
demo_utf_h_macros() {
    static UChar input[]={ 0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062 };
    UChar32 c;
    int32_t i;
    UBool isError;

    printf("\n* demo_utf_h_macros() -------------- ***\n\n");

    printUString("iterate forward through: ", input, UPRV_LENGTHOF(input));
    for(i=0; i<UPRV_LENGTHOF(input); /* U16_NEXT post-increments */) {
        /* Iterating forwards 
           Codepoint at offset 0: U+0061
           Codepoint at offset 1: U+10000
           Codepoint at offset 3: U+10ffff
           Codepoint at offset 5: U+0062
        */
        printf("Codepoint at offset %d: U+", i);
        U16_NEXT(input, i, UPRV_LENGTHOF(input), c);
        printf("%04x\n", c); 
    }

    puts("");

    isError=FALSE;
    i=1; /* write position, gets post-incremented so needs to be in an l-value */
    U16_APPEND(input, i, UPRV_LENGTHOF(input), 0x0062, isError);

    printUString("iterate backward through: ", input, UPRV_LENGTHOF(input));
    for(i=UPRV_LENGTHOF(input); i>0; /* U16_PREV pre-decrements */) {
        U16_PREV(input, 0, i, c);
        /* Iterating backwards
           Codepoint at offset 5: U+0062
           Codepoint at offset 3: U+10ffff
           Codepoint at offset 2: U+dc00 -- unpaired surrogate because lead surr. overwritten
           Codepoint at offset 1: U+0062 -- by this BMP code point
           Codepoint at offset 0: U+0061
        */
        printf("Codepoint at offset %d: U+%04x\n", i, c);
    }
}
Exemple #14
0
/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is pInput[0..nBytes-1].  A cursor
** used to incrementally tokenize this string is returned in 
** *ppCursor.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  if( nInput<0 ){
    nInput = strlen(zInput);
  }
  nChar = nInput+1;
  pCsr = (IcuCursor *)sqlite3_malloc(
      sizeof(IcuCursor) +                /* IcuCursor */
      nChar * sizeof(UChar) +            /* IcuCursor.aChar[] */
      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
  );
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aChar = (UChar *)&pCsr[1];
  pCsr->aOffset = (int *)&pCsr->aChar[nChar];

  pCsr->aOffset[iOut] = iInput;
  U8_NEXT(zInput, iInput, nInput, c); 
  while( c>0 ){
    int isError = 0;
    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    pCsr->aOffset[iOut] = iInput;

    if( iInput<nInput ){
      U8_NEXT(zInput, iInput, nInput, c);
    }else{
      c = 0;
    }
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}
Exemple #15
0
static void demoCaseMapInC() {
    /*
     * input=
     *   "aB<capital sigma>"
     *   "iI<small dotless i><capital dotted I> "
     *   "<sharp s> <small lig. ffi>"
     *   "<small final sigma><small sigma><capital sigma>"
     */
    static const UChar input[]={
        0x61, 0x42, 0x3a3,
        0x69, 0x49, 0x131, 0x130, 0x20,
        0xdf, 0x20, 0xfb03,
        0x3c2, 0x3c3, 0x3a3, 0
    };
    UChar buffer[32];

    UErrorCode errorCode;
    UChar32 c;
    int32_t i, j, length;
    UBool isError;

    printf("\n* demoCaseMapInC() ----------------- ***\n\n");

    /*
     * First, use simple case mapping functions which provide
     * 1:1 code point mappings without context/locale ID.
     *
     * Note that some mappings will not be "right" because some "real"
     * case mappings require context, depend on the locale ID,
     * and/or result in a change in the number of code points.
     */
    printUString("input string: ", input, -1);

    /* uppercase */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_toupper(c);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-uppercased: ", buffer, j);
    /* lowercase */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_tolower(c);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-lowercased: ", buffer, j);
    /* titlecase */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_totitle(c);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-titlecased: ", buffer, j);
    /* case-fold/default */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_foldCase(c, U_FOLD_CASE_DEFAULT);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-case-folded/default: ", buffer, j);
    /* case-fold/Turkic */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-case-folded/Turkic: ", buffer, j);

    /*
     * Second, use full case mapping functions which provide
     * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID.
     *
     * Note that lower/upper/titlecasing take a locale ID while case-folding
     * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file.
     *
     * Also, string titlecasing requires a BreakIterator to find starts of words.
     * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default
     * titlecasing BreakIterator automatically.
     * For production code where many strings are titlecased it would be more efficient
     * to open a BreakIterator externally and pass it in.
     */
    printUString("\ninput string: ", input, -1);

    /* lowercase/English */
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-lowercased/en: ", buffer, length);
    } else {
        printf("error in u_strToLower(en)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* lowercase/Turkish */
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-lowercased/tr: ", buffer, length);
    } else {
        printf("error in u_strToLower(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* uppercase/English */
    errorCode=U_ZERO_ERROR;
    length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-uppercased/en: ", buffer, length);
    } else {
        printf("error in u_strToUpper(en)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* uppercase/Turkish */
    errorCode=U_ZERO_ERROR;
    length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-uppercased/tr: ", buffer, length);
    } else {
        printf("error in u_strToUpper(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* titlecase/English */
    errorCode=U_ZERO_ERROR;
    length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "en", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-titlecased/en: ", buffer, length);
    } else {
        printf("error in u_strToTitle(en)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* titlecase/Turkish */
    errorCode=U_ZERO_ERROR;
    length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-titlecased/tr: ", buffer, length);
    } else {
        printf("error in u_strToTitle(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* case-fold/default */
    errorCode=U_ZERO_ERROR;
    length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_DEFAULT, &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-case-folded/default: ", buffer, length);
    } else {
        printf("error in u_strFoldCase(default)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* case-fold/Turkic */
    errorCode=U_ZERO_ERROR;
    length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-case-folded/Turkic: ", buffer, length);
    } else {
        printf("error in u_strFoldCase(Turkic)=%ld error=%s\n", length, u_errorName(errorCode));
    }
}
Exemple #16
0
static int32_t
u_scanf_scanset_handler(UFILE       *input,
                        u_scanf_spec_info *info,
                        ufmt_args   *args,
                        const UChar *fmt,
                        int32_t     *fmtConsumed,
                        int32_t     *argConverted)
{
    USet        *scanset;
    UErrorCode  status = U_ZERO_ERROR;
    int32_t     chLeft = INT32_MAX;
    UChar32     c;
    UChar       *alias = (UChar*) (args[0].ptrValue);
    UBool       isNotEOF = FALSE;
    UBool       readCharacter = FALSE;

    /* Create an empty set */
    scanset = uset_open(0, -1);

    /* Back up one to get the [ */
    fmt--;

    /* truncate to the width, if specified and alias the target */
    if(info->fWidth >= 0) {
        chLeft = info->fWidth;
    }

    /* parse the scanset from the fmt string */
    *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);

    /* verify that the parse was successful */
    if (U_SUCCESS(status)) {
        c=0;

        /* grab characters one at a time and make sure they are in the scanset */
        while(chLeft > 0) {
            if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
                readCharacter = TRUE;
                if (!info->fSkipArg) {
                    int32_t idx = 0;
                    UBool isError = FALSE;

                    U16_APPEND(alias, idx, chLeft, c, isError);
                    if (isError) {
                        break;
                    }
                    alias += idx;
                }
                chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
            }
            else {
                /* if the character's not in the scanset, break out */
                break;
            }
        }

        /* put the final character we read back on the input */
        if(isNotEOF && chLeft > 0) {
            u_fungetc(c, input);
        }
    }

    uset_close(scanset);

    /* if we didn't match at least 1 character, fail */
    if(!readCharacter)
        return -1;
    /* otherwise, add the terminator */
    else if (!info->fSkipArg) {
        *alias = 0x00;
    }

    /* we converted 1 arg */
    *argConverted = !info->fSkipArg;
    return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
}
Exemple #17
0
/** @bug 4108762
 * Test for getMaxExpansion()
 */
static void TestMaxExpansion()
{
    UErrorCode          status = U_ZERO_ERROR;
    UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
    UChar               ch     = 0;
    UChar32             unassigned = 0xEFFFD;
    UChar               supplementary[2];
    uint32_t            stringOffset = 0;
    UBool               isError = FALSE;
    uint32_t            sorder = 0;
    UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
    uint32_t            temporder = 0;

    UChar rule[256];
    u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
        UCOL_DEFAULT_STRENGTH,NULL, &status);
    if(U_SUCCESS(status) && coll) {
      iter = ucol_openElements(coll, &ch, 1, &status);

      while (ch < 0xFFFF && U_SUCCESS(status)) {
          int      count = 1;
          uint32_t order;
          int32_t  size = 0;

          ch ++;

          ucol_setText(iter, &ch, 1, &status);
          order = ucol_previous(iter, &status);

          /* thai management */
          if (order == 0)
              order = ucol_previous(iter, &status);

          while (U_SUCCESS(status) &&
              ucol_previous(iter, &status) != UCOL_NULLORDER) {
              count ++;
          }

          size = ucol_getMaxExpansion(iter, order);
          if (U_FAILURE(status) || size < count) {
              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
                  ch, count);
          }
      }

      /* testing for exact max expansion */
      ch = 0;
      while (ch < 0x61) {
          uint32_t order;
          int32_t  size;
          ucol_setText(iter, &ch, 1, &status);
          order = ucol_previous(iter, &status);
          size  = ucol_getMaxExpansion(iter, order);
          if (U_FAILURE(status) || size != 1) {
              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
                  ch, 1);
          }
          ch ++;
      }

      ch = 0x63;
      ucol_setText(iter, &ch, 1, &status);
      temporder = ucol_previous(iter, &status);

      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
                  ch, 3);
      }

      ch = 0x64;
      ucol_setText(iter, &ch, 1, &status);
      temporder = ucol_previous(iter, &status);

      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
                  ch, 3);
      }

      U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
      (void)isError;    /* Suppress set but not used warning. */
      ucol_setText(iter, supplementary, 2, &status);
      sorder = ucol_previous(iter, &status);

      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
          log_err("Failure at codepoint %d, maximum expansion count < %d\n",
                  ch, 2);
      }

      /* testing jamo */
      ch = 0x1165;

      ucol_setText(iter, &ch, 1, &status);
      temporder = ucol_previous(iter, &status);
      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
                  ch, 3);
      }

      ucol_closeElements(iter);
      ucol_close(coll);

      /* testing special jamo &a<\u1160 */
      rule[0] = 0x26;
      rule[1] = 0x71;
      rule[2] = 0x3c;
      rule[3] = 0x1165;
      rule[4] = 0x2f;
      rule[5] = 0x71;
      rule[6] = 0x71;
      rule[7] = 0x71;
      rule[8] = 0x71;
      rule[9] = 0;

      coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
          UCOL_DEFAULT_STRENGTH,NULL, &status);
      iter = ucol_openElements(coll, &ch, 1, &status);

      temporder = ucol_previous(iter, &status);
      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
                  ch, 5);
      }

      ucol_closeElements(iter);
      ucol_close(coll);
    } else {
      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
    }

}