Example #1
0
        // Applies ICU case mapping to a UTF-16 string.
        //
        // caseFlags selects the operation: CaseFlagsUpper calls u_strToUpper and
        // CaseFlagsLower calls u_strToLower (both with a NULL locale argument).
        // Any other value trips Assert(false) and no conversion is attempted.
        //
        // Returns the length reported by ICU. On an ICU failure the translated
        // error is stored in *pErrorOut and -1 is returned. The one exception is
        // U_BUFFER_OVERFLOW_ERROR while destLength is 0: that is not treated as
        // an error, so the required length is returned to the caller.
        int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
        {
            static_assert(sizeof(UChar) == sizeof(char16), "Unexpected char type from ICU, function might have to be updated");

            UErrorCode icuStatus = U_ZERO_ERROR;
            int32_t convertedLength = 0;

            if (caseFlags == CaseFlagsUpper)
            {
                convertedLength = u_strToUpper((UChar*) destString, destLength, (UChar*) sourceString, sourceLength, NULL, &icuStatus);
            }
            else if (caseFlags == CaseFlagsLower)
            {
                convertedLength = u_strToLower((UChar*) destString, destLength, (UChar*) sourceString, sourceLength, NULL, &icuStatus);
            }
            else
            {
                Assert(false);
            }

            // A zero-capacity call that overflows is a length query, not a failure.
            const bool isLengthQuery = (destLength == 0 && icuStatus == U_BUFFER_OVERFLOW_ERROR);
            if (U_FAILURE(icuStatus) && !isLengthQuery)
            {
                *pErrorOut = TranslateUErrorCode(icuStatus);
                return -1;
            }

            // Todo: check for resultStringLength > destLength
            // Return insufficient buffer in that case
            return convertedLength;
        }
Example #2
0
/*
** Implementations of scalar functions for case mapping - upper() and 
** lower(). Function upper() converts its input to upper-case (ABC).
** Function lower() converts to lower-case (abc).
**
** ICU provides two types of case mapping, "general" case mapping and
** "language specific". Refer to ICU documentation for the differences
** between the two.
**
** To utilise "general" case mapping, the upper() or lower() scalar 
** functions are invoked with one argument:
**
**     upper('abc') -> 'ABC'
**     lower('ABC') -> 'abc'
**
** To access ICU "language specific" case mapping, upper() or lower()
** should be invoked with two arguments. The second argument is the name
** of the locale to use. Passing an empty string ("") or SQL NULL value
** as the second argument is the same as invoking the 1 argument version
** of upper() or lower().
**
**     lower('I', 'en_us') -> 'i'
**     lower('I', 'tr_tr') -> '\u131' (small dotless i)
**
** http://www.icu-project.org/userguide/posix.html#case_mappings
*/
static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
  const UChar *zInput;            /* Pointer to input string */
  UChar *zOutput = 0;             /* Pointer to output buffer */
  int nInput;                     /* Size of utf-16 input string in bytes */
  int nOut;                       /* Size of output buffer in bytes */
  int cnt;
  int bToUpper;                   /* True for toupper(), false for tolower() */
  UErrorCode status;
  const char *zLocale = 0;

  assert(nArg==1 || nArg==2);
  /* The user-data pointer selects the direction: non-NULL means upper(). */
  bToUpper = (sqlite3_user_data(p)!=0);
  if( nArg==2 ){
    zLocale = (const char *)sqlite3_value_text(apArg[1]);
  }

  zInput = sqlite3_value_text16(apArg[0]);
  if( !zInput ){
    return;
  }
  nOut = nInput = sqlite3_value_bytes16(apArg[0]);
  if( nOut==0 ){
    sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
    return;
  }

  /*
  ** First attempt uses a buffer the same size as the input. If ICU reports
  ** U_BUFFER_OVERFLOW_ERROR, nOut already holds the required size (the ICU
  ** return value times two) and the second pass retries with that size.
  */
  for(cnt=0; cnt<2; cnt++){
    UChar *zNew = sqlite3_realloc(zOutput, nOut);
    if( zNew==0 ){
      sqlite3_free(zOutput);
      sqlite3_result_error_nomem(p);
      return;
    }
    zOutput = zNew;
    status = U_ZERO_ERROR;
    if( bToUpper ){
      nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
    }else{
      nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
    }

    if( U_SUCCESS(status) ){
      /* SQLite takes over the buffer and releases it through xFree. */
      sqlite3_result_text16(p, zOutput, nOut, xFree);
    }else if( status==U_BUFFER_OVERFLOW_ERROR ){
      assert( cnt==0 );
      continue;
    }else{
      icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
      /* The buffer was never handed to SQLite, so release it here. */
      sqlite3_free(zOutput);
    }
    return;
  }
  assert( 0 );     /* Unreachable */
  /* Only reached in NDEBUG builds if the retry overflowed again. */
  sqlite3_free(zOutput);
}
Example #3
0
/// Convert the unicode string to lowercase. This function will return the
/// required buffer length as a result. If this length does not match the
/// 'DestinationCapacity' this function must be called again with a buffer of
/// the required length to get a lowercase version of the string.
///
/// The empty string is passed as the ICU locale. U_BUFFER_OVERFLOW_ERROR is
/// tolerated so that the required length can be returned; any other ICU
/// failure is fatal and reported through swift::crash.
int32_t
swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination,
                                        int32_t DestinationCapacity,
                                        const uint16_t *Source,
                                        int32_t SourceLength) {
  UErrorCode ErrorCode = U_ZERO_ERROR;
  // u_strToLower returns int32_t and so does this function; keep the value in
  // the same signed type instead of bouncing it through uint32_t.
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
  int32_t OutputLength = u_strToLower(reinterpret_cast<UChar *>(Destination),
                                      DestinationCapacity,
                                      reinterpret_cast<const UChar *>(Source),
                                      SourceLength,
                                      "", &ErrorCode);
#else
  int32_t OutputLength = u_strToLower(Destination, DestinationCapacity,
                                      Source, SourceLength,
                                      "", &ErrorCode);
#endif
  if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) {
    swift::crash("u_strToLower: Unexpected error lowercasing unicode string.");
  }
  return OutputLength;
}
/*
 * Writes a normalized form of the UTF-8 string src into dest.
 *
 * src is processed one UTF-8 character at a time: each character is converted
 * to UTF-16, lowercased, NFD-normalized, passed through
 * helper_extra_normalize(), converted back to UTF-8 and appended to dest,
 * followed by a 0x01 separator byte. dest is NUL-terminated at the end.
 *
 * Returns the value of helper_check_language() for the first character
 * (CTS_LANG_OTHERS when src is empty), or CTS_ERR_ICU_FAILED when an ICU call
 * fails, when a character length reported by check_utf8() is unusable, or when
 * dest_size is too small. CTS_ERR_ICU_FAILED is reused for the size checks
 * because it is the only error value this function returned before.
 */
int helper_normalize_str(const char *src, char *dest, int dest_size)
{
	int type = CTS_LANG_OTHERS;
	int32_t size;
	UErrorCode status = 0;
	UChar tmp_result[CTS_SQL_MAX_LEN*2];
	UChar result[CTS_SQL_MAX_LEN*2];
	int i = 0;
	int j = 0;
	int str_len = strlen(src);
	int char_len = 0;

	/* dest must at least hold the terminating NUL written after the loop. */
	h_retvm_if(dest_size < 1, CTS_ERR_ICU_FAILED,
			"dest_size(%d) is too small", dest_size);

	for (i=0;i<str_len;i+=char_len) {
		char char_src[10];
		char_len = check_utf8(src[i]);
		/*
		 * char_len is used as a memcpy length and as the loop step, so it
		 * must be positive, leave room for the NUL in char_src, and not run
		 * past the end of src.
		 */
		h_retvm_if(char_len <= 0 || (int)sizeof(char_src) <= char_len
				|| str_len - i < char_len, CTS_ERR_ICU_FAILED,
				"Invalid UTF-8 character length(%d)", char_len);
		memcpy(char_src, &src[i], char_len);
		char_src[char_len] = '\0';

		u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strFromUTF8() Failed(%s)", u_errorName(status));

		u_strToLower(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strToLower() Failed(%s)", u_errorName(status));

		size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
				(UChar *)result, array_sizeof(result), &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"unorm_normalize(%s) Failed(%s)", char_src, u_errorName(status));

		/* The language type is decided by the first character only. */
		if (0 == i)
			type = helper_check_language(result);
		helper_extra_normalize(result, size);

		u_strToUTF8(&dest[j], dest_size-j, &size, result, -1, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strToUTF8() Failed(%s)", u_errorName(status));
		/*
		 * Besides the converted bytes, dest still needs room for the 0x01
		 * separator and the final NUL.
		 */
		h_retvm_if(dest_size - j < size + 2, CTS_ERR_ICU_FAILED,
				"dest_size(%d) is too small", dest_size);
		j += size;
		dest[j++] = 0x01;
	}
	dest[j]='\0';
	HELPER_DBG("src(%s) is transformed(%s)", src, dest);
	return type;
}
Example #5
0
File: icu.c Project: Kielek/calibre
// lower {{{
/*
 * icu_lower(locale, text) -> unicode
 *
 * Lowercases `text` with ICU's u_strToLower() using the given locale name.
 * The text is taken as UTF-8, converted to UTF-16 for ICU and back to UTF-8
 * for the result. If any ICU step fails, or the lowercased string is empty,
 * the original input is returned unchanged (decoded with "replace").
 *
 * Returns a new reference, or NULL with an exception set on error.
 */
static PyObject *
icu_lower(PyObject *self, PyObject *args) {
    char *input = NULL, *ans, *buf3 = NULL;
    const char *loc;
    int32_t sz;
    UChar *buf = NULL, *buf2 = NULL;
    PyObject *ret = NULL;
    UErrorCode status = U_ZERO_ERROR;

    /* "es" stores an allocated copy in `input`; it is released with PyMem_Free below. */
    if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL;

    sz = (int32_t)strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));
    buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar));
    if (buf == NULL || buf2 == NULL) { PyErr_NoMemory(); goto end; }

    u_strFromUTF8(buf, sz*4, NULL, input, sz, &status);
    u_strToLower(buf2, sz*8, buf, -1, loc, &status);

    /* Fall back to the untouched input unless the whole pipeline succeeds. */
    ans = input;
    sz = u_strlen(buf2);

    if (U_SUCCESS(status) && sz > 0) {
        buf3 = (char*)calloc(sz*5+1, sizeof(char));
        if (buf3 == NULL) { PyErr_NoMemory(); goto end; }
        u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status);
        if (U_SUCCESS(status)) ans = buf3;
    }

    ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace");
    if (ret == NULL) PyErr_NoMemory();

end:
    /* Single exit so every path releases all buffers. */
    free(buf);
    free(buf2);
    free(buf3);
    PyMem_Free(input);

    return ret;
} // }}}
Example #6
0
/*
** Implementations of scalar functions for case mapping - upper() and 
** lower(). Function upper() converts its input to upper-case (ABC).
** Function lower() converts to lower-case (abc).
**
** ICU provides two types of case mapping, "general" case mapping and
** "language specific". Refer to ICU documentation for the differences
** between the two.
**
** To utilise "general" case mapping, the upper() or lower() scalar 
** functions are invoked with one argument:
**
**     upper('abc') -> 'ABC'
**     lower('ABC') -> 'abc'
**
** To access ICU "language specific" case mapping, upper() or lower()
** should be invoked with two arguments. The second argument is the name
** of the locale to use. Passing an empty string ("") or SQL NULL value
** as the second argument is the same as invoking the 1 argument version
** of upper() or lower().
**
**     lower('I', 'en_us') -> 'i'
**     lower('I', 'tr_tr') -> 'ı' (small dotless i)
**
** http://www.icu-project.org/userguide/posix.html#case_mappings
*/
static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
  const UChar *zInput;
  UChar *zOutput;
  int nInput;                     /* Size of utf-16 input string in bytes */
  int nOutput;                    /* Bytes of zOutput offered to ICU */

  UErrorCode status = U_ZERO_ERROR;
  const char *zLocale = 0;

  assert(nArg==1 || nArg==2);
  if( nArg==2 ){
    zLocale = (const char *)sqlite3_value_text(apArg[1]);
  }

  zInput = sqlite3_value_text16(apArg[0]);
  if( !zInput ){
    return;
  }
  nInput = sqlite3_value_bytes16(apArg[0]);

  /*
  ** ICU is offered nOutput bytes. One extra UChar is allocated past that and
  ** zeroed, because ICU reports success (with a warning) when the result
  ** fills the offered capacity exactly and then writes no terminator. The
  ** result is handed to SQLite with length -1, so it must be NUL-terminated.
  */
  nOutput = nInput * 2 + 2;
  zOutput = sqlite3_malloc(nOutput + (int)sizeof(UChar));
  if( !zOutput ){
    return;
  }
  zOutput[nOutput/2] = 0;

  /* The user-data pointer selects the direction: non-NULL means upper(). */
  if( sqlite3_user_data(p) ){
    u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
  }else{
    u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
  }

  if( !U_SUCCESS(status) ){
    icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
    /* The buffer was never handed to SQLite, so release it here. */
    xFree(zOutput);
    return;
  }

  /* SQLite takes over the buffer and releases it through xFree. */
  sqlite3_result_text16(p, zOutput, -1, xFree);
}
Example #7
0
File: icu.c Project: IvoNet/calibre
/*
 * icu_change_case(text, which, locale) -> converted text
 *
 * Converts `text` with ICU: TITLE_CASE uses u_strToTitle(), UPPER_CASE uses
 * u_strToUpper(), and any other value of `which` uses u_strToLower().
 * `locale` must not be None; NotImplementedError is raised otherwise.
 * An ICU failure is raised as ValueError carrying the ICU error name.
 *
 * Returns a new reference, or NULL with an exception set on error.
 */
static PyObject* icu_change_case(PyObject *self, PyObject *args) {
    char *locale = NULL;
    PyObject *input = NULL, *result = NULL;
    int which = UPPER_CASE;
    UErrorCode status = U_ZERO_ERROR;
    UChar *input_buf = NULL, *output_buf = NULL;
    int32_t sz = 0, capacity = 0;

    if (!PyArg_ParseTuple(args, "Oiz", &input, &which, &locale)) return NULL;
    if (locale == NULL) {
        PyErr_SetString(PyExc_NotImplementedError, "You must specify a locale");  // We deliberately use NotImplementedError so that this error can be unambiguously identified
        return NULL;
    }

    input_buf = python_to_icu(input, &sz, 1);
    if (input_buf == NULL) goto end;
    // Three output units per input unit, plus one so that an empty input
    // never asks calloc for zero elements (which may legally return NULL and
    // would be misreported as out-of-memory).
    capacity = 3 * sz + 1;
    output_buf = (UChar*) calloc(capacity, sizeof(UChar));
    if (output_buf == NULL) { PyErr_NoMemory(); goto end; }

    switch (which) {
        case TITLE_CASE:
            sz = u_strToTitle(output_buf, capacity, input_buf, sz, NULL, locale, &status);
            break;
        case UPPER_CASE:
            sz = u_strToUpper(output_buf, capacity, input_buf, sz, locale, &status);
            break;
        default:
            sz = u_strToLower(output_buf, capacity, input_buf, sz, locale, &status);
    }
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }
    result = icu_to_python(output_buf, sz);

end:
    if (input_buf != NULL) free(input_buf);
    if (output_buf != NULL) free(output_buf);
    return result;

} // }}}
Example #8
0
/*
 * Applies one ICU case operation from src16 into dest16 and returns the
 * value returned by the ICU call.
 *
 * action selects the operation (either letter case is accepted):
 *   'l' lower, 'u' upper, 't' title, 'f' case fold (U_FOLD_CASE_DEFAULT).
 * Any other action sets *status to U_UNSUPPORTED_ERROR and returns 0.
 */
static uint32_t icu_utf16_sub(struct icu_buf_utf16 *dest16,
                          struct icu_buf_utf16 *src16,
                          const char *locale, char action,
                          UErrorCode *status)
{
    uint32_t produced = 0;

    if (action == 'l' || action == 'L')
    {
        produced = u_strToLower(dest16->utf16, dest16->utf16_cap,
                                src16->utf16, src16->utf16_len,
                                locale, status);
    }
    else if (action == 'u' || action == 'U')
    {
        produced = u_strToUpper(dest16->utf16, dest16->utf16_cap,
                                src16->utf16, src16->utf16_len,
                                locale, status);
    }
    else if (action == 't' || action == 'T')
    {
        produced = u_strToTitle(dest16->utf16, dest16->utf16_cap,
                                src16->utf16, src16->utf16_len,
                                0, locale, status);
    }
    else if (action == 'f' || action == 'F')
    {
        produced = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
                                 src16->utf16, src16->utf16_len,
                                 U_FOLD_CASE_DEFAULT, status);
    }
    else
    {
        *status = U_UNSUPPORTED_ERROR;
    }

    return produced;
}
Example #9
0
/*
 * Exercises u_strToLower(): root-locale lowercasing into a separate buffer,
 * Turkish lowercasing in place with an implicit source length, preflighting
 * with a too-small destination, and the illegal-argument cases (NULL
 * destination, negative capacity). Failures are reported through log_err().
 */
static void
TestCaseLower(void) {
    static const UChar

    beforeLower[]= { 0x61, 0x42, 0x49,  0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
    lowerRoot[]=   { 0x61, 0x62, 0x69,  0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
    lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff };

    UChar buffer[32];
    int32_t length;
    UErrorCode errorCode;

    /* lowercase with root locale and separate buffers */
    buffer[0]=0xabcd;
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
                        beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
                        "",
                        &errorCode);
    if( U_FAILURE(errorCode) ||
        length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) ||
        uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
        buffer[length]!=0
    ) {
        /* length is int32_t; cast so the argument matches %ld */
        log_err("error in u_strToLower(root locale)=%ld error=%s string matches: %s\t\nlowerRoot=%s\t\nbuffer=%s\n",
            (long)length,
            u_errorName(errorCode),
            uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)==0 &&
            buffer[length]==0 ? "yes" : "no",
            aescstrdup(lowerRoot,-1),
            aescstrdup(buffer,-1));
    }

    /* lowercase with turkish locale and in the same buffer */
    uprv_memcpy(buffer, beforeLower, sizeof(beforeLower));
    buffer[sizeof(beforeLower)/U_SIZEOF_UCHAR]=0;
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
                        buffer, -1, /* implicit srcLength */
                        "tr",
                        &errorCode);
    if( U_FAILURE(errorCode) ||
        length!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) ||
        uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
        buffer[length]!=0
    ) {
        log_err("error in u_strToLower(turkish locale)=%ld error=%s string matches: %s\n",
            (long)length,
            u_errorName(errorCode),
            uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
    }

    /* test preflighting */
    buffer[0]=buffer[2]=0xabcd;
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, 2, /* set destCapacity=2 */
                        beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
                        "",
                        &errorCode);
    if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
        length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) ||
        uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
        buffer[2]!=0xabcd
    ) {
        log_err("error in u_strToLower(root locale preflighting)=%ld error=%s string matches: %s\n",
            (long)length,
            u_errorName(errorCode),
            uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
    }

    /* test error handling */
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(NULL, sizeof(buffer)/U_SIZEOF_UCHAR,
                        beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
                        "",
                        &errorCode);
    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
        log_err("error in u_strToLower(root locale dest=NULL)=%ld error=%s\n",
            (long)length,
            u_errorName(errorCode));
    }

    buffer[0]=0xabcd;
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, -1,
                        beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR,
                        "",
                        &errorCode);
    if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
        buffer[0]!=0xabcd
    ) {
        /* buffer[0] is a UChar; cast so the argument matches %lx */
        log_err("error in u_strToLower(root locale destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
            (long)length,
            u_errorName(errorCode),
            (unsigned long)buffer[0]);
    }
}
Example #10
0
/*
 * Formats args[0].doubleValue in scientific notation into `output`.
 *
 * The exponent symbol of the bundle's scientific formatter is lower- or
 * upper-cased depending on whether the conversion spec is 'e' (0x65), the
 * fraction digits are set from info->fPrecision (6 when it is -1), and the
 * formatted text is passed to u_sprintf_pad_and_justify(), whose return
 * value is returned. Returns 0 when no formatter is available.
 */
int32_t
u_sprintf_scientific_handler(u_localized_string *output,
                             const u_sprintf_spec_info     *info,
                             const ufmt_args            *args)
{
    double        num         = (double) (args[0].doubleValue);
    UNumberFormat        *format;
    UChar            result        [USPRINTF_BUFFER_SIZE];
    int32_t        resultLen;
    int32_t        minDecimalDigits;
    int32_t        maxDecimalDigits;
    UErrorCode        status        = U_ZERO_ERROR;
    UChar srcExpBuf[USPRINTF_SYMBOL_BUFFER_SIZE];
    int32_t srcLen, expLen;
    UChar expBuf[USPRINTF_SYMBOL_BUFFER_SIZE];
    /* ICU capacities are counted in UChars, not bytes */
    const int32_t srcExpCapacity = (int32_t)(sizeof(srcExpBuf)/sizeof(srcExpBuf[0]));
    const int32_t expCapacity    = (int32_t)(sizeof(expBuf)/sizeof(expBuf[0]));


    /* mask off any necessary bits */
    /*  if(! info->fIsLongDouble)
    num &= DBL_MAX;*/

    /* get the formatter */
    format = u_locbund_getScientificFormat(output->fBundle);

    /* handle error */
    if(format == 0)
        return 0;

    /* set the appropriate flags on the formatter */

    /* clone the stream's bundle if it isn't owned */
    if(! output->fOwnBundle) {
        output->fBundle     = u_locbund_clone(output->fBundle);
        output->fOwnBundle  = TRUE;
        format              = u_locbund_getScientificFormat(output->fBundle);

        /* the cloned bundle may not be able to supply a formatter */
        if(format == 0)
            return 0;
    }

    srcLen = unum_getSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        srcExpCapacity,
        &status);

    /* Upper/lower case the e */
    if (info->fSpec == (UChar)0x65 /* e */) {
        expLen = u_strToLower(expBuf, expCapacity,
            srcExpBuf, srcLen,
            output->fBundle->fLocale,
            &status);
    }
    else {
        expLen = u_strToUpper(expBuf, expCapacity,
            srcExpBuf, srcLen,
            output->fBundle->fLocale,
            &status);
    }

    unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        expBuf,
        expLen,
        &status);

    /* set the number of decimal digits */

    /* save the formatter's state */
    minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS);
    maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS);

    if(info->fPrecision != -1) {
        /* set the # of decimal digits */
        unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision);
    }
    else {
        /* # of decimal digits is 6 if precision not specified; */
        /* '#' (fAlt) also shows 6 digits - copy of printf behavior on Solaris */
        unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6);
    }

    /* set whether to show the sign */
    u_sprintf_set_sign(format, info, &status);

    /* format the number */
    resultLen = unum_formatDouble(format, num, result, USPRINTF_BUFFER_SIZE, 0, &status);

    /* on failure `result` holds no usable text, so emit nothing */
    if (U_FAILURE(status)) {
        resultLen = 0;
    }

    /* restore the number format */
    unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits);
    unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits);

    /* Since we clone the fBundle and we're only using the scientific
       format, we don't need to save the old exponent value. */
    /*unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        srcLen,
        &status);*/

    return u_sprintf_pad_and_justify(output, info, result, resultLen);
}
Example #11
0
/*
 * Parses a number in scientific notation from `input`.
 *
 * Leading whitespace and a leading positive sign are skipped, the exponent
 * symbol of the bundle's scientific formatter is lower- or upper-cased
 * depending on whether the conversion spec is 'e' (0x65), and the value is
 * parsed with unum_parseDouble(). Unless info->fSkipArg is set, the value is
 * stored through args[0].ptrValue as double (fIsLong), long double
 * (fIsLongDouble) or float.
 *
 * *argConverted is set to 1 unless the argument was skipped. Returns the
 * number of input units consumed (parsed plus skipped), or 0 when no
 * formatter is available. fmt and fmtConsumed are not used here.
 */
static int32_t
u_scanf_scientific_handler(UFILE        *input,
                           u_scanf_spec_info *info,
                           ufmt_args    *args,
                           const UChar  *fmt,
                           int32_t      *fmtConsumed,
                           int32_t      *argConverted)
{
    int32_t         len;
    double          num;
    UNumberFormat   *format;
    int32_t         parsePos    = 0;
    int32_t         skipped;
    UErrorCode      status      = U_ZERO_ERROR;
    UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    int32_t srcLen, expLen;
    UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    /* ICU capacities are counted in UChars, not bytes */
    const int32_t srcExpCapacity = (int32_t)(sizeof(srcExpBuf)/sizeof(srcExpBuf[0]));
    const int32_t expCapacity    = (int32_t)(sizeof(expBuf)/sizeof(expBuf[0]));


    /* skip all ws in the input */
    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

    /* fill the input's internal buffer */
    ufile_fill_uchar_buffer(input);

    /* determine the size of the input's buffer */
    len = (int32_t)(input->str.fLimit - input->str.fPos);

    /* truncate to the width, if specified */
    if(info->fWidth != -1)
        len = ufmt_min(len, info->fWidth);

    /* get the formatter */
    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);

    /* handle error */
    if(format == 0)
        return 0;

    /* set the appropriate flags on the formatter */

    srcLen = unum_getSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        srcExpCapacity,
        &status);

    /* Upper/lower case the e */
    if (info->fSpec == (UChar)0x65 /* e */) {
        expLen = u_strToLower(expBuf, expCapacity,
            srcExpBuf, srcLen,
            input->str.fBundle.fLocale,
            &status);
    }
    else {
        expLen = u_strToUpper(expBuf, expCapacity,
            srcExpBuf, srcLen,
            input->str.fBundle.fLocale,
            &status);
    }

    unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        expBuf,
        expLen,
        &status);




    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);

    /* parse the number */
    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);

    if (!info->fSkipArg) {
        if (info->fIsLong)
            *(double*)(args[0].ptrValue) = num;
        else if (info->fIsLongDouble)
            *(long double*)(args[0].ptrValue) = num;
        else
            *(float*)(args[0].ptrValue) = (float)num;
    }

    /* mask off any necessary bits */
    /*  if(! info->fIsLong_double)
    num &= DBL_MAX;*/

    /* update the input's position to reflect consumed data */
    input->str.fPos += parsePos;

    /* we converted 1 arg */
    *argConverted = !info->fSkipArg;
    return parsePos + skipped;
}
/*
 * Formats args[0].doubleValue in scientific notation and emits it through
 * handler->pad_and_justify(), whose return value is returned.
 *
 * The exponent symbol of the bundle's scientific formatter is lower- or
 * upper-cased depending on whether the conversion spec is 'e' (0x65). The
 * fraction digits come from info->fPrecision (6 when it is -1); for original
 * specs other than 'e'/'E' the precision is only the maximum, with a minimum
 * of 1. If formatting fails, an empty result is emitted. Returns 0 when no
 * formatter is available.
 */
static int32_t
u_printf_scientific_handler(const u_printf_stream_handler  *handler,
                            void                           *context,
                            ULocaleBundle                  *formatBundle,
                            const u_printf_spec_info       *info,
                            const ufmt_args                *args)
{
    double          num         = (double) (args[0].doubleValue);
    UNumberFormat   *format;
    UChar           result[UPRINTF_BUFFER_SIZE];
    UChar           prefixBuffer[UPRINTF_BUFFER_SIZE];
    /* Element count rather than byte size: a safe bound whichever unit
       u_printf_set_sign() expects, and the unit ICU uses for UChar buffers. */
    int32_t         prefixBufferLen = (int32_t)(sizeof(prefixBuffer)/sizeof(prefixBuffer[0]));
    int32_t         minDecimalDigits;
    int32_t         maxDecimalDigits;
    UErrorCode      status        = U_ZERO_ERROR;
    UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    int32_t srcLen, expLen;
    int32_t resultLen;
    UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    /* ICU capacities are counted in UChars, not bytes */
    const int32_t srcExpCapacity = (int32_t)(sizeof(srcExpBuf)/sizeof(srcExpBuf[0]));
    const int32_t expCapacity    = (int32_t)(sizeof(expBuf)/sizeof(expBuf[0]));

    prefixBuffer[0] = 0;

    /* mask off any necessary bits */
    /*  if(! info->fIsLongDouble)
    num &= DBL_MAX;*/

    /* get the formatter */
    format = u_locbund_getNumberFormat(formatBundle, UNUM_SCIENTIFIC);

    /* handle error */
    if(format == 0)
        return 0;

    /* set the appropriate flags on the formatter */

    srcLen = unum_getSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        srcExpCapacity,
        &status);

    /* Upper/lower case the e */
    if (info->fSpec == (UChar)0x65 /* e */) {
        expLen = u_strToLower(expBuf, expCapacity,
            srcExpBuf, srcLen,
            formatBundle->fLocale,
            &status);
    }
    else {
        expLen = u_strToUpper(expBuf, expCapacity,
            srcExpBuf, srcLen,
            formatBundle->fLocale,
            &status);
    }

    unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        expBuf,
        expLen,
        &status);

    /* save the formatter's state */
    minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS);
    maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS);

    /* set the appropriate flags and number of decimal digits on the formatter */
    if(info->fPrecision != -1) {
        /* set the # of decimal digits */
        if (info->fOrigSpec == (UChar)0x65 /* e */ || info->fOrigSpec == (UChar)0x45 /* E */) {
            unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision);
        }
        else {
            unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, 1);
            unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, info->fPrecision);
        }
    }
    else if(info->fAlt) {
        /* '#' means always show decimal point */
        /* copy of printf behavior on Solaris - '#' shows 6 digits */
        unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6);
    }
    else {
        /* # of decimal digits is 6 if precision not specified */
        unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6);
    }

    /* set whether to show the sign */
    if (info->fShowSign) {
        u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status);
    }

    /* format the number */
    resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status);

    if (U_FAILURE(status)) {
        resultLen = 0;
    }

    /* restore the number format */
    /* TODO: Is this needed? */
    unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits);
    unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits);

    /* Since we're the only one using the scientific
       format, we don't need to save the old exponent value. */
    /*unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        srcLen,
        &status);*/

    if (info->fShowSign) {
        /* Reset back to original value regardless of what the error was */
        UErrorCode localStatus = U_ZERO_ERROR;
        u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus);
    }

    return handler->pad_and_justify(context, info, result, resultLen);
}
Example #13
0
static void demoCaseMapInC() {
    /*
     * input=
     *   "aB<capital sigma>"
     *   "iI<small dotless i><capital dotted I> "
     *   "<sharp s> <small lig. ffi>"
     *   "<small final sigma><small sigma><capital sigma>"
     */
    static const UChar input[]={
        0x61, 0x42, 0x3a3,
        0x69, 0x49, 0x131, 0x130, 0x20,
        0xdf, 0x20, 0xfb03,
        0x3c2, 0x3c3, 0x3a3, 0
    };
    UChar buffer[32];

    UErrorCode errorCode;
    UChar32 c;
    int32_t i, j, length;
    UBool isError;

    printf("\n* demoCaseMapInC() ----------------- ***\n\n");

    /*
     * First, use simple case mapping functions which provide
     * 1:1 code point mappings without context/locale ID.
     *
     * Note that some mappings will not be "right" because some "real"
     * case mappings require context, depend on the locale ID,
     * and/or result in a change in the number of code points.
     */
    printUString("input string: ", input, -1);

    /* uppercase */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_toupper(c);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-uppercased: ", buffer, j);
    /* lowercase */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_tolower(c);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-lowercased: ", buffer, j);
    /* titlecase */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_totitle(c);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-titlecased: ", buffer, j);
    /* case-fold/default */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_foldCase(c, U_FOLD_CASE_DEFAULT);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-case-folded/default: ", buffer, j);
    /* case-fold/Turkic */
    isError=FALSE;
    for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
        if(c==0) {
            break; /* stop at terminating NUL, no need to terminate buffer */
        }
        c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
        U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    }
    printUString("simple-case-folded/Turkic: ", buffer, j);

    /*
     * Second, use full case mapping functions which provide
     * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID.
     *
     * Note that lower/upper/titlecasing take a locale ID while case-folding
     * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file.
     *
     * Also, string titlecasing requires a BreakIterator to find starts of words.
     * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default
     * titlecasing BreakIterator automatically.
     * For production code where many strings are titlecased it would be more efficient
     * to open a BreakIterator externally and pass it in.
     */
    printUString("\ninput string: ", input, -1);

    /* lowercase/English */
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-lowercased/en: ", buffer, length);
    } else {
        printf("error in u_strToLower(en)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* lowercase/Turkish */
    errorCode=U_ZERO_ERROR;
    length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-lowercased/tr: ", buffer, length);
    } else {
        printf("error in u_strToLower(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* uppercase/English */
    errorCode=U_ZERO_ERROR;
    length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-uppercased/en: ", buffer, length);
    } else {
        printf("error in u_strToUpper(en)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* uppercase/Turkish */
    errorCode=U_ZERO_ERROR;
    length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-uppercased/tr: ", buffer, length);
    } else {
        printf("error in u_strToUpper(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* titlecase/English */
    errorCode=U_ZERO_ERROR;
    length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "en", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-titlecased/en: ", buffer, length);
    } else {
        printf("error in u_strToTitle(en)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* titlecase/Turkish */
    errorCode=U_ZERO_ERROR;
    length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-titlecased/tr: ", buffer, length);
    } else {
        printf("error in u_strToTitle(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* case-fold/default */
    errorCode=U_ZERO_ERROR;
    length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_DEFAULT, &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-case-folded/default: ", buffer, length);
    } else {
        printf("error in u_strFoldCase(default)=%ld error=%s\n", length, u_errorName(errorCode));
    }
    /* case-fold/Turkic */
    errorCode=U_ZERO_ERROR;
    length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
    if(U_SUCCESS(errorCode)) {
        printUString("full-case-folded/Turkic: ", buffer, length);
    } else {
        printf("error in u_strFoldCase(Turkic)=%ld error=%s\n", length, u_errorName(errorCode));
    }
}
Example #14
0
File: psl.c Project: jcajka/libpsl
/**
 * psl_str_to_utf8lower:
 * @str: string to convert
 * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
 * @locale: locale of @str used for the lowercase conversion, e.g. 'de' or %NULL
 * @lower: return value containing the converted string
 *
 * This helper function converts a string to lowercase UTF-8 representation.
 * Lowercase UTF-8 is needed as input to the domain checking functions.
 *
 * @lower is set to %NULL on error.
 *
 * The return value 'lower' must be freed after usage.
 *
 * Returns: psl_error_t value.
 *   PSL_SUCCESS: Success
 *   PSL_ERR_INVALID_ARG: @str is a %NULL value.
 *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
 *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode
 *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
 *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
 *
 * Since: 0.4
 */
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
{
	int ret = PSL_ERR_INVALID_ARG;

	if (lower)
		*lower = NULL;

	if (!str)
		return PSL_ERR_INVALID_ARG;

	/* shortcut to avoid costly conversion */
	if (_str_is_ascii(str)) {
		if (lower) {
			char *p;

			*lower = strdup(str);

			/* convert ASCII string to lowercase */
			for (p = *lower; *p; p++)
				if (isupper(*p))
					*p = tolower(*p);
		}
		return PSL_SUCCESS;
	}

#ifdef WITH_LIBICU
	do {
	size_t str_length = strlen(str);
	UErrorCode status = 0;
	UChar *utf16_dst, *utf16_lower;
	int32_t utf16_dst_length;
	char *utf8_lower;
	UConverter *uconv;

	/* C89 allocation */
	utf16_dst   = alloca(sizeof(UChar) * (str_length * 2 + 1));
	utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
	utf8_lower  = alloca(str_length * 2 + 1);

	uconv = ucnv_open(encoding, &status);
	if (U_SUCCESS(status)) {
		utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
		ucnv_close(uconv);

		if (U_SUCCESS(status)) {
			int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
			if (U_SUCCESS(status)) {
				u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status);
				if (U_SUCCESS(status)) {
					if (lower)
						*lower = strdup(utf8_lower);
					ret = PSL_SUCCESS;
				} else {
					ret = PSL_ERR_TO_UTF8;
					/* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
				}
			} else {
				ret = PSL_ERR_TO_LOWER;
				/* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
			}
		} else {
			ret = PSL_ERR_TO_UTF16;
			/* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
		}
	} else {
		ret = PSL_ERR_CONVERTER;
		/* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
	}
	} while (0);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
	do {
		/* find out local charset encoding */
		if (!encoding) {
			encoding = nl_langinfo(CODESET);

			if (!encoding || !*encoding)
				encoding = "ASCII";
		}

		/* convert to UTF-8 */
		if (strcasecmp(encoding, "utf-8")) {
			iconv_t cd = iconv_open("utf-8", encoding);

			if (cd != (iconv_t)-1) {
				char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
				size_t tmp_len = strlen(str);
				size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
				char *dst = malloc(dst_len + 1), *dst_tmp = dst;

				if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) {
					uint8_t *resbuf = malloc(dst_len * 2 + 1);
					size_t len = dst_len * 2; /* leave space for additional \0 byte */

					if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
						/* u8_tolower() does not terminate the result string */
						if (lower)
							*lower = strndup((char *)dst, len);
					} else {
						ret = PSL_ERR_TO_LOWER;
						/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
					}

					if (lower)
						*lower = strndup(dst, dst_len - dst_len_tmp);
					ret = PSL_SUCCESS;
				} else {
					ret = PSL_ERR_TO_UTF8;
					/* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
				}

				free(dst);
				iconv_close(cd);
			} else {
				ret = PSL_ERR_TO_UTF8;
				/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
			}
		} else
			ret = PSL_SUCCESS;

		/* convert to lowercase */
		if (ret == PSL_SUCCESS) {
			uint8_t *dst, resbuf[256];
			size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

			/* we need a conversion to lowercase */
			if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
				/* u8_tolower() does not terminate the result string */
				if (lower)
					*lower = strndup((char *)dst, len);
			} else {
				ret = PSL_ERR_TO_LOWER;
				/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
			}
		}

	} while (0);
#endif

	return ret;
}
Example #15
0
/*
 * Forwarding wrapper around ICU's u_strToLower().
 *
 * Every argument is handed to u_strToLower() unchanged and its return value
 * is returned as-is; errors are reported through pErrorCode by the callee.
 */
int32_t __hs_u_strToLower(UChar *dest, int32_t destCapacity,
			  const UChar *src, int32_t srcLength,
			  const char *locale, UErrorCode *pErrorCode)
{
    const int32_t result = u_strToLower(dest, destCapacity,
					src, srcLength,
					locale, pErrorCode);

    return result;
}