Beispiel #1
0
CString Utf32ToUtf16( UChar32 const* pqz, int const cch ) {
	UChar* pwzDest = nullptr;
	int cchDest = 0;

	UErrorCode errorCode = U_ZERO_ERROR;
	u_strFromUTF32( nullptr, 0, &cchDest, pqz, cch, &errorCode );
	if ( U_BUFFER_OVERFLOW_ERROR != errorCode && U_STRING_NOT_TERMINATED_WARNING != errorCode ) {
		debug( L"Utf32ToUtf16/n: u_strFromUTF32 failed, errorCode=%d\n", errorCode );
		return CString( );
	}

	int cchDestCapacity = cchDest + 1;
	pwzDest = new UChar[cchDestCapacity];
	cchDest = 0;

	errorCode = U_ZERO_ERROR;
	u_strFromUTF32( pwzDest, cchDestCapacity, &cchDest, pqz, cch, &errorCode );
	if ( U_ZERO_ERROR != errorCode && U_STRING_NOT_TERMINATED_WARNING != errorCode ) {
		debug( L"Utf32ToUtf16/n: u_strFromUTF32 failed, errorCode=%d\n", errorCode );
		delete[] pwzDest;
		return CString( );
	}

	CString tmp( pwzDest, cchDest );
	delete[] pwzDest;
	return tmp;
}
Beispiel #2
0
static uint32_t
getToUnicodeValue(CnvExtData *extData, UCMTable *table, UCMapping *m) {
    UChar32 *u32;
    UChar *u;
    uint32_t value;
    int32_t u16Length, ratio;
    UErrorCode errorCode;

    /* write the Unicode result code point or string index */
    if(m->uLen==1) {
        u16Length=U16_LENGTH(m->u);
        value=(uint32_t)(UCNV_EXT_TO_U_MIN_CODE_POINT+m->u);
    } else {
        /* the parser enforces m->uLen<=UCNV_EXT_MAX_UCHARS */

        /* get the result code point string and its 16-bit string length */
        u32=UCM_GET_CODE_POINTS(table, m);
        errorCode=U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &u16Length, u32, m->uLen, &errorCode);
        if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) {
            exit(errorCode);
        }

        /* allocate it and put its length and index into the value */
        value=
            (((uint32_t)m->uLen+UCNV_EXT_TO_U_LENGTH_OFFSET)<<UCNV_EXT_TO_U_LENGTH_SHIFT)|
            ((uint32_t)utm_countItems(extData->toUUChars));
        u=utm_allocN(extData->toUUChars, u16Length);

        /* write the result 16-bit string */
        errorCode=U_ZERO_ERROR;
        u_strFromUTF32(u, u16Length, NULL, u32, m->uLen, &errorCode);
        if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) {
            exit(errorCode);
        }
    }
    if(m->f==0) {
        value|=UCNV_EXT_TO_U_ROUNDTRIP_FLAG;
    }

    /* update statistics */
    if(m->bLen>extData->maxInBytes) {
        extData->maxInBytes=m->bLen;
    }
    if(u16Length>extData->maxOutUChars) {
        extData->maxOutUChars=u16Length;
    }

    ratio=(u16Length+(m->bLen-1))/m->bLen;
    if(ratio>extData->maxUCharsPerByte) {
        extData->maxUCharsPerByte=ratio;
    }

    return value;
}
Beispiel #3
0
static PyObject* icu_swap_case(PyObject *self, PyObject *input) {
    PyObject *result = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar *input_buf = NULL, *output_buf = NULL;
    UChar32 *buf = NULL;
    int32_t sz = 0, sz32 = 0, i = 0;

    input_buf = python_to_icu(input, &sz);
    if (input_buf == NULL) goto end;
    output_buf = (UChar*) calloc(3 * sz, sizeof(UChar));
    buf = (UChar32*) calloc(2 * sz, sizeof(UChar32));
    if (output_buf == NULL || buf == NULL) { PyErr_NoMemory(); goto end; }
    u_strToUTF32(buf, 2 * sz, &sz32, input_buf, sz, &status);

    for (i = 0; i < sz32; i++) {
        if (u_islower(buf[i])) buf[i] = u_toupper(buf[i]);
        else if (u_isupper(buf[i])) buf[i] = u_tolower(buf[i]);
    }
    u_strFromUTF32(output_buf, 3*sz, &sz, buf, sz32, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }
    result = icu_to_python(output_buf, sz);

end:
    if (input_buf != NULL) free(input_buf);
    if (output_buf != NULL) free(output_buf);
    if (buf != NULL) free(buf);
    return result;

} // }}}
Beispiel #4
0
	static size_t AppendToBuffer(UChar *buff, const UChar *buffer_last, WChar c)
	{
		/* Transform from UTF-32 to internal ICU format of UTF-16. */
		int32 length = 0;
		UErrorCode err = U_ZERO_ERROR;
		u_strFromUTF32(buff, buffer_last - buff, &length, (UChar32*)&c, 1, &err);
		return length;
	}
Beispiel #5
0
// chr {{{
static PyObject *
icu_chr(PyObject *self, PyObject *args) {
    UErrorCode status = U_ZERO_ERROR;
    UChar32 code = 0;
    UChar buf[5] = {0};
    int32_t sz = 0;

    if (!PyArg_ParseTuple(args, "I", &code)) return NULL;

    u_strFromUTF32(buf, 4, &sz, &code, 1, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)"); return NULL; }
    return icu_to_python(buf, sz);
} // }}}
Beispiel #6
0
CString Utf32ToUtf16( UChar32 const wch ) {
	UChar wchDest = L'\0';
	int cchDest = 1;

	UErrorCode errorCode = U_ZERO_ERROR;
	u_strFromUTF32( &wchDest, 1, &cchDest, &wch, 1, &errorCode );
	if ( U_STRING_NOT_TERMINATED_WARNING != errorCode ) {
		debug( L"Utf32ToUtf16/1: u_strFromUTF32 failed, errorCode=%d\n", errorCode );
		return CString( );
	} else {
		return CString( wchDest );
	}
}
Beispiel #7
0
U_CAPI UChar* U_EXPORT2
u_strFromWCS(UChar   *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const wchar_t *src,
             int32_t srcLength,
             UErrorCode *pErrorCode)
{

    /* args check */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
        (destCapacity<0) || (dest == NULL && destCapacity > 0)
    ) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

#ifdef U_WCHAR_IS_UTF16
    /* wchar_t is UTF-16 just do a memcpy */
    if(srcLength == -1){
        srcLength = u_strlen(src);
    }
    if(0 < srcLength && srcLength <= destCapacity){
        uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
    }
    if(pDestLength){
       *pDestLength = srcLength;
    }

    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);

    return dest;

#elif defined U_WCHAR_IS_UTF32

    return u_strFromUTF32(dest, destCapacity, pDestLength,
                          (UChar32*)src, srcLength, pErrorCode);

#else

    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);

#endif

}
Beispiel #8
0
// chr {{{
static PyObject *
icu_chr(PyObject *self, PyObject *args) {
    UErrorCode status = U_ZERO_ERROR;
    UChar32 code = 0;
    UChar buf[5] = {0};
    int32_t sz = 0;
    char utf8[21];
    PyObject *result = NULL;
  
    if (!PyArg_ParseTuple(args, "I", &code)) return NULL;

    u_strFromUTF32(buf, 4, &sz, &code, 1, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)"); goto end; }
    u_strToUTF8(utf8, 20, &sz, buf, sz, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)"); goto end; }
    result = PyUnicode_DecodeUTF8(utf8, sz, "strict");
end:
    return result;
} // }}}
Beispiel #9
0
static int32_t convertFromPuny(  const UChar* src, int32_t srcLength,
								 UChar* dest, int32_t destCapacity,
                                 UErrorCode& status){
    char b1Stack[MAX_LABEL_BUFFER_SIZE];
    char* b1 = b1Stack;
    int32_t destLen =0;

    convertUCharsToASCII(src, b1,srcLength);

    uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
    uint32_t* b2 = b2Stack;
    int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
    unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
    punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
    status = getError(error);
    if(status == U_BUFFER_OVERFLOW_ERROR){
        b2 =  (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
        if(b2 == NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }
        error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
        status = getError(error);
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);

CLEANUP:
    if(b1Stack != b1){
        uprv_free(b1);
    }
    if(b2Stack != b2){
        uprv_free(b2);
    }
    uprv_free(caseFlags);

    return destLen;   
}
Beispiel #10
0
EXPORT UnicodeString &PyObject_AsUnicodeString(PyObject *object,
                                               const char *encoding,
                                               const char *mode,
                                               UnicodeString &string)
{
    if (PyUnicode_Check(object))
    {
        if (sizeof(Py_UNICODE) == sizeof(UChar))
            string.setTo((const UChar *) PyUnicode_AS_UNICODE(object),
                         (int32_t) PyUnicode_GET_SIZE(object));
        else
        {
            int32_t len = (int32_t) PyUnicode_GET_SIZE(object);
            Py_UNICODE *pchars = PyUnicode_AS_UNICODE(object);
            UChar *chars = new UChar[len * 3];
            UErrorCode status = U_ZERO_ERROR;
            int32_t dstLen;

            u_strFromUTF32(chars, len * 3, &dstLen,
                           (const UChar32 *) pchars, len, &status);

            if (U_FAILURE(status))
            {
                delete[] chars;
                throw ICUException(status);
            }

            string.setTo((const UChar *) chars, (int32_t) dstLen);
            delete[] chars;
        }
    }
    else if (PyBytes_Check(object))
        PyBytes_AsUnicodeString(object, encoding, mode, string);
    else
    {
        PyErr_SetObject(PyExc_TypeError, object);
        throw ICUException();
    }

    return string;
}
Beispiel #11
0
/* parse a mapping line; must not be empty */
U_CAPI UBool U_EXPORT2
ucm_parseMappingLine(UCMapping *m,
                     UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
                     uint8_t bytes[UCNV_EXT_MAX_BYTES],
                     const char *line) {
    const char *s;
    char *end;
    UChar32 cp;
    int32_t u16Length;
    int8_t uLen, bLen, f;

    s=line;
    uLen=bLen=0;

    /* parse code points */
    for(;;) {
        /* skip an optional plus sign */
        if(uLen>0 && *s=='+') {
            ++s;
        }
        if(*s!='<') {
            break;
        }

        if( s[1]!='U' ||
            (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 ||
            *end!='>'
        ) {
            fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line);
            return FALSE;
        }
        if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) {
            fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line);
            return FALSE;
        }

        if(uLen==UCNV_EXT_MAX_UCHARS) {
            fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line);
            return FALSE;
        }
        codePoints[uLen++]=cp;
        s=end+1;
    }

    if(uLen==0) {
        fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line);
        return FALSE;
    } else if(uLen==1) {
        m->u=codePoints[0];
    } else {
        UErrorCode errorCode=U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode);
        if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) ||
            u16Length>UCNV_EXT_MAX_UCHARS
        ) {
            fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line);
            return FALSE;
        }
    }

    s=u_skipWhitespace(s);

    /* parse bytes */
    bLen=ucm_parseBytes(bytes, line, &s);

    if(bLen<0) {
        return FALSE;
    } else if(bLen==0) {
        fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line);
        return FALSE;
    } else if(bLen<=4) {
        uprv_memcpy(m->b.bytes, bytes, bLen);
    }

    /* skip everything until the fallback indicator, even the start of a comment */
    for(;;) {
        if(*s==0) {
            f=-1; /* no fallback indicator */
            break;
        } else if(*s=='|') {
            f=(int8_t)(s[1]-'0');
            if((uint8_t)f>4) {
                fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line);
                return FALSE;
            }
            break;
        }
        ++s;
    }

    m->uLen=uLen;
    m->bLen=bLen;
    m->f=f;
    return TRUE;
}