/**
 * Converts a UTF-32 buffer into a UTF-16 CString.
 *
 * The conversion runs in two passes over u_strFromUTF32: a sizing pass with
 * no destination buffer, then the real conversion into a heap buffer that is
 * one unit larger than the reported length.
 *
 * @param pqz  Source UTF-32 code points.
 * @param cch  Source length, forwarded unchanged to u_strFromUTF32.
 * @return The converted string, or an empty CString if either pass reports a
 *         status other than the ones accepted below (a debug line is logged).
 */
CString Utf32ToUtf16( UChar32 const* pqz, int const cch )
{
    // Pass 1: ask only for the required length (null destination, capacity 0).
    int cchNeeded = 0;
    UErrorCode status = U_ZERO_ERROR;
    u_strFromUTF32( nullptr, 0, &cchNeeded, pqz, cch, &status );

    // With a zero-capacity destination only these two statuses count as a
    // successful sizing pass; anything else is treated as a real failure.
    bool const sizingOk = ( U_BUFFER_OVERFLOW_ERROR == status )
                       || ( U_STRING_NOT_TERMINATED_WARNING == status );
    if ( !sizingOk )
    {
        debug( L"Utf32ToUtf16/n: u_strFromUTF32 failed, errorCode=%d\n", status );
        return CString( );
    }

    // Pass 2: convert into a buffer with one spare unit beyond the length.
    int const cchCapacity = cchNeeded + 1;
    UChar* const pwzBuffer = new UChar[cchCapacity];

    int cchWritten = 0;
    status = U_ZERO_ERROR;
    u_strFromUTF32( pwzBuffer, cchCapacity, &cchWritten, pqz, cch, &status );

    CString result;
    if ( U_ZERO_ERROR == status || U_STRING_NOT_TERMINATED_WARNING == status )
    {
        result = CString( pwzBuffer, cchWritten );
    }
    else
    {
        debug( L"Utf32ToUtf16/n: u_strFromUTF32 failed, errorCode=%d\n", status );
    }

    // Single release point for the temporary buffer on both outcomes.
    delete[] pwzBuffer;
    return result;
}
/*
 * Compute the toUnicode value for mapping m and fold the mapping's sizes
 * into the running statistics kept in extData.
 *
 * For a single code point the value is UCNV_EXT_TO_U_MIN_CODE_POINT+m->u.
 * For a multi-code point result the UTF-16 form is appended to
 * extData->toUUChars and the value packs the code point count together with
 * the index at which the string was stored.
 *
 * The process exits with the ICU error code if u_strFromUTF32 reports a
 * failure other than U_BUFFER_OVERFLOW_ERROR.
 */
static uint32_t
getToUnicodeValue(CnvExtData *extData, UCMTable *table, UCMapping *m) {
    uint32_t value;
    int32_t u16Length, ratio;

    if(m->uLen!=1) {
        /* the parser enforces m->uLen<=UCNV_EXT_MAX_UCHARS */
        UChar32 *u32;
        UChar *u;
        UErrorCode errorCode;
        uint32_t lengthBits, stringIndex;

        /* preflight: find out how many 16-bit units the string needs */
        u32=UCM_GET_CODE_POINTS(table, m);
        errorCode=U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &u16Length, u32, m->uLen, &errorCode);
        if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) {
            exit(errorCode);
        }

        /* the index must be read before the allocation below grows the table */
        lengthBits=((uint32_t)m->uLen+UCNV_EXT_TO_U_LENGTH_OFFSET)<<UCNV_EXT_TO_U_LENGTH_SHIFT;
        stringIndex=(uint32_t)utm_countItems(extData->toUUChars);
        value=lengthBits|stringIndex;

        /* reserve the units and write the 16-bit string into them */
        u=utm_allocN(extData->toUUChars, u16Length);
        errorCode=U_ZERO_ERROR;
        u_strFromUTF32(u, u16Length, NULL, u32, m->uLen, &errorCode);
        if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) {
            exit(errorCode);
        }
    } else {
        /* a single code point is stored directly in the value */
        u16Length=U16_LENGTH(m->u);
        value=(uint32_t)(UCNV_EXT_TO_U_MIN_CODE_POINT+m->u);
    }

    /* a fallback indicator of 0 marks a roundtrip mapping */
    if(m->f==0) {
        value|=UCNV_EXT_TO_U_ROUNDTRIP_FLAG;
    }

    /* update statistics: longest input, longest output, worst expansion */
    if(extData->maxInBytes<m->bLen) {
        extData->maxInBytes=m->bLen;
    }
    if(extData->maxOutUChars<u16Length) {
        extData->maxOutUChars=u16Length;
    }

    /* UChars per input byte, rounded up */
    ratio=(u16Length+(m->bLen-1))/m->bLen;
    if(extData->maxUCharsPerByte<ratio) {
        extData->maxUCharsPerByte=ratio;
    }

    return value;
}
/*
 * Return a new Python string in which every lowercase code point of `input`
 * is uppercased and every uppercase code point is lowercased.
 *
 * The text is converted UTF-16 -> UTF-32 so the case mapping is applied per
 * code point, then converted back to UTF-16 for icu_to_python.
 *
 * Returns NULL with a Python exception set when:
 *   - python_to_icu fails (NULL is returned as-is; presumably the helper has
 *     already set the exception -- confirm against its definition),
 *   - a working buffer cannot be allocated (MemoryError),
 *   - either ICU conversion fails (ValueError carrying the ICU error name).
 */
static PyObject *
icu_swap_case(PyObject *self, PyObject *input) {
    PyObject *result = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar *input_buf = NULL, *output_buf = NULL;
    UChar32 *buf = NULL;
    int32_t sz = 0, sz32 = 0, i = 0;
    int32_t output_cap = 0, buf_cap = 0;

    input_buf = python_to_icu(input, &sz);
    if (input_buf == NULL) goto end;

    /* Capacities handed to ICU: same generous bounds as before. */
    output_cap = 3 * sz;
    buf_cap = 2 * sz;

    /*
     * Always request at least one element: calloc(0, ...) is allowed to
     * return NULL, which would otherwise be misreported as out-of-memory
     * for an empty input string.
     */
    output_buf = (UChar*) calloc(output_cap > 0 ? (size_t)output_cap : 1, sizeof(UChar));
    buf = (UChar32*) calloc(buf_cap > 0 ? (size_t)buf_cap : 1, sizeof(UChar32));
    if (output_buf == NULL || buf == NULL) { PyErr_NoMemory(); goto end; }

    u_strToUTF32(buf, buf_cap, &sz32, input_buf, sz, &status);
    /* Stop here on failure instead of case-mapping a buffer ICU did not fill. */
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    for (i = 0; i < sz32; i++) {
        if (u_islower(buf[i])) buf[i] = u_toupper(buf[i]);
        else if (u_isupper(buf[i])) buf[i] = u_tolower(buf[i]);
    }

    u_strFromUTF32(output_buf, output_cap, &sz, buf, sz32, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    result = icu_to_python(output_buf, sz);

end:
    /* free(NULL) is a no-op, so no guards are needed. */
    free(input_buf);
    free(output_buf);
    free(buf);
    return result;
} // }}}
/**
 * Encode a single character as UTF-16 at the given buffer position.
 *
 * @param buff        Position to write to.
 * @param buffer_last Used to derive the capacity passed to ICU, which is
 *                    the distance buffer_last - buff.
 * @param c           The character to convert; its storage is reinterpreted
 *                    as one UChar32.
 * @return The length that u_strFromUTF32 stored in its length output
 *         (0 if ICU did not set it).
 *
 * NOTE(review): the ICU status is not inspected, so a failed or truncated
 * conversion is not distinguishable here -- confirm callers cope with that.
 */
static size_t AppendToBuffer(UChar *buff, const UChar *buffer_last, WChar c)
{
	UErrorCode status = U_ZERO_ERROR;
	int32 units = 0;

	/* Transform from UTF-32 to internal ICU format of UTF-16. */
	UChar32 *code_point = (UChar32*)&c;
	u_strFromUTF32(buff, buffer_last - buff, &units, code_point, 1, &status);

	return units;
}
// chr {{{ static PyObject * icu_chr(PyObject *self, PyObject *args) { UErrorCode status = U_ZERO_ERROR; UChar32 code = 0; UChar buf[5] = {0}; int32_t sz = 0; if (!PyArg_ParseTuple(args, "I", &code)) return NULL; u_strFromUTF32(buf, 4, &sz, &code, 1, &status); if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)"); return NULL; } return icu_to_python(buf, sz); } // }}}
/**
 * Converts a single UTF-32 code point into a UTF-16 CString.
 *
 * A code point above U+FFFF is encoded as two UTF-16 units (a surrogate
 * pair). The previous one-unit destination made u_strFromUTF32 report a
 * buffer overflow for those code points, so they always came back as an
 * empty string. The destination now has room for two units plus a
 * terminator.
 *
 * @param wch  The code point to convert.
 * @return A CString with the one or two UTF-16 units for wch, or an empty
 *         CString if ICU rejects the value (a debug line is logged).
 */
CString Utf32ToUtf16( UChar32 const wch )
{
    // Two units for a surrogate pair, one more so ICU can terminate.
    UChar wzDest[3] = { 0, 0, 0 };
    int const cchDestCapacity = static_cast<int>( sizeof( wzDest ) / sizeof( wzDest[0] ) );

    int cchDest = 0;
    UErrorCode errorCode = U_ZERO_ERROR;
    u_strFromUTF32( wzDest, cchDestCapacity, &cchDest, &wch, 1, &errorCode );

    // Same success test as the array overload: a clean result or an
    // unterminated-but-complete one; everything else is a failure.
    if ( U_ZERO_ERROR != errorCode && U_STRING_NOT_TERMINATED_WARNING != errorCode )
    {
        debug( L"Utf32ToUtf16/1: u_strFromUTF32 failed, errorCode=%d\n", errorCode );
        return CString( );
    }

    // Use the explicit length so a converted U+0000 still yields one unit.
    return CString( wzDest, cchDest );
}
/*
 * Convert a wchar_t string to a UChar string.
 *
 * Returns NULL without touching dest when pErrorCode is NULL or already
 * holds a failure, and NULL with U_ILLEGAL_ARGUMENT_ERROR when the
 * arguments are inconsistent. Otherwise the work depends on the build:
 *   - U_WCHAR_IS_UTF16: the units are copied as-is when they fit, and
 *     u_terminateUChars sets the final status;
 *   - U_WCHAR_IS_UTF32: delegated to u_strFromUTF32;
 *   - otherwise: delegated to _strFromWCS.
 * A srcLength of -1 is accepted; the UTF-16 branch replaces it with the
 * result of u_strlen(src).
 */
U_CAPI UChar* U_EXPORT2
u_strFromWCS(UChar *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const wchar_t *src,
             int32_t srcLength,
             UErrorCode *pErrorCode)
{
    /* nothing to do without a usable error code, or after an earlier failure */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return NULL;
    }

    /* reject inconsistent buffer/length combinations */
    if(destCapacity<0 ||
       (dest==NULL && destCapacity>0) ||
       srcLength < -1 ||
       (src==NULL && srcLength!=0)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

#ifdef U_WCHAR_IS_UTF16
    /* wchar_t is UTF-16 just do a memcpy */
    if(srcLength == -1) {
        srcLength = u_strlen(src);
    }

    /* copy only when there is something to copy and it fits */
    if(srcLength > 0 && destCapacity >= srcLength) {
        uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
    }

    if(pDestLength != NULL) {
        *pDestLength = srcLength;
    }

    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
    return dest;

#elif defined U_WCHAR_IS_UTF32

    return u_strFromUTF32(dest, destCapacity, pDestLength,
                          (UChar32*)src, srcLength, pErrorCode);

#else

    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);

#endif
}
// chr {{{ static PyObject * icu_chr(PyObject *self, PyObject *args) { UErrorCode status = U_ZERO_ERROR; UChar32 code = 0; UChar buf[5] = {0}; int32_t sz = 0; char utf8[21]; PyObject *result = NULL; if (!PyArg_ParseTuple(args, "I", &code)) return NULL; u_strFromUTF32(buf, 4, &sz, &code, 1, &status); if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)"); goto end; } u_strToUTF8(utf8, 20, &sz, buf, sz, &status); if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)"); goto end; } result = PyUnicode_DecodeUTF8(utf8, sz, "strict"); end: return result; } // }}}
static int32_t convertFromPuny( const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, UErrorCode& status){ char b1Stack[MAX_LABEL_BUFFER_SIZE]; char* b1 = b1Stack; int32_t destLen =0; convertUCharsToASCII(src, b1,srcLength); uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; uint32_t* b2 = b2Stack; int32_t b2Len =MAX_LABEL_BUFFER_SIZE; unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*)); punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); status = getError(error); if(status == U_BUFFER_OVERFLOW_ERROR){ b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); if(b2 == NULL){ status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP; } error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); status = getError(error); } if(U_FAILURE(status)){ goto CLEANUP; } u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); CLEANUP: if(b1Stack != b1){ uprv_free(b1); } if(b2Stack != b2){ uprv_free(b2); } uprv_free(caseFlags); return destLen; }
/**
 * Fill `string` from a Python object and return a reference to it.
 *
 * - Unicode objects: when Py_UNICODE and UChar have the same size the
 *   buffer is handed to setTo directly; otherwise the buffer is treated as
 *   UTF-32 and converted with u_strFromUTF32 first.
 * - Bytes objects: delegated to PyBytes_AsUnicodeString with the given
 *   encoding and mode.
 * - Anything else: TypeError is set with the object and ICUException is
 *   thrown.
 *
 * Throws ICUException(status) if the UTF-32 conversion fails.
 */
EXPORT UnicodeString &PyObject_AsUnicodeString(PyObject *object,
                                               const char *encoding,
                                               const char *mode,
                                               UnicodeString &string)
{
    if (!PyUnicode_Check(object))
    {
        if (PyBytes_Check(object))
        {
            PyBytes_AsUnicodeString(object, encoding, mode, string);
            return string;
        }

        PyErr_SetObject(PyExc_TypeError, object);
        throw ICUException();
    }

    // Same unit width: no conversion needed.
    if (sizeof(Py_UNICODE) == sizeof(UChar))
    {
        string.setTo((const UChar *) PyUnicode_AS_UNICODE(object),
                     (int32_t) PyUnicode_GET_SIZE(object));
        return string;
    }

    // Different unit width: convert through a temporary UChar buffer.
    int32_t srcLen = (int32_t) PyUnicode_GET_SIZE(object);
    Py_UNICODE *srcChars = PyUnicode_AS_UNICODE(object);
    int32_t capacity = srcLen * 3;
    UChar *utf16 = new UChar[capacity];
    int32_t utf16Len;
    UErrorCode status = U_ZERO_ERROR;

    u_strFromUTF32(utf16, capacity, &utf16Len,
                   (const UChar32 *) srcChars, srcLen, &status);

    if (U_FAILURE(status))
    {
        delete[] utf16;
        throw ICUException(status);
    }

    string.setTo((const UChar *) utf16, (int32_t) utf16Len);
    delete[] utf16;

    return string;
}
/*
 * Parse one mapping line; the line must not be empty.
 *
 * Expected layout, as read below: one or more <UXXXX> code points
 * (optionally joined by '+'), the byte sequence (parsed by ucm_parseBytes),
 * and an optional |N fallback indicator with N in 0..4.
 *
 * Output:
 *   codePoints[]  the parsed code points
 *   bytes[]       the parsed bytes (filled by ucm_parseBytes)
 *   m->u          the code point, only when there is exactly one
 *   m->b.bytes    a copy of the bytes, only when there are at most 4
 *   m->uLen, m->bLen, m->f  the counts and the fallback indicator
 *                 (f is -1 when the line has no indicator)
 *
 * Returns TRUE on success; on a syntax error a message is printed to
 * stderr and FALSE is returned.
 */
U_CAPI UBool U_EXPORT2
ucm_parseMappingLine(UCMapping *m,
                     UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
                     uint8_t bytes[UCNV_EXT_MAX_BYTES],
                     const char *line) {
    const char *s;
    char *end;
    UChar32 cp;
    int32_t u16Length;
    int8_t uLen, bLen, f;
    UBool wellFormed;

    s=line;
    uLen=bLen=0;

    /* parse code points */
    for(;;) {
        /* skip an optional plus sign */
        if(uLen>0 && *s=='+') {
            ++s;
        }
        if(*s!='<') {
            break;
        }

        /* need 'U', at least one hex digit, and a closing '>' */
        wellFormed=FALSE;
        if(s[1]=='U') {
            cp=(UChar32)uprv_strtoul(s+2, &end, 16);
            if(end!=s+2 && *end=='>') {
                wellFormed=TRUE;
            }
        }
        if(!wellFormed) {
            fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line);
            return FALSE;
        }

        if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) {
            fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line);
            return FALSE;
        }

        if(uLen==UCNV_EXT_MAX_UCHARS) {
            fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line);
            return FALSE;
        }

        codePoints[uLen++]=cp;
        s=end+1;
    }

    if(uLen==0) {
        fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line);
        return FALSE;
    }

    if(uLen==1) {
        m->u=codePoints[0];
    } else {
        /* preflight the UTF-16 length to enforce the UChar limit */
        UErrorCode errorCode=U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode);
        if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) ||
            u16Length>UCNV_EXT_MAX_UCHARS
        ) {
            fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line);
            return FALSE;
        }
    }

    s=u_skipWhitespace(s);

    /* parse bytes */
    bLen=ucm_parseBytes(bytes, line, &s);

    if(bLen<0) {
        return FALSE;
    }
    if(bLen==0) {
        fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line);
        return FALSE;
    }
    if(bLen<=4) {
        uprv_memcpy(m->b.bytes, bytes, bLen);
    }

    /* skip everything until the fallback indicator, even the start of a comment */
    while(*s!=0 && *s!='|') {
        ++s;
    }

    if(*s==0) {
        f=-1; /* no fallback indicator */
    } else {
        f=(int8_t)(s[1]-'0');
        if((uint8_t)f>4) {
            fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line);
            return FALSE;
        }
    }

    m->uLen=uLen;
    m->bLen=bLen;
    m->f=f;
    return TRUE;
}