static void TestSurrogate(){ static UChar32 s[] = {0x10000, 0x10ffff, 0x50000, 0x100000, 0x1abcd}; int i = 0; while (i < 5) { UChar first = UTF_FIRST_SURROGATE(s[i]); UChar second = UTF_SECOND_SURROGATE(s[i]); /* algorithm from the Unicode consortium */ UChar firstresult = (UChar)(((s[i] - 0x10000) / 0x400) + 0xD800); UChar secondresult = (UChar)(((s[i] - 0x10000) % 0x400) + 0xDC00); if (first != UTF16_LEAD(s[i]) || first != U16_LEAD(s[i]) || first != firstresult) { log_err("Failure in first surrogate in 0x%x expected to be 0x%x\n", s[i], firstresult); } if (second != UTF16_TRAIL(s[i]) || second != U16_TRAIL(s[i]) || second != secondresult) { log_err("Failure in second surrogate in 0x%x expected to be 0x%x\n", s[i], secondresult); } i ++; } }
/**
 * Converts a UTF-32 string to UTF-16.
 *
 * Code units are written to dest until either the source is exhausted or
 * destCapacity units have been written; the rest of the source is then only
 * counted so that the full required length can be reported.
 *
 * @param dest         Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity Number of UChars available in dest; must be >= 0.
 * @param pDestLength  If non-NULL, receives the number of UChars required for
 *                     the complete result (which may exceed destCapacity).
 * @param src          Source code points; must not be NULL.
 * @param srcLength    Number of code points in src, or -1 if src is
 *                     terminated by a 0 code point.
 * @param pErrorCode   In/out error code. Nothing is done if it is NULL or
 *                     already indicates a failure. Set to
 *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments and to
 *                     U_INVALID_CHAR_FOUND for a source value above 0x10FFFF.
 * @return dest on success; NULL if an error was detected here.
 *         *pDestLength is not written on the error paths.
 *
 * NOTE(review): source values in 0xD800..0xDFFF are copied through as single
 * UChars without complaint; confirm whether callers rely on that before
 * tightening it.
 */
U_CAPI UChar* U_EXPORT2
u_strFromUTF32(UChar *dest,
               int32_t destCapacity,
               int32_t *pDestLength,
               const UChar32 *src,
               int32_t srcLength,
               UErrorCode *pErrorCode)
{
    int32_t reqLength = 0;
    uint32_t ch = 0;
    UChar *pDest;
    UChar *pDestLimit;
    const uint32_t *pSrc = (const uint32_t *)src;

    /* args check */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /*
     * Form the output pointers only after validation, so that a negative
     * destCapacity never takes part in pointer arithmetic.
     */
    pDest = dest;
    pDestLimit = dest + destCapacity;

    if(srcLength == -1){
        /* The source is 0-terminated: convert while there is room. */
        while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){
            ++pSrc;
            if(ch<=0xFFFF){
                *(pDest++)=(UChar)ch;
            }else if(ch<=0x10FFFF){
                *(pDest++)=UTF16_LEAD(ch);
                if(pDest<pDestLimit){
                    *(pDest++)=UTF16_TRAIL(ch);
                }else{
                    /* no room for the trail unit: count it and stop writing */
                    reqLength++;
                    break;
                }
            }else{
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
        /*
         * Count what did not fit. Out-of-range values are rejected here too,
         * so the outcome does not depend on how large the destination is.
         */
        while((ch=*pSrc++) != 0){
            if(ch>0x10FFFF){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
            reqLength+=UTF_CHAR_LENGTH(ch);
        }
    }else{
        const uint32_t *pSrcLimit = pSrc + srcLength;

        /* Explicit length: convert while there is input and room. */
        while((pSrc < pSrcLimit) && (pDest < pDestLimit)){
            ch = *pSrc++;
            if(ch<=0xFFFF){
                *(pDest++)=(UChar)ch;
            }else if(ch<=0x10FFFF){
                *(pDest++)=UTF16_LEAD(ch);
                if(pDest<pDestLimit){
                    *(pDest++)=UTF16_TRAIL(ch);
                }else{
                    /* no room for the trail unit: count it and stop writing */
                    reqLength++;
                    break;
                }
            }else{
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
        /* Count what did not fit, with the same validation as above. */
        while(pSrc < pSrcLimit){
            ch = *pSrc++;
            if(ch>0x10FFFF){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
            reqLength+=UTF_CHAR_LENGTH(ch);
        }
    }

    /* required length = units actually written + units only counted */
    reqLength += (int32_t)(pDest - dest);
    if(pDestLength){
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);

    return dest;
}
/**
 * Converts a UTF-8 string to UTF-16.
 *
 * Code units are written to dest until either the source is exhausted or
 * destCapacity units have been written; the rest of the source is then only
 * decoded and counted so that the full required length can be reported.
 *
 * @param dest         Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity Number of UChars available in dest; must be >= 0.
 * @param pDestLength  If non-NULL, receives the number of UChars required for
 *                     the complete result (which may exceed destCapacity).
 * @param src          Source bytes; must not be NULL.
 * @param srcLength    Number of bytes in src, or -1 to measure it with
 *                     uprv_strlen().
 * @param pErrorCode   In/out error code. Nothing is done if it is NULL or
 *                     already indicates a failure. Set to
 *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments and to
 *                     U_INVALID_CHAR_FOUND when utf8_nextCharSafeBody()
 *                     returns a negative value for a multi-byte sequence.
 * @return dest on success; NULL if an error was detected here.
 *         *pDestLength is not written on the error paths.
 */
U_CAPI UChar* U_EXPORT2
u_strFromUTF8(UChar *dest,
              int32_t destCapacity,
              int32_t *pDestLength,
              const char* src,
              int32_t srcLength,
              UErrorCode *pErrorCode){
    UChar *pDest;
    UChar *pDestLimit;
    UChar32 ch=0;
    int32_t srcIndex = 0;
    int32_t reqLength = 0;
    const uint8_t *pSrc = (const uint8_t *)src;

    /* args check */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /*
     * Form the output pointers only after validation, so that a negative
     * destCapacity never takes part in pointer arithmetic.
     */
    pDest = dest;
    pDestLimit = dest + destCapacity;

    if(srcLength == -1){
        srcLength = (int32_t)uprv_strlen(src);
    }

    /* Convert while there is both input left and room in dest. */
    while((srcIndex < srcLength)&&(pDest<pDestLimit)){
        ch = pSrc[srcIndex++];
        if(ch <=0x7f){
            /* single-byte sequence maps directly to one UChar */
            *pDest++=(UChar)ch;
        }else{
            /* multi-byte sequence: the helper advances srcIndex past it */
            ch=utf8_nextCharSafeBody(pSrc, &srcIndex, srcLength, ch, -1);
            if(ch<0){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }else if(ch<=0xFFFF){
                *(pDest++)=(UChar)ch;
            }else{
                *(pDest++)=UTF16_LEAD(ch);
                if(pDest<pDestLimit){
                    *(pDest++)=UTF16_TRAIL(ch);
                }else{
                    /* no room for the trail unit: count it and stop writing */
                    reqLength++;
                    break;
                }
            }
        }
    }

    /* Do not fill the dest buffer any further; just count the UChars needed. */
    while(srcIndex < srcLength){
        ch = pSrc[srcIndex++];
        if(ch <= 0x7f){
            reqLength++;
        }else{
            ch=utf8_nextCharSafeBody(pSrc, &srcIndex, srcLength, ch, -1);
            if(ch<0){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
            reqLength+=UTF_CHAR_LENGTH(ch);
        }
    }

    /* required length = units actually written + units only counted */
    reqLength+=(int32_t)(pDest - dest);

    if(pDestLength){
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);

    return dest;
}