/* Convert the SRCLEN bytes at SRC from the encoding FROMCODE to UTF-8.

   FROMCODE   name of the source encoding.  When STRCASEEQ matches it
              against "UTF-8", the input is validated and copied without
              calling mem_iconveha().
   HANDLER    forwarded to mem_iconveha(); unused in the UTF-8 fast path.
   SRC/SRCLEN the input bytes and their count.
   OFFSETS    if non-NULL, an array of at least SRCLEN elements.  In the
              UTF-8 fast path, offsets[i] is set to i for the first byte of
              each character and to (size_t)(-1) for every following byte of
              that character.  Otherwise it is forwarded to mem_iconveha().
   RESULTBUF  optional caller-provided output buffer; *LENGTHP is its size
              on entry.
   LENGTHP    on success receives the length of the result.

   Returns the converted string, or NULL on failure.  In the UTF-8 fast
   path the result is RESULTBUF when that is non-NULL and large enough,
   otherwise a freshly malloc'ed buffer (at least 1 byte, even for empty
   input).  On failure in that path errno is EILSEQ (invalid UTF-8 input) or
   ENOMEM.
   NOTE(review): on the mem_iconveha() path errno and buffer ownership are
   whatever mem_iconveha() provides -- confirm against its documentation.  */
uint8_t *
u8_conv_from_encoding (const char *fromcode,
                       enum iconv_ilseq_handler handler,
                       const char *src, size_t srclen,
                       size_t *offsets,
                       uint8_t *resultbuf, size_t *lengthp)
{
  if (STRCASEEQ (fromcode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    {
      /* Conversion from UTF-8 to UTF-8.  No need to go through iconv().  */
      uint8_t *result;

      if (u8_check ((const uint8_t *) src, srclen))
        {
          errno = EILSEQ;
          return NULL;
        }

      if (offsets != NULL)
        {
          size_t i;

          for (i = 0; i < srclen; )
            {
              int count = u8_mblen ((const uint8_t *) src + i, srclen - i);
              /* The input passed u8_check, so a negative count cannot
                 happen.  A count of 0 is legitimate, though: u8_mblen
                 reports the NUL character that way, and NUL occupies
                 exactly one byte.  Aborting on it would crash on valid
                 input that contains an embedded NUL.  */
              if (count < 0)
                abort ();
              if (count == 0)
                count = 1;
              offsets[i] = i;
              i++;
              while (--count > 0)
                offsets[i++] = (size_t)(-1);
            }
        }

      /* Memory allocation.  Reuse the caller's buffer when it is big
         enough; otherwise allocate, never requesting 0 bytes.  */
      if (resultbuf != NULL && *lengthp >= srclen)
        result = resultbuf;
      else
        {
          result = (uint8_t *) malloc (srclen > 0 ? srclen : 1);
          if (result == NULL)
            {
              errno = ENOMEM;
              return NULL;
            }
        }

      /* Skip the copy for empty input: SRC may then be NULL, and passing
         a null pointer to memcpy is undefined even for a zero length.  */
      if (srclen > 0)
        memcpy ((char *) result, src, srclen);
      *lengthp = srclen;
      return result;
    }
  else
    {
      char *result = (char *) resultbuf;
      size_t length = *lengthp;

      if (mem_iconveha (src, srclen, fromcode, "UTF-8", true, handler,
                        offsets, &result, &length) < 0)
        return NULL;

      if (result == NULL) /* when (resultbuf == NULL && length == 0)  */
        {
          /* Always hand back a non-NULL pointer on success, so that NULL
             unambiguously means failure.  */
          result = (char *) malloc (1);
          if (result == NULL)
            {
              errno = ENOMEM;
              return NULL;
            }
        }
      *lengthp = length;
      return (uint8_t *) result;
    }
}
/* Test driver for u8_mblen: checks its return value on empty input, on the
   NUL unit, on well-formed 1- to 4-byte sequences, and on a collection of
   truncated or malformed sequences.  Returns 0 when every check passes.  */
int
main ()
{
  /* Zero-length input: expected result is -1.  */
  {
    static const uint8_t empty[] = "";
    int len = u8_mblen (empty, 0);
    ASSERT (len == -1);
  }

  /* A single NUL unit: expected result is 0.  */
  {
    static const uint8_t nul[] = "";
    int len = u8_mblen (nul, 1);
    ASSERT (len == 0);
  }

  /* Every ISO 646 unit from 0x01 through 0x7F: expected result is 1.  */
  {
    uint8_t unit[1];
    ucs4_t uc = 1;

    while (uc < 0x80)
      {
        int len;

        unit[0] = uc;
        len = u8_mblen (unit, 1);
        ASSERT (len == 1);
        uc++;
      }
  }

  /* Well-formed 2-byte character.  */
  {
    static const uint8_t seq[] = { 0xC3, 0x97 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == 2);
  }

  /* Well-formed 3-byte character.  */
  {
    static const uint8_t seq[] = { 0xE2, 0x82, 0xAC };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == 3);
  }

  /* Well-formed 4-byte character.  */
  {
    static const uint8_t seq[] = { 0xF4, 0x8F, 0xBF, 0xBD };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == 4);
  }

  /* Incomplete or invalid input consisting of one byte: each case is
     expected to yield -1.  */
  {
    static const uint8_t seq[] = { 0xC1 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xC3 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xE2 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xF4 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xFE };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }

  /* Incomplete or invalid input consisting of two bytes: each case is
     expected to yield -1.  */
  {
    static const uint8_t seq[] = { 0xE0, 0x9F };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xE2, 0x82 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xE2, 0xD0 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xF0, 0x8F };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xF3, 0x8F };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xF3, 0xD0 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }

  /* Incomplete or invalid input consisting of three bytes: each case is
     expected to yield -1.  */
  {
    static const uint8_t seq[] = { 0xF3, 0x8F, 0xBF };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xF3, 0xD0, 0xBF };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }
  {
    static const uint8_t seq[] = { 0xF3, 0x8F, 0xD0 };
    int len = u8_mblen (seq, sizeof seq);
    ASSERT (len == -1);
  }

  return 0;
}