Пример #1
0
/* Convert the SRCLEN bytes at SRC, encoded in FROMCODE, to UTF-8.

   FROMCODE   Name of the source encoding.  When it compares equal to
              "UTF-8" (via STRCASEEQ) the input is only validated and
              copied; otherwise the work is delegated to mem_iconveha().
   HANDLER    Forwarded to mem_iconveha(); not used on the UTF-8 path.
   OFFSETS    If non-NULL, must have room for SRCLEN entries.  On the UTF-8
              path, offsets[i] is set to i for the first byte of every
              character and to (size_t)(-1) for each following byte of the
              same character.  On the other path it is forwarded to
              mem_iconveha().
   RESULTBUF  Optional caller-supplied output buffer of *LENGTHP bytes.
   LENGTHP    In: size of RESULTBUF.  Out (on success): result length.

   Returns the converted data.  On the UTF-8 path this is RESULTBUF when it
   is non-NULL and large enough, and freshly malloc()ed memory otherwise.
   Returns NULL on failure: errno is EILSEQ for malformed UTF-8 input on the
   UTF-8 path and ENOMEM when an allocation made here fails; a failure of
   mem_iconveha() is passed through (errno presumably set by that call).  */
uint8_t *
u8_conv_from_encoding (const char *fromcode,
                       enum iconv_ilseq_handler handler,
                       const char *src, size_t srclen,
                       size_t *offsets,
                       uint8_t *resultbuf, size_t *lengthp)
{
    if (STRCASEEQ (fromcode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    {
        /* Conversion from UTF-8 to UTF-8.  No need to go through iconv().  */
        uint8_t *result;

        if (u8_check ((const uint8_t *) src, srclen))
        {
            errno = EILSEQ;
            return NULL;
        }

        if (offsets != NULL)
        {
            size_t i;

            for (i = 0; i < srclen; )
            {
                int count = u8_mblen ((const uint8_t *) src + i, srclen - i);
                /* The input passed u8_check, so a negative result cannot
                   legitimately happen.  */
                if (count < 0)
                    abort ();
                /* u8_mblen reports 0 for the NUL character, which still
                   occupies one byte of well-formed input.  Treat it as a
                   one-byte character instead of aborting.  */
                if (count == 0)
                    count = 1;
                offsets[i] = i;
                i++;
                while (--count > 0)
                    offsets[i++] = (size_t)(-1);
            }
        }

        /* Memory allocation.  */
        if (resultbuf != NULL && *lengthp >= srclen)
            result = resultbuf;
        else
        {
            /* Never request 0 bytes: malloc (0) may return NULL on success.  */
            result = (uint8_t *) malloc (srclen > 0 ? srclen : 1);
            if (result == NULL)
            {
                errno = ENOMEM;
                return NULL;
            }
        }

        /* Skip the copy for empty input so that a NULL SRC with SRCLEN == 0
           does not reach memcpy.  */
        if (srclen > 0)
            memcpy ((char *) result, src, srclen);
        *lengthp = srclen;
        return result;
    }
    else
    {
        char *result = (char *) resultbuf;
        size_t length = *lengthp;

        if (mem_iconveha (src, srclen, fromcode, "UTF-8", true, handler,
                          offsets, &result, &length) < 0)
            return NULL;

        if (result == NULL) /* when (resultbuf == NULL && length == 0)  */
        {
            /* Hand back a valid pointer so NULL keeps meaning "failure".  */
            result = (char *) malloc (1);
            if (result == NULL)
            {
                errno = ENOMEM;
                return NULL;
            }
        }
        *lengthp = length;
        return (uint8_t *) result;
    }
}
/* Test driver for u8_mblen: feeds it well-formed, truncated and malformed
   byte sequences and checks the returned length.  Every array is sized
   exactly to the bytes handed to u8_mblen.  */
int
main ()
{
  int ret;

  /* A range of zero bytes contains no character: expect -1.  */
  {
    static const uint8_t empty[] = "";
    ret = u8_mblen (empty, 0);
    ASSERT (ret == -1);
  }

  /* A single NUL unit: expect 0.  */
  {
    static const uint8_t nul[] = "";
    ret = u8_mblen (nul, sizeof nul);
    ASSERT (ret == 0);
  }

  /* Every non-NUL ISO 646 unit is a one-byte character.  */
  {
    uint8_t unit[1];
    unsigned int code;

    for (code = 0x01; code <= 0x7F; code++)
      {
        unit[0] = code;
        ret = u8_mblen (unit, sizeof unit);
        ASSERT (ret == 1);
      }
  }

  /* A complete 2-byte character.  */
  {
    static const uint8_t two_bytes[] = { 0xC3, 0x97 };
    ret = u8_mblen (two_bytes, sizeof two_bytes);
    ASSERT (ret == 2);
  }

  /* A complete 3-byte character.  */
  {
    static const uint8_t three_bytes[] = { 0xE2, 0x82, 0xAC };
    ret = u8_mblen (three_bytes, sizeof three_bytes);
    ASSERT (ret == 3);
  }

  /* A complete 4-byte character.  */
  {
    static const uint8_t four_bytes[] = { 0xF4, 0x8F, 0xBF, 0xBD };
    ret = u8_mblen (four_bytes, sizeof four_bytes);
    ASSERT (ret == 4);
  }

  /* One byte only, either an invalid lead byte or a lead byte whose
     continuation bytes are missing: expect -1.  */
  {
    static const uint8_t lead_c1[] = { 0xC1 };
    ret = u8_mblen (lead_c1, sizeof lead_c1);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t lead_c3[] = { 0xC3 };
    ret = u8_mblen (lead_c3, sizeof lead_c3);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t lead_e2[] = { 0xE2 };
    ret = u8_mblen (lead_e2, sizeof lead_e2);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t lead_f4[] = { 0xF4 };
    ret = u8_mblen (lead_f4, sizeof lead_f4);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t lead_fe[] = { 0xFE };
    ret = u8_mblen (lead_fe, sizeof lead_fe);
    ASSERT (ret == -1);
  }

  /* Two bytes that are truncated or malformed: expect -1.  */
  {
    static const uint8_t pair_e0_9f[] = { 0xE0, 0x9F };
    ret = u8_mblen (pair_e0_9f, sizeof pair_e0_9f);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t pair_e2_82[] = { 0xE2, 0x82 };
    ret = u8_mblen (pair_e2_82, sizeof pair_e2_82);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t pair_e2_d0[] = { 0xE2, 0xD0 };
    ret = u8_mblen (pair_e2_d0, sizeof pair_e2_d0);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t pair_f0_8f[] = { 0xF0, 0x8F };
    ret = u8_mblen (pair_f0_8f, sizeof pair_f0_8f);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t pair_f3_8f[] = { 0xF3, 0x8F };
    ret = u8_mblen (pair_f3_8f, sizeof pair_f3_8f);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t pair_f3_d0[] = { 0xF3, 0xD0 };
    ret = u8_mblen (pair_f3_d0, sizeof pair_f3_d0);
    ASSERT (ret == -1);
  }

  /* Three bytes that are truncated or malformed: expect -1.  */
  {
    static const uint8_t triple_f3_8f_bf[] = { 0xF3, 0x8F, 0xBF };
    ret = u8_mblen (triple_f3_8f_bf, sizeof triple_f3_8f_bf);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t triple_f3_d0_bf[] = { 0xF3, 0xD0, 0xBF };
    ret = u8_mblen (triple_f3_d0_bf, sizeof triple_f3_d0_bf);
    ASSERT (ret == -1);
  }
  {
    static const uint8_t triple_f3_8f_d0[] = { 0xF3, 0x8F, 0xD0 };
    ret = u8_mblen (triple_f3_8f_d0, sizeof triple_f3_8f_d0);
    ASSERT (ret == -1);
  }

  return 0;
}