Ejemplo n.º 1
0
//--------------------------------------------------------------------------------------------------
/**
 * Counts the characters in a null-terminated UTF-8 string while checking its encoding.
 *
 * The string is walked one character at a time.  The length of each character is taken from its
 * lead byte via NumBytesInChar(), and every byte that follows the lead byte within that character
 * must satisfy IS_CONTINUATION_BYTE().
 *
 * @return
 *      - 0 if string is NULL.
 *      - LE_FORMAT_ERROR if NumBytesInChar() reports a negative length for a lead byte, or if a
 *        byte following a lead byte is not a continuation byte.
 *      - Otherwise, the number of characters found before the null terminator.
 */
//--------------------------------------------------------------------------------------------------
ssize_t le_utf8_NumChars
(
    const char* string      ///< [IN] Pointer to the string.
)
{
    const char* cursor = string;
    size_t charCount = 0;

    // Nothing to count when no string is supplied.
    if (cursor == NULL)
    {
        return 0;
    }

    while (*cursor != '\0')
    {
        // The lead byte tells us how many bytes this character occupies.
        int_fast8_t charLen = NumBytesInChar(*cursor);
        int trailing;

        if (charLen < 0)
        {
            return LE_FORMAT_ERROR;
        }

        // Every byte after the lead byte must be a continuation byte.  A premature null
        // terminator is read here too and is left to IS_CONTINUATION_BYTE() to reject.
        for (trailing = charLen - 1; trailing > 0; trailing--)
        {
            cursor++;

            if ( !IS_CONTINUATION_BYTE(*cursor) )
            {
                return LE_FORMAT_ERROR;
            }
        }

        // Step past the last byte of this character and count it.
        cursor++;
        charCount++;
    }

    return charCount;
}
Ejemplo n.º 2
0
//--------------------------------------------------------------------------------------------------
/**
 * Checks whether a null-terminated string is laid out as a sequence of UTF-8 characters.
 *
 * For each character, the lead byte is passed to NumBytesInChar() to obtain the character's
 * length, and each byte after the lead byte within that length must satisfy
 * IS_CONTINUATION_BYTE().
 *
 * @return
 *      - true if every character up to the null terminator passes the checks above (this
 *        includes the empty string).
 *      - false if string is NULL, if NumBytesInChar() reports a negative length, or if an
 *        expected continuation byte is missing.
 */
//--------------------------------------------------------------------------------------------------
bool le_utf8_IsFormatCorrect
(
    const char* string      ///< [IN] The string.
)
{
    const char* bytePtr;

    // A missing string is never considered well formed.
    if (string == NULL)
    {
        return false;
    }

    // Each pass handles one whole character; the loop increment steps over its final byte.
    for (bytePtr = string; *bytePtr != '\0'; bytePtr++)
    {
        int8_t seqLen = NumBytesInChar(*bytePtr);
        int bytesLeft;

        if (seqLen < 0)
        {
            return false;
        }

        // Consume the bytes after the lead byte, insisting each is a continuation byte.
        for (bytesLeft = seqLen - 1; bytesLeft > 0; bytesLeft--)
        {
            bytePtr++;

            if ( !IS_CONTINUATION_BYTE(*bytePtr) )
            {
                return false;
            }
        }
    }

    return true;
}
Ejemplo n.º 3
0
EAPI Eina_Unicode
evas_common_encoding_utf8_get_next(const char *buf, int *iindex)
{
   /* Decodes the UTF-8 sequence in @buf that starts at offset *@iindex
    * and returns the resulting code point.
    *
    * - If the byte at *@iindex is the null terminator, 0 is returned and
    *   *@iindex is left untouched.
    * - On a successful decode, *@iindex is moved past the whole sequence.
    * - On a malformed sequence, *@iindex is advanced by exactly one byte
    *   and ERROR_REPLACEMENT_BASE OR'ed with that byte is returned.
    *
    * Lead bytes announcing sequences of 1 to 6 bytes are accepted.
    */
   /* Note: overlong forms and some other broken cases are not
    * currently detected. */
   int pos = *iindex;
   int trailing;    /* continuation bytes still expected */
   int lead_mask;   /* payload bits carried by the lead byte */
   Eina_Unicode code;
   unsigned char byte;

   byte = buf[pos++];

   /* End of string: nothing to decode. */
   if (byte == 0) return 0;

   /* 1 byte (7bit) - 0xxxxxxx */
   if ((byte & 0x80) == 0)
     {
        *iindex = pos;
        return byte;
     }

   /* Classify the lead byte:
    *   110xxxxx -> 2 bytes (11bit)    1110xxxx -> 3 bytes (16bit)
    *   11110xxx -> 4 bytes (21bit)    111110xx -> 5 bytes (26bit)
    *   1111110x -> 6 bytes (31bit)
    * Anything else cannot start a sequence. */
   if ((byte & 0xe0) == 0xc0)
     {
        trailing = 1;
        lead_mask = 0x1f;
     }
   else if ((byte & 0xf0) == 0xe0)
     {
        trailing = 2;
        lead_mask = 0x0f;
     }
   else if ((byte & 0xf8) == 0xf0)
     {
        trailing = 3;
        lead_mask = 0x07;
     }
   else if ((byte & 0xfc) == 0xf8)
     {
        trailing = 4;
        lead_mask = 0x03;
     }
   else if ((byte & 0xfe) == 0xfc)
     {
        trailing = 5;
        lead_mask = 0x01;
     }
   else
     {
        goto error;
     }

   /* The lead byte's payload sits above 6 bits per continuation byte. */
   code = (byte & lead_mask) << (6 * trailing);

   /* Fold in each continuation byte (10xxxxxx), most significant first. */
   while (trailing-- > 0)
     {
        byte = buf[pos++];
        if ((byte == 0) || IS_INVALID_BYTE(byte) ||
            !IS_CONTINUATION_BYTE(byte)) goto error;
        code |= (byte & 0x3f) << (6 * trailing);
     }

   /* A multi-byte sequence that decodes to 0 is rejected. */
   if (!code) goto error;

   *iindex = pos;
   return code;

/* Error path: consume only the byte at the original offset and return
 * a replacement value whose low 8 bits carry that original byte. */
error:
   byte = buf[*iindex];
   (*iindex)++;
   return ERROR_REPLACEMENT_BASE | byte;
}