Ejemplo n.º 1
0
/*
 * Validate the single encoded unicode character at the start of str.
 *
 * Returns the number of bytes the character occupies, or -1 if any of
 * the checks below rejects the sequence.
 */
int utf8_encoded_valid_unichar(const char *str)
{
    const int expected = utf8_encoded_expected_len(str);
    int codepoint;
    int pos;

    /* the expected-length helper reported 0: nothing valid to consume */
    if (expected == 0)
        return -1;

    /* single-byte (ascii) characters need no further checks */
    if (expected == 1)
        return 1;

    /*
     * Every byte of a multi-byte sequence must have its high bit set.
     * A terminating NUL fails this test, so the scan stops there and
     * never reads beyond the end of a short string.
     */
    pos = 0;
    while (pos < expected) {
        if (!(str[pos] & 0x80))
            return -1;
        pos++;
    }

    codepoint = utf8_encoded_to_unichar(str);

    /*
     * Reject values whose encoded length differs from the length
     * actually used, then values outside the permitted range.
     */
    if (utf8_unichar_to_encoded_len(codepoint) != expected ||
        !utf8_unichar_valid_range(codepoint))
        return -1;

    return expected;
}
Ejemplo n.º 2
0
/*
 * Decode the single encoded unicode character at the start of str.
 *
 * On success the decoded value is stored in *ret_unichar and 0 is
 * returned. -EINVAL is returned, and *ret_unichar is left untouched,
 * when the expected length is not in 1..6 or when a trailing byte is
 * not of the form 10xxxxxx.
 */
int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) {
        /* payload bits carried by the lead byte, indexed by sequence length */
        static const unsigned char lead_mask[] = { 0, 0, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
        char32_t value;
        int n, k;

        assert(str);

        n = utf8_encoded_expected_len(str);

        /* single-byte characters are passed through unchanged */
        if (n == 1) {
                *ret_unichar = (char32_t)str[0];
                return 0;
        }

        if (n < 2 || n > 6)
                return -EINVAL;

        value = (char32_t)str[0] & lead_mask[n];

        /* each trailing byte contributes its low six bits */
        for (k = 1; k < n; k++) {
                const char32_t trail = (char32_t)str[k];

                if ((trail & 0xc0) != 0x80)
                        return -EINVAL;

                value = (value << 6) | (trail & 0x3f);
        }

        *ret_unichar = value;
        return 0;
}
Ejemplo n.º 3
0
/*
 * Decode the single encoded unicode character at the start of str.
 *
 * Returns the decoded value, or -1 when the expected length is not in
 * 1..6 or when a trailing byte is not of the form 10xxxxxx.
 */
static int utf8_encoded_to_unichar(const char *str)
{
    const int nbytes = utf8_encoded_expected_len(str);
    int value;
    int idx;

    /* single-byte characters are returned as-is */
    if (nbytes == 1)
        return (int)str[0];

    if (nbytes < 2 || nbytes > 6)
        return -1;

    /*
     * Keep only the payload bits of the lead byte: 0x7f >> nbytes
     * yields 0x1f, 0x0f, 0x07, 0x03, 0x01 for lengths 2 through 6.
     */
    value = (int)str[0] & (0x7f >> nbytes);

    /* each trailing byte contributes its low six bits */
    for (idx = 1; idx < nbytes; idx++) {
        const int trail = (int)str[idx];

        if ((trail & 0xc0) != 0x80)
            return -1;

        value = (value << 6) | (trail & 0x3f);
    }

    return value;
}