Пример #1
0
/*
 * Scans a possibly incomplete buffer of UTF-16 data, validating it and
 * counting the characters it contains.
 *
 * buf is read as an array of utf16_t code units. On entry:
 *   bounds->bytes  number of bytes available in buf (halved to get the
 *                  number of code units examined)
 *   bounds->chars  maximum number of characters to scan; a negative value
 *                  means the limit is the number of code units
 *   bounds->delim  code point at which scanning stops (the matching
 *                  character is still consumed and counted)
 *
 * On return:
 *   bounds->bytes  number of bytes consumed (code units consumed * 2)
 *   bounds->chars  number of characters counted
 *   bounds->delim  the last value examined, or -1 if nothing was examined;
 *                  if the buffer ended on a lone high surrogate, this is
 *                  that raw surrogate value
 *
 * Returns 2 when the buffer ends directly after a high surrogate (its low
 * half is still missing and the high surrogate is not consumed), and 0
 * otherwise.
 *
 * Throws EXCEPTION_MALFORMED_UTF16 for a high surrogate that is not
 * followed by a low surrogate or for a stray low surrogate, and
 * EXCEPTION_INVALID_CHARACTER for a code point that
 * UNICODE_IS_NON_CHARACTER flags.
 */
static INTVAL
utf16_partial_scan(PARROT_INTERP, ARGIN(const char *buf),
        ARGMOD(Parrot_String_Bounds *bounds))
{
    ASSERT_ARGS(utf16_partial_scan)
    const utf16_t * const units     = (const utf16_t *)buf;
    const UINTVAL         num_units = bounds->bytes >> 1;
    const INTVAL          stop_at   = bounds->delim;
    INTVAL                limit     = bounds->chars;
    INTVAL                last      = -1;
    INTVAL                count     = 0;
    INTVAL                missing   = 0;
    UINTVAL               pos       = 0;

    /* A negative limit means "no explicit character limit" */
    if (limit < 0)
        limit = num_units;

    while (pos < num_units && count < limit) {
        last = units[pos];

        if (UNICODE_IS_HIGH_SURROGATE(last)) {
            if (pos + 1 >= num_units) {
                /* The low half of the pair has not arrived yet: report
                 * two missing bytes and leave the high half unconsumed */
                missing = 2;
                break;
            }

            ++pos;

            if (!UNICODE_IS_LOW_SURROGATE(units[pos]))
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF16,
                    "Malformed UTF-16 string\n");

            last = UNICODE_DECODE_SURROGATE(last, units[pos]);
        }
        else if (UNICODE_IS_LOW_SURROGATE(last)) {
            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF16,
                "Malformed UTF-16 string\n");
        }

        if (UNICODE_IS_NON_CHARACTER(last))
            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARACTER,
                "Non-character in UTF-16 string\n");

        /* The character is complete: count it and step past it before
         * testing for the delimiter, so the delimiter itself is consumed */
        ++count;
        ++pos;

        if (last == stop_at)
            break;
    }

    bounds->bytes = pos << 1;
    bounds->chars = count;
    bounds->delim = last;

    return missing;
}
Пример #2
0
/*
 * Decodes the code point that starts at p.
 *
 * If the first code unit is a high surrogate, it is combined with the
 * following unit p[1] via UNICODE_DECODE_SURROGATE; otherwise the first
 * unit is returned unchanged. No validation is done here: p[1] is read
 * without checking that it exists or that it is a low surrogate.
 */
PARROT_WARN_UNUSED_RESULT
static UINTVAL
utf16_decode(PARROT_INTERP, ARGIN(const utf16_t *p))
{
    ASSERT_ARGS(utf16_decode)
    const UINTVAL lead = p[0];

    /* A single code unit that is not a high surrogate stands for itself */
    if (!UNICODE_IS_HIGH_SURROGATE(lead))
        return lead;

    return UNICODE_DECODE_SURROGATE(lead, p[1]);
}
Пример #3
0
/*
 * Validates the UTF-16 data of src and returns the number of characters
 * it contains.
 *
 * The data is read from src->strstart as utf16_t code units, and
 * src->bufused gives its length in bytes.
 *
 * Throws EXCEPTION_MALFORMED_UTF16 when the byte length is odd, when the
 * data ends after a high surrogate, when a high surrogate is not followed
 * by a low surrogate, or when a low surrogate appears on its own. Throws
 * EXCEPTION_INVALID_CHARACTER for a code point that
 * UNICODE_IS_NON_CHARACTER flags.
 */
PARROT_WARN_UNUSED_RESULT
static UINTVAL
utf16_scan(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(utf16_scan)
    const utf16_t *units     = (utf16_t *)src->strstart;
    const UINTVAL  num_units = src->bufused >> 1;
    UINTVAL        count     = 0;
    UINTVAL        pos       = 0;

    if (src->bufused & 1)
        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF16,
            "Unaligned end in UTF-16 string\n");

    while (pos < num_units) {
        /* Fetch the current unit; pos now refers to the unit after it */
        UINTVAL cp = units[pos++];

        if (UNICODE_IS_HIGH_SURROGATE(cp)) {
            /* The pair's low half must be present ... */
            if (pos >= num_units)
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF16,
                    "Unaligned end in UTF-16 string\n");

            /* ... and must really be a low surrogate */
            if (!UNICODE_IS_LOW_SURROGATE(units[pos]))
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF16,
                    "Malformed UTF-16 string\n");

            cp = UNICODE_DECODE_SURROGATE(cp, units[pos]);
            ++pos;
        }
        else if (UNICODE_IS_LOW_SURROGATE(cp)) {
            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF16,
                "Malformed UTF-16 string\n");
        }

        if (UNICODE_IS_NON_CHARACTER(cp))
            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARACTER,
                "Non-character in UTF-16 string\n");

        ++count;
    }

    return count;
}