/*
 * utf16_partial_scan
 *
 * Walks a possibly incomplete UTF-16 buffer and reports how much of it
 * forms complete, valid characters.
 *
 * On entry:
 *   bounds->bytes  byte count of buf; it is halved to get the number of
 *                  utf16_t units examined, so a trailing odd byte is ignored
 *   bounds->chars  maximum number of characters to count; a negative value
 *                  means "limited only by the number of units"
 *   bounds->delim  code point after which the scan stops
 *
 * On exit:
 *   bounds->bytes  twice the number of units consumed
 *   bounds->chars  number of characters counted
 *   bounds->delim  last value examined (-1 if nothing was examined); when
 *                  the scan stops on a truncated pair this is the lone high
 *                  surrogate
 *
 * Returns 2 when the buffer ends right after a high surrogate (two more
 * bytes are needed to finish the pair), otherwise 0.
 *
 * Throws EXCEPTION_MALFORMED_UTF16 for a high surrogate not followed by a
 * low surrogate or for an unpaired low surrogate, and
 * EXCEPTION_INVALID_CHARACTER for a non-character.
 */
static INTVAL
utf16_partial_scan(PARROT_INTERP, ARGIN(const char *buf),
        ARGMOD(Parrot_String_Bounds *bounds))
{
    ASSERT_ARGS(utf16_partial_scan)
    const utf16_t * const units     = (const utf16_t *)buf;
    const UINTVAL         num_units = bounds->bytes >> 1;
    const INTVAL          delim     = bounds->delim;
    INTVAL                limit     = bounds->chars;
    INTVAL                cp        = -1;
    INTVAL                count     = 0;
    INTVAL                missing   = 0;
    UINTVAL               pos       = 0;

    if (limit < 0)
        limit = num_units;

    while (pos < num_units && count < limit) {
        cp = units[pos];

        if (UNICODE_IS_HIGH_SURROGATE(cp)) {
            if (pos + 1 >= num_units) {
                /* The low half of the pair has not arrived yet:
                 * leave the high surrogate unconsumed and ask for
                 * two more bytes. */
                missing = 2;
                break;
            }

            ++pos;

            if (!UNICODE_IS_LOW_SURROGATE(units[pos]))
                Parrot_ex_throw_from_c_args(interp, NULL,
                    EXCEPTION_MALFORMED_UTF16,
                    "Malformed UTF-16 string\n");

            cp = UNICODE_DECODE_SURROGATE(cp, units[pos]);
        }
        else if (UNICODE_IS_LOW_SURROGATE(cp)) {
            Parrot_ex_throw_from_c_args(interp, NULL,
                EXCEPTION_MALFORMED_UTF16,
                "Malformed UTF-16 string\n");
        }

        if (UNICODE_IS_NON_CHARACTER(cp))
            Parrot_ex_throw_from_c_args(interp, NULL,
                EXCEPTION_INVALID_CHARACTER,
                "Non-character in UTF-16 string\n");

        /* The character is complete: count it and step past its last unit
         * before checking for the delimiter, so the delimiter itself is
         * included in the consumed range. */
        ++count;
        ++pos;

        if (cp == delim)
            break;
    }

    bounds->bytes = pos << 1;
    bounds->chars = count;
    bounds->delim = cp;

    return missing;
}
/*
 * utf16_decode
 *
 * Returns the code point that starts at p. A unit that is not a high
 * surrogate is returned unchanged; a high surrogate is combined with the
 * following unit p[1].
 *
 * No validation is done here: p[1] is read without checking that it exists
 * or that it is a low surrogate, so the caller must pass a well-formed pair.
 */
PARROT_WARN_UNUSED_RESULT
static UINTVAL
utf16_decode(PARROT_INTERP, ARGIN(const utf16_t *p))
{
    ASSERT_ARGS(utf16_decode)
    const UINTVAL lead = p[0];

    if (!UNICODE_IS_HIGH_SURROGATE(lead))
        return lead;

    return UNICODE_DECODE_SURROGATE(lead, p[1]);
}
/*
 * utf16_scan
 *
 * Validates the UTF-16 data held in src (src->bufused bytes starting at
 * src->strstart) and returns the number of characters it contains; a
 * surrogate pair counts as one character.
 *
 * Throws EXCEPTION_MALFORMED_UTF16 when the byte length is odd, when the
 * data ends after a high surrogate, when a high surrogate is not followed
 * by a low surrogate, or when a low surrogate appears on its own.
 * Throws EXCEPTION_INVALID_CHARACTER when a non-character is found.
 */
PARROT_WARN_UNUSED_RESULT
static UINTVAL
utf16_scan(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(utf16_scan)
    const utf16_t *units = (const utf16_t *)src->strstart;
    UINTVAL        count = 0;
    UINTVAL        pos   = 0;
    UINTVAL        total;

    if (src->bufused & 1)
        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF16,
            "Unaligned end in UTF-16 string\n");

    total = src->bufused >> 1;

    while (pos < total) {
        UINTVAL cp = units[pos++];

        if (UNICODE_IS_HIGH_SURROGATE(cp)) {
            /* pos now indexes the unit that must hold the low surrogate */
            if (pos >= total)
                Parrot_ex_throw_from_c_args(interp, NULL,
                    EXCEPTION_MALFORMED_UTF16,
                    "Unaligned end in UTF-16 string\n");

            if (!UNICODE_IS_LOW_SURROGATE(units[pos]))
                Parrot_ex_throw_from_c_args(interp, NULL,
                    EXCEPTION_MALFORMED_UTF16,
                    "Malformed UTF-16 string\n");

            cp = UNICODE_DECODE_SURROGATE(cp, units[pos]);
            ++pos;
        }
        else if (UNICODE_IS_LOW_SURROGATE(cp)) {
            Parrot_ex_throw_from_c_args(interp, NULL,
                EXCEPTION_MALFORMED_UTF16,
                "Malformed UTF-16 string\n");
        }

        if (UNICODE_IS_NON_CHARACTER(cp))
            Parrot_ex_throw_from_c_args(interp, NULL,
                EXCEPTION_INVALID_CHARACTER,
                "Non-character in UTF-16 string\n");

        ++count;
    }

    return count;
}