/*
 * utf16_to_encoding
 *
 * Produces a new STRING holding the characters of src in a 16-bit encoding.
 *
 *   - Source with at most one byte per code point: every byte is widened
 *     to one 16-bit unit and the result is tagged as ucs2.
 *   - Source already tagged utf16 or ucs2: the string is cloned.
 *   - Anything else: delegated to encoding_to_encoding() with utf16 as the
 *     target; the result is retagged as ucs2 when it uses exactly two
 *     bytes per character.
 *
 * The result is never NULL.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(utf16_to_encoding)
    const UINTVAL  n_chars = STRING_length(src);
    STRING        *converted;

    if (STRING_max_bytes_per_codepoint(src) == 1) {
        STRING * const wide = Parrot_gc_new_string_header(interp, 0);

        wide->encoding = Parrot_ucs2_encoding_ptr;
        wide->bufused  = 2 * n_chars;
        wide->strlen   = n_chars;

        /* Storage is only requested for non-empty strings. */
        if (n_chars) {
            const char   *in;
            const char   *stop;
            Parrot_UInt2 *out;

            Parrot_gc_allocate_string_storage(interp, wide, 2 * n_chars);

            in   = src->strstart;
            stop = in + n_chars;
            out  = (Parrot_UInt2 *)wide->strstart;

            /* Go through unsigned char so bytes >= 0x80 are not
             * sign-extended when widened. */
            while (in < stop)
                *out++ = (unsigned char)*in++;
        }

        return wide;
    }

    if (src->encoding == Parrot_utf16_encoding_ptr
    ||  src->encoding == Parrot_ucs2_encoding_ptr) {
        /* we have to use clone instead of copy because the Unicode upcase
         * and downcase functions assume to get an unshared buffer */
        return Parrot_str_clone(interp, src);
    }

    /* NOTE(review): 2.2 is presumably a bytes-per-character estimate used
     * by encoding_to_encoding() -- confirm against its definition. */
    converted = encoding_to_encoding(interp, src,
                    Parrot_utf16_encoding_ptr, 2.2);

    /* downgrade if possible: exactly two bytes for every character */
    if (converted->bufused == converted->strlen * 2)
        converted->encoding = Parrot_ucs2_encoding_ptr;

    return converted;
}
/*
 * ascii_to_encoding
 *
 * Produces a new STRING tagged with the ascii encoding that holds the
 * characters of src.
 *
 * Throws EXCEPTION_LOSSY_CONVERSION as soon as a character with a value of
 * 0x80 or above is encountered.
 *
 *   - Source with at most one byte per code point: the bytes are validated,
 *     then the string is cloned and the clone is retagged.
 *   - Any other source: a fresh string of src->strlen bytes is created and
 *     filled one code point at a time through a string iterator.
 *
 * The result is never NULL.
 */
PARROT_CANNOT_RETURN_NULL
static STRING *
ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ascii_to_encoding)
    STRING *converted;

    if (STRING_max_bytes_per_codepoint(src) != 1) {
        const UINTVAL  n_chars = src->strlen;
        String_iter    cursor;
        unsigned char *out;

        converted = Parrot_str_new_init(interp, NULL, n_chars,
                        Parrot_ascii_encoding_ptr, 0);
        out       = (unsigned char *)converted->strstart;
        STRING_ITER_INIT(interp, &cursor);

        for (; cursor.charpos < n_chars; ++out) {
            const UINTVAL cp =
                STRING_iter_get_and_advance(interp, src, &cursor);

            if (cp >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL,
                    EXCEPTION_LOSSY_CONVERSION,
                    "can't convert unicode string to ascii");

            *out = cp;
        }

        converted->bufused = n_chars;
        converted->strlen  = n_chars;
    }
    else {
        unsigned char * const bytes = (unsigned char *)src->strstart;
        UINTVAL               pos   = 0;

        /* Validate every byte before cloning. */
        while (pos < src->strlen) {
            const UINTVAL cp = bytes[pos];

            if (cp >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL,
                    EXCEPTION_LOSSY_CONVERSION,
                    "lossy conversion to ascii");

            ++pos;
        }

        converted           = Parrot_str_clone(interp, src);
        converted->encoding = Parrot_ascii_encoding_ptr;
    }

    return converted;
}
/*
 * ucs4_to_encoding
 *
 * Produces a new STRING tagged with the ucs4 encoding that holds the
 * characters of src, one utf32_t per character (4 bytes each).
 *
 *   - Source already tagged ucs4: the string is cloned.
 *   - Source with at most one byte per code point: each byte is widened
 *     to one utf32_t.
 *   - Any other source: code points are read one at a time through a
 *     string iterator and stored at their character position.
 *
 * The result is never NULL.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ucs4_to_encoding)
    const UINTVAL  len = src->strlen;
    UINTVAL        i;
    STRING        *res;
    utf32_t       *ptr;

    /* Clone rather than copy, matching the other *_to_encoding converters:
     * callers of a conversion routine may modify the result in place and
     * therefore need a buffer that is not shared with src. */
    if (src->encoding == Parrot_ucs4_encoding_ptr)
        return Parrot_str_clone(interp, src);

    res = Parrot_str_new_init(interp, NULL, len * 4,
            Parrot_ucs4_encoding_ptr, 0);
    ptr = (utf32_t *)res->strstart;

    if (STRING_max_bytes_per_codepoint(src) == 1) {
        /* One byte per character: widen each byte directly. */
        const unsigned char *s = (unsigned char *)src->strstart;

        for (i = 0; i < len; i++) {
            ptr[i] = s[i];
        }
    }
    else {
        String_iter iter;

        STRING_ITER_INIT(interp, &iter);

        while (iter.charpos < len) {
            /* Record the position before the iterator advances past it. */
            i      = iter.charpos;
            ptr[i] = STRING_iter_get_and_advance(interp, src, &iter);
        }
    }

    res->strlen  = len;
    res->bufused = len * 4;

    return res;
}