/*
 * Converts src to the iso-8859-1 charset using the fixed_8 encoding.
 *
 * If dest is non-NULL, its storage is reallocated to src->strlen bytes and
 * the converted bytes are written into it. If dest is NULL, the conversion
 * is done in place on src. The string that was written to is returned.
 *
 * Throws EXCEPTION_LOSSY_CONVERSION when a codepoint >= 0x100 is read from
 * src, since such a codepoint does not fit into a single byte.
 *
 * NOTE(review): the header fields of the target are updated before the
 * copy loop runs, so in the in-place case a throw part-way through
 * presumably leaves src partially rewritten -- confirm whether callers
 * rely on src staying intact after a failed conversion.
 */
PARROT_CANNOT_RETURN_NULL
static STRING *
to_iso_8859_1(PARROT_INTERP, ARGIN(STRING *src), ARGMOD_NULLOK(STRING *dest))
{
    ASSERT_ARGS(to_iso_8859_1)
    UINTVAL offs, src_len;
    String_iter iter;

    /* the iterator is set up from src before any header field is changed */
    ENCODING_ITER_INIT(interp, src, &iter);
    src_len = src->strlen;

    if (dest) {
        Parrot_gc_reallocate_string_storage(interp, dest, src_len);
        dest->strlen = src_len;
    }
    else {
        /* iso-8859-1 is never bigger than source */
        dest = src;
    }

    dest->bufused  = src_len;
    dest->charset  = Parrot_iso_8859_1_charset_ptr;
    dest->encoding = Parrot_fixed_8_encoding_ptr;

    for (offs = 0; offs < src_len; ++offs) {
        const UINTVAL c = iter.get_and_advance(interp, &iter);

        /* anything above 0xFF cannot be stored in one byte */
        if (c >= 0x100) {
            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                "lossy conversion to iso-8859-1");
        }

        ENCODING_SET_BYTE(interp, dest, offs, c);
    }

    return dest;
}
/*
 * Transcodes src into a string that uses the given encoding.
 *
 * When src already uses `encoding`, a clone of src is returned. Otherwise a
 * new string header is created and the codepoints of src are read one at a
 * time and written through a destination iterator.
 *
 * avg_bytes is the caller's estimate of bytes needed per codepoint; it is
 * used to size the initial buffer and every later regrow. While the copy
 * is running, result->bufused holds the currently allocated size; it is set
 * to the number of bytes actually written just before returning.
 */
PARROT_CANNOT_RETURN_NULL
PARROT_WARN_UNUSED_RESULT
STRING *
encoding_to_encoding(PARROT_INTERP, ARGIN(const STRING *src),
        ARGIN(const STR_VTABLE *encoding), double avg_bytes)
{
    ASSERT_ARGS(encoding_to_encoding)
    const UINTVAL bytes_per_char = encoding->max_bytes_per_codepoint;
    UINTVAL       n_chars, capacity;
    String_iter   reader, writer;
    STRING       *out;

    /* nothing to transcode: hand back a clone */
    if (encoding == src->encoding)
        return Parrot_str_clone(interp, src);

    n_chars       = src->strlen;
    out           = Parrot_gc_new_string_header(interp, 0);
    out->encoding = encoding;
    out->strlen   = n_chars;

    /* empty source: return the header without allocating storage */
    if (n_chars == 0)
        return out;

    /* initial estimate, but never less than room for one codepoint */
    capacity = (UINTVAL)(n_chars * avg_bytes);
    capacity = (capacity < bytes_per_char) ? bytes_per_char : capacity;

    Parrot_gc_allocate_string_storage(interp, out, capacity);
    out->bufused = capacity;

    STRING_ITER_INIT(interp, &reader);
    STRING_ITER_INIT(interp, &writer);

    while (reader.charpos < n_chars) {
        const UINTVAL codepoint      = STRING_iter_get_and_advance(interp, src, &reader);
        /* furthest byte offset the next write could reach */
        const UINTVAL worst_case_end = writer.bytepos + bytes_per_char;

        if (out->bufused < worst_case_end) {
            /* regrow: estimate for the unread codepoints plus the pending write */
            const UINTVAL chars_left = n_chars - reader.charpos;

            capacity = (UINTVAL)(chars_left * avg_bytes) + worst_case_end;
            Parrot_gc_reallocate_string_storage(interp, out, capacity);
            out->bufused = capacity;
        }

        STRING_iter_set_and_advance(interp, out, &writer, codepoint);
    }

    /* record the number of bytes actually written */
    out->bufused = writer.bytepos;

    return out;
}
/*
 * Converts src to the unicode charset with the utf16 encoding, downgrading
 * to ucs2 when the number of UChars produced equals src->strlen.
 *
 * dest == NULL requests an in-place conversion of src; otherwise the
 * converted data is written into dest. The string that was written to is
 * returned.
 *
 * Special cases:
 *   - src already utf16 or ucs2: returns src itself (in place) or
 *     Parrot_str_copy(src); dest is not written to.
 *   - src->strlen == 0: the target is marked unicode/ucs2 with zero
 *     strlen and bufused, and returned.
 *   - built without ICU: throws EXCEPTION_LIBRARY_ERROR for any input not
 *     covered by the two cases above.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
to_encoding(PARROT_INTERP, ARGIN(STRING *src), ARGIN_NULLOK(STRING *dest))
{
    ASSERT_ARGS(to_encoding)
#if PARROT_HAS_ICU
    UErrorCode err;
    int dest_len;
    UChar *p;
#endif
    int src_len;
    int in_place = dest == NULL;
    STRING *result;

    if (src->encoding == Parrot_utf16_encoding_ptr ||
            src->encoding == Parrot_ucs2_encoding_ptr)
        return in_place ? src : Parrot_str_copy(interp, src);
    /*
     * TODO adapt string creation functions
     */
    src_len = src->strlen;
    if (in_place) {
        result = src;
    }
    else {
        result = dest;
    }
    if (!src_len) {
        result->charset  = Parrot_unicode_charset_ptr;
        result->encoding = Parrot_ucs2_encoding_ptr;
        result->strlen = result->bufused = 0;
        return result;
    }
    /*
       u_strFromUTF8(UChar *dest,
       int32_t destCapacity,
       int32_t *pDestLength,
       const char *src,
       int32_t srcLength,
       UErrorCode *pErrorCode);
     */
#if PARROT_HAS_ICU
    if (in_place) {
        /* need intermediate memory */
        p = (UChar *)mem_sys_allocate(src_len * sizeof (UChar));
    }
    else {
        Parrot_gc_reallocate_string_storage(interp, dest, sizeof (UChar) * src_len);
        p = (UChar *)dest->strstart;
    }
    if (src->charset == Parrot_iso_8859_1_charset_ptr ||
            src->charset == Parrot_ascii_charset_ptr) {
        /* single-byte source: widen each byte to one UChar */
        for (dest_len = 0; dest_len < (int)src->strlen; ++dest_len) {
            p[dest_len] = (UChar)((unsigned char*)src->strstart)[dest_len];
        }
    }
    else {
        err = U_ZERO_ERROR;
        u_strFromUTF8(p, src_len, &dest_len, src->strstart, src->bufused, &err);
        if (!U_SUCCESS(err)) {
            /*
             * have to resize - required len in UChars is in dest_len
             *
             * NOTE(review): this treats every failure as a too-small
             * buffer; presumably other ICU errors (e.g. malformed input)
             * need separate handling -- confirm.
             */
            if (in_place)
                p = (UChar *)mem_sys_realloc(p, dest_len * sizeof (UChar));
            else {
                result->bufused = dest_len * sizeof (UChar);
                Parrot_gc_reallocate_string_storage(interp, dest,
                         sizeof (UChar) * dest_len);
                p = (UChar *)dest->strstart;
            }
            /*
             * ICU requires that the error code does not indicate a failure
             * on entry; without this reset the retry would do nothing and
             * leave err in its failed state.
             */
            err = U_ZERO_ERROR;
            u_strFromUTF8(p, dest_len, &dest_len, src->strstart, src->bufused, &err);
            PARROT_ASSERT(U_SUCCESS(err));
        }
    }
    result->bufused = dest_len * sizeof (UChar);
    if (in_place) {
        /* result == src here, so src->bufused is the new byte count */
        Parrot_gc_reallocate_string_storage(interp, src, src->bufused);
        memcpy(src->strstart, p, src->bufused);
        mem_sys_free(p);
    }
    result->charset  = Parrot_unicode_charset_ptr;
    result->encoding = Parrot_utf16_encoding_ptr;
    result->strlen = src_len;

    /* downgrade if possible */
    if (dest_len == (int)src->strlen)
        result->encoding = Parrot_ucs2_encoding_ptr;
    return result;
#else
    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
        "no ICU lib loaded");
#endif
}