/*
 * encoding_substr(interp, src, offset, length)
 *
 * Returns the substring of `src` that starts `offset` characters in and
 * spans at most `length` characters.
 *
 *  - A negative `offset` has the string length added to it first.
 *  - A non-positive `length`, or an `offset` equal to the string length,
 *    yields the constant empty string.
 *  - Any other `offset` at or beyond the string length raises
 *    EXCEPTION_SUBSTR_OUT_OF_STRING.
 *
 * The result is obtained from Parrot_str_copy(src) with its strstart,
 * bufused and strlen fields adjusted to describe the requested window;
 * its cached hashval is cleared whenever the window differs from the
 * whole string.
 */
PARROT_CANNOT_RETURN_NULL
STRING *
encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
    ASSERT_ARGS(encoding_substr)
    const UINTVAL  total_chars = STRING_length(src);
    STRING        *slice;
    String_iter    cursor;
    UINTVAL        first_byte;
    UINTVAL        chars_after_offset;

    if (offset < 0)
        offset += total_chars;

    /* Nothing to extract: hand back the shared empty string. */
    if (length <= 0)
        return Parrot_str_new_constant(interp, "");

    /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
    if ((UINTVAL)offset == total_chars)
        return Parrot_str_new_constant(interp, "");

    if ((UINTVAL)offset > total_chars)
        Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_SUBSTR_OUT_OF_STRING,
            "Cannot take substr outside string");

    slice = Parrot_str_copy(interp, src);

    /* The request covers the entire string: the plain copy is the answer. */
    if (offset == 0 && (UINTVAL)length >= total_chars)
        return slice;

    /* Walk to the first requested character to learn its byte position. */
    STRING_ITER_INIT(interp, &cursor);

    if (offset != 0)
        STRING_iter_skip(interp, src, &cursor, offset);

    first_byte       = cursor.bytepos;
    slice->strstart += first_byte;

    chars_after_offset = total_chars - (UINTVAL)offset;

    if ((UINTVAL)length < chars_after_offset) {
        /* Window ends inside the string: walk on to find its last byte. */
        STRING_iter_skip(interp, src, &cursor, length);
        slice->bufused = cursor.bytepos - first_byte;
        slice->strlen  = length;
    }
    else {
        /* Window runs to the end of the string: just drop the prefix. */
        slice->bufused -= first_byte;
        slice->strlen  -= offset;
    }

    slice->hashval = 0;

    return slice;
}
/*
 * ucs4_to_encoding(interp, src)
 *
 * Produces a UCS-4 encoded version of `src`.
 *
 * If `src` already uses Parrot_ucs4_encoding_ptr the result is simply
 * Parrot_str_copy(src).  Otherwise a new string with room for four bytes
 * per character is created and filled one codepoint at a time.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ucs4_to_encoding)
    const UINTVAL  char_count = src->strlen;
    STRING        *converted;
    utf32_t       *out;

    if (src->encoding == Parrot_ucs4_encoding_ptr)
        return Parrot_str_copy(interp, src);

    converted = Parrot_str_new_init(interp, NULL, char_count * 4,
                    Parrot_ucs4_encoding_ptr, 0);
    out       = (utf32_t *)converted->strstart;

    if (STRING_max_bytes_per_codepoint(src) != 1) {
        /* Variable-width source: decode each codepoint via the iterator. */
        String_iter walker;

        STRING_ITER_INIT(interp, &walker);

        while (walker.charpos < char_count) {
            /* Capture the index first; the call below advances charpos. */
            const UINTVAL slot = walker.charpos;

            out[slot] = STRING_iter_get_and_advance(interp, src, &walker);
        }
    }
    else {
        /* One byte per codepoint: widen each byte directly. */
        const unsigned char *in = (unsigned char *)src->strstart;
        UINTVAL              k  = 0;

        while (k < char_count) {
            out[k] = in[k];
            k++;
        }
    }

    converted->strlen  = char_count;
    converted->bufused = char_count * 4;

    return converted;
}
/*
 * to_encoding(interp, src, dest)
 *
 * Converts `src` to a UTF-16 / UCS-2 encoded string.
 *
 *  - dest == NULL: the conversion is done in place and `src` is returned.
 *  - dest != NULL: `dest` receives the converted data and is returned.
 *
 * If `src` is already UTF-16 or UCS-2 it is returned unchanged (in place)
 * or as a Parrot_str_copy() (a supplied `dest` is not written in that
 * case).  An empty source only has its charset/encoding/length fields
 * reset on the result.
 *
 * With ICU available, ISO-8859-1 and ASCII sources are widened byte by
 * byte; every other source is handed to u_strFromUTF8() (i.e. it is
 * assumed to be UTF-8).  If the number of UChars produced equals the
 * character count, the result is marked UCS-2 instead of UTF-16.
 *
 * Without ICU, a non-empty conversion throws EXCEPTION_LIBRARY_ERROR.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
to_encoding(PARROT_INTERP, ARGIN(STRING *src), ARGIN_NULLOK(STRING *dest))
{
    ASSERT_ARGS(to_encoding)
#if PARROT_HAS_ICU
    UErrorCode err;
    int dest_len;
    UChar *p;
#endif
    int src_len;
    int in_place = dest == NULL;
    STRING *result;

    if (src->encoding == Parrot_utf16_encoding_ptr ||
            src->encoding == Parrot_ucs2_encoding_ptr)
        return in_place ? src : Parrot_str_copy(interp, src);

    /*
     * TODO adapt string creation functions
     */
    src_len = src->strlen;

    if (in_place) {
        result = src;
    }
    else {
        result = dest;
    }

    if (!src_len) {
        result->charset  = Parrot_unicode_charset_ptr;
        result->encoding = Parrot_ucs2_encoding_ptr;
        result->strlen   = result->bufused = 0;
        return result;
    }

    /*
       u_strFromUTF8(UChar *dest,
       int32_t destCapacity,
       int32_t *pDestLength,
       const char *src,
       int32_t srcLength,
       UErrorCode *pErrorCode);
       */
#if PARROT_HAS_ICU
    if (in_place) {
        /* need intermediate memory */
        p = (UChar *)mem_sys_allocate(src_len * sizeof (UChar));
    }
    else {
        Parrot_gc_reallocate_string_storage(interp, dest, sizeof (UChar) * src_len);
        p = (UChar *)dest->strstart;
    }

    if (src->charset == Parrot_iso_8859_1_charset_ptr ||
            src->charset == Parrot_ascii_charset_ptr) {
        /* Single-byte charsets: each byte maps directly onto one UChar. */
        for (dest_len = 0; dest_len < (int)src->strlen; ++dest_len) {
            p[dest_len] = (UChar)((unsigned char*)src->strstart)[dest_len];
        }
    }
    else {
        err = U_ZERO_ERROR;
        u_strFromUTF8(p, src_len, &dest_len, src->strstart, src->bufused, &err);

        if (!U_SUCCESS(err)) {
            /*
             * have to resize - required len in UChars is in dest_len
             */
            if (in_place)
                p = (UChar *)mem_sys_realloc(p, dest_len * sizeof (UChar));
            else {
                result->bufused = dest_len * sizeof (UChar);
                Parrot_gc_reallocate_string_storage(interp, dest,
                                         sizeof (UChar) * dest_len);
                p = (UChar *)dest->strstart;
            }

            /*
             * ICU functions return immediately, doing nothing, when the
             * incoming error code already indicates a failure.  Clear it
             * so the retry actually performs the conversion into the
             * resized buffer.
             */
            err = U_ZERO_ERROR;
            u_strFromUTF8(p, dest_len,
                    &dest_len, src->strstart, src->bufused, &err);
            PARROT_ASSERT(U_SUCCESS(err));
        }
    }

    result->bufused = dest_len * sizeof (UChar);

    if (in_place) {
        /* result == src here, so src->bufused is the new UTF-16 byte size. */
        Parrot_gc_reallocate_string_storage(interp, src, src->bufused);
        memcpy(src->strstart, p, src->bufused);
        mem_sys_free(p);
    }

    result->charset  = Parrot_unicode_charset_ptr;
    result->encoding = Parrot_utf16_encoding_ptr;
    result->strlen   = src_len;

    /* downgrade if possible */
    if (dest_len == (int)src->strlen)
        result->encoding = Parrot_ucs2_encoding_ptr;

    return result;
#else
    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
        "no ICU lib loaded");
#endif
}