Esempio n. 1
0
PARROT_CANNOT_RETURN_NULL
static STRING *
to_iso_8859_1(PARROT_INTERP, ARGIN(STRING *src), ARGMOD_NULLOK(STRING *dest))
{
    ASSERT_ARGS(to_iso_8859_1)
    UINTVAL offs, src_len;
    String_iter iter;

    ENCODING_ITER_INIT(interp, src, &iter);
    src_len = src->strlen;
    if (dest) {
        Parrot_gc_reallocate_string_storage(interp, dest, src_len);
        dest->strlen  = src_len;
    }
    else {
        /* iso-8859-1 is never bigger then source */
        dest = src;
    }
    dest->bufused = src_len;
    dest->charset = Parrot_iso_8859_1_charset_ptr;
    dest->encoding = Parrot_fixed_8_encoding_ptr;
    for (offs = 0; offs < src_len; ++offs) {
        const UINTVAL c = iter.get_and_advance(interp, &iter);
        if (c >= 0x100)
            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                "lossy conversion to iso-8559-1");

        ENCODING_SET_BYTE(interp, dest, offs, c);
    }
    return dest;
}
Esempio n. 2
0
PARROT_CANNOT_RETURN_NULL
PARROT_WARN_UNUSED_RESULT
STRING *
encoding_to_encoding(PARROT_INTERP, ARGIN(const STRING *src),
        ARGIN(const STR_VTABLE *encoding), double avg_bytes)
{
    ASSERT_ARGS(encoding_to_encoding)
    STRING           *result;
    String_iter       src_iter, dest_iter;
    UINTVAL           src_len, alloc_bytes;
    UINTVAL           max_bytes = encoding->max_bytes_per_codepoint;

    if (src->encoding == encoding)
        return Parrot_str_clone(interp, src);

    src_len          = src->strlen;
    result           = Parrot_gc_new_string_header(interp, 0);
    result->encoding = encoding;
    result->strlen   = src_len;

    if (!src_len)
        return result;

    alloc_bytes = (UINTVAL)(src_len * avg_bytes);
    if (alloc_bytes < max_bytes)
        alloc_bytes = max_bytes;
    Parrot_gc_allocate_string_storage(interp, result, alloc_bytes);
    result->bufused = alloc_bytes;

    STRING_ITER_INIT(interp, &src_iter);
    STRING_ITER_INIT(interp, &dest_iter);

    while (src_iter.charpos < src_len) {
        const UINTVAL c      = STRING_iter_get_and_advance(interp, src, &src_iter);
        const UINTVAL needed = dest_iter.bytepos + max_bytes;

        if (needed > result->bufused) {
            alloc_bytes  = src_len - src_iter.charpos;
            alloc_bytes  = (UINTVAL)(alloc_bytes * avg_bytes);
            alloc_bytes += needed;
            Parrot_gc_reallocate_string_storage(interp, result, alloc_bytes);
            result->bufused = alloc_bytes;
        }

        STRING_iter_set_and_advance(interp, result, &dest_iter, c);
    }

    result->bufused = dest_iter.bytepos;

    return result;
}
Esempio n. 3
0
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
to_encoding(PARROT_INTERP, ARGIN(STRING *src), ARGIN_NULLOK(STRING *dest))
{
    ASSERT_ARGS(to_encoding)
#if PARROT_HAS_ICU
    UErrorCode err;
    int dest_len;
    UChar *p;
#endif
    int src_len;
    int in_place = dest == NULL;
    STRING *result;

    if (src->encoding == Parrot_utf16_encoding_ptr ||
            src->encoding == Parrot_ucs2_encoding_ptr)
        return in_place ? src : Parrot_str_copy(interp, src);
    /*
     * TODO adapt string creation functions
     */
    src_len = src->strlen;
    if (in_place) {
        result = src;
    }
    else {
        result = dest;
    }
    if (!src_len) {
        result->charset  = Parrot_unicode_charset_ptr;
        result->encoding = Parrot_ucs2_encoding_ptr;
        result->strlen = result->bufused = 0;
        return result;
    }
    /*
       u_strFromUTF8(UChar *dest,
       int32_t destCapacity,
       int32_t *pDestLength,
       const char *src,
       int32_t srcLength,
       UErrorCode *pErrorCode);
       */
#if PARROT_HAS_ICU
    if (in_place) {
        /* need intermediate memory */
        p = (UChar *)mem_sys_allocate(src_len * sizeof (UChar));
    }
    else {
        Parrot_gc_reallocate_string_storage(interp, dest, sizeof (UChar) * src_len);
        p = (UChar *)dest->strstart;
    }
    if (src->charset == Parrot_iso_8859_1_charset_ptr ||
            src->charset == Parrot_ascii_charset_ptr) {
        for (dest_len = 0; dest_len < (int)src->strlen; ++dest_len) {
            p[dest_len] = (UChar)((unsigned char*)src->strstart)[dest_len];
        }
    }
    else {
        err = U_ZERO_ERROR;
        u_strFromUTF8(p, src_len,
                &dest_len, src->strstart, src->bufused, &err);
        if (!U_SUCCESS(err)) {
            /*
             * have to resize - required len in UChars is in dest_len
             */
            if (in_place)
                p = (UChar *)mem_sys_realloc(p, dest_len * sizeof (UChar));
            else {
                result->bufused = dest_len * sizeof (UChar);
                Parrot_gc_reallocate_string_storage(interp, dest,
                                         sizeof (UChar) * dest_len);
                p = (UChar *)dest->strstart;
            }
            u_strFromUTF8(p, dest_len,
                    &dest_len, src->strstart, src->bufused, &err);
            PARROT_ASSERT(U_SUCCESS(err));
        }
    }
    result->bufused = dest_len * sizeof (UChar);
    if (in_place) {
        Parrot_gc_reallocate_string_storage(interp, src, src->bufused);
        memcpy(src->strstart, p, src->bufused);
        mem_sys_free(p);
    }
    result->charset  = Parrot_unicode_charset_ptr;
    result->encoding = Parrot_utf16_encoding_ptr;
    result->strlen = src_len;

    /* downgrade if possible */
    if (dest_len == (int)src->strlen)
        result->encoding = Parrot_ucs2_encoding_ptr;
    return result;
#else
    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
        "no ICU lib loaded");
#endif
}