Пример #1
0
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(utf16_to_encoding)
    STRING  *result;
    UINTVAL  src_len;

    src_len = STRING_length(src);

    if (STRING_max_bytes_per_codepoint(src) == 1) {
        result           = Parrot_gc_new_string_header(interp, 0);
        result->encoding = Parrot_ucs2_encoding_ptr;
        result->bufused  = 2 * src_len;
        result->strlen   = src_len;

        if (src_len) {
            UINTVAL       i;
            Parrot_UInt2 *p;

            Parrot_gc_allocate_string_storage(interp, result, 2 * src_len);
            p = (Parrot_UInt2 *)result->strstart;

            for (i = 0; i < src_len; ++i) {
                p[i] = (unsigned char)src->strstart[i];
            }
        }
    }
    else if (src->encoding == Parrot_utf16_encoding_ptr
         ||  src->encoding == Parrot_ucs2_encoding_ptr) {
        /* we have to use clone instead of copy because the Unicode upcase
         * and downcase functions assume to get an unshared buffer */
        result = Parrot_str_clone(interp, src);
    }
    else {
        result = encoding_to_encoding(interp, src, Parrot_utf16_encoding_ptr, 2.2);

        /* downgrade if possible */
        if (result->bufused == result->strlen << 1)
            result->encoding = Parrot_ucs2_encoding_ptr;
    }

    return result;
}
Пример #2
0
PARROT_CANNOT_RETURN_NULL
static STRING *
ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ascii_to_encoding)
    STRING        *dest;

    if (STRING_max_bytes_per_codepoint(src) == 1) {
        unsigned char * const src_buf  = (unsigned char *)src->strstart;
        UINTVAL offs;

        for (offs = 0; offs < src->strlen; ++offs) {
            UINTVAL c = src_buf[offs];
            if (c >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                    "lossy conversion to ascii");
        }

        dest           = Parrot_str_clone(interp, src);
        dest->encoding = Parrot_ascii_encoding_ptr;
    }
    else {
        String_iter iter;
        unsigned char *p;
        const UINTVAL len = src->strlen;

        dest = Parrot_str_new_init(interp, NULL, len,
                Parrot_ascii_encoding_ptr, 0);
        p    = (unsigned char *)dest->strstart;
        STRING_ITER_INIT(interp, &iter);

        while (iter.charpos < len) {
            const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
            if (c >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                        "can't convert unicode string to ascii");
            *p++ = c;
        }

        dest->bufused = len;
        dest->strlen  = len;
    }

    return dest;
}
Пример #3
0
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ucs4_to_encoding)
    const UINTVAL  len = src->strlen;
    UINTVAL        i;
    STRING        *res;
    utf32_t       *ptr;

    if (src->encoding == Parrot_ucs4_encoding_ptr)
        return Parrot_str_copy(interp, src);

    res = Parrot_str_new_init(interp, NULL, len * 4,
            Parrot_ucs4_encoding_ptr, 0);
    ptr = (utf32_t *)res->strstart;

    if (STRING_max_bytes_per_codepoint(src) == 1) {
        const unsigned char *s = (unsigned char *)src->strstart;

        for (i = 0; i < len; i++) {
            ptr[i] = s[i];
        }
    }
    else {
        String_iter iter;

        STRING_ITER_INIT(interp, &iter);

        while (iter.charpos < len) {
            i      = iter.charpos;
            ptr[i] = STRING_iter_get_and_advance(interp, src, &iter);
        }
    }

    res->strlen  = len;
    res->bufused = len * 4;

    return res;
}