示例#1
0
文件: shared.c 项目: tidatida/parrot
/*
 * Transcodes src into the target encoding and returns the result.
 *
 * When src already carries the target encoding the result is just a clone
 * of src.  Otherwise a fresh string header is created and the content is
 * copied one code point at a time through a source and a destination
 * iterator.
 *
 * avg_bytes is the caller's estimate of how many bytes one code point needs
 * in the target encoding.  It sizes the initial buffer and every later
 * enlargement; the buffer is never allocated smaller than the encoding's
 * max_bytes_per_codepoint.
 *
 * While the copy is running, bufused of the result holds the allocated
 * capacity; it is set to the number of bytes actually written just before
 * returning.
 */
PARROT_CANNOT_RETURN_NULL
PARROT_WARN_UNUSED_RESULT
STRING *
encoding_to_encoding(PARROT_INTERP, ARGIN(const STRING *src),
        ARGIN(const STR_VTABLE *encoding), double avg_bytes)
{
    ASSERT_ARGS(encoding_to_encoding)
    const UINTVAL  widest = encoding->max_bytes_per_codepoint;
    String_iter    reader, writer;
    STRING        *out;
    UINTVAL        nchars, capacity;

    /* Nothing to transcode: same encoding on both sides. */
    if (src->encoding == encoding)
        return Parrot_str_clone(interp, src);

    nchars        = src->strlen;
    out           = Parrot_gc_new_string_header(interp, 0);
    out->encoding = encoding;
    out->strlen   = nchars;

    /* An empty source needs no storage at all. */
    if (nchars == 0)
        return out;

    /* Initial guess, but always room for at least one widest code point. */
    capacity = (UINTVAL)(nchars * avg_bytes);
    if (capacity < widest)
        capacity = widest;

    Parrot_gc_allocate_string_storage(interp, out, capacity);
    out->bufused = capacity;

    STRING_ITER_INIT(interp, &reader);
    STRING_ITER_INIT(interp, &writer);

    while (reader.charpos < nchars) {
        const UINTVAL cp       = STRING_iter_get_and_advance(interp, src, &reader);
        const UINTVAL required = writer.bytepos + widest;

        /* Keep a full widest-code-point of headroom before each write. */
        if (required > out->bufused) {
            /* Re-estimate from the code points that are still unread. */
            const UINTVAL remaining = nchars - reader.charpos;

            capacity = (UINTVAL)(remaining * avg_bytes) + required;
            Parrot_gc_reallocate_string_storage(interp, out, capacity);
            out->bufused = capacity;
        }

        STRING_iter_set_and_advance(interp, out, &writer, cp);
    }

    /* Replace the capacity bookkeeping with the real byte count. */
    out->bufused = writer.bytepos;

    return out;
}
示例#2
0
文件: utf8.c 项目: Cristofor/parrot
/*
 * Returns a UTF-8 version of src.
 *
 * An ASCII source is cloned and the clone is simply relabelled as UTF-8.
 * Every other source goes through encoding_to_encoding() with an estimate
 * of 1.2 bytes per code point.
 */
PARROT_CAN_RETURN_NULL
static STRING *
utf8_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(utf8_to_encoding)
    STRING *converted;

    /* General case: a real transcoding pass is needed. */
    if (src->encoding != Parrot_ascii_encoding_ptr)
        return encoding_to_encoding(interp, src, Parrot_utf8_encoding_ptr, 1.2);

    /* ASCII case: copy the string and only swap the encoding pointer. */
    converted           = Parrot_str_clone(interp, src);
    converted->encoding = Parrot_utf8_encoding_ptr;

    return converted;
}
示例#3
0
文件: utf16.c 项目: Cristofor/parrot
/*
 * Returns a UTF-16 (or UCS-2) version of src.
 *
 * Three cases are handled, in this order:
 *   1. The source encoding uses one byte per code point: each byte is
 *      widened to a 16-bit unit and the result is labelled UCS-2.
 *   2. The source is already UTF-16 or UCS-2: it is cloned.
 *   3. Anything else goes through encoding_to_encoding() with an estimate
 *      of 2.2 bytes per code point; the result is relabelled UCS-2 when
 *      its byte count is exactly twice its code point count.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(utf16_to_encoding)
    const UINTVAL  nchars = STRING_length(src);
    STRING        *out;

    /* Case 1: single-byte source, widen every byte to two bytes. */
    if (STRING_max_bytes_per_codepoint(src) == 1) {
        out           = Parrot_gc_new_string_header(interp, 0);
        out->encoding = Parrot_ucs2_encoding_ptr;
        out->bufused  = 2 * nchars;
        out->strlen   = nchars;

        /* Storage is only requested for a non-empty source. */
        if (nchars) {
            Parrot_UInt2 *wide;
            UINTVAL       k;

            Parrot_gc_allocate_string_storage(interp, out, 2 * nchars);
            wide = (Parrot_UInt2 *)out->strstart;

            /* Go through unsigned char so bytes >= 0x80 stay positive. */
            for (k = 0; k < nchars; ++k)
                wide[k] = (unsigned char)src->strstart[k];
        }

        return out;
    }

    /* Case 2: already in a 16-bit encoding.  A clone (not a shared copy)
     * is required because the Unicode upcase and downcase functions
     * expect to receive an unshared buffer. */
    if (src->encoding == Parrot_utf16_encoding_ptr
    ||  src->encoding == Parrot_ucs2_encoding_ptr)
        return Parrot_str_clone(interp, src);

    /* Case 3: generic transcoding. */
    out = encoding_to_encoding(interp, src, Parrot_utf16_encoding_ptr, 2.2);

    /* Exactly two bytes per code point overall: label the result UCS-2. */
    if (out->bufused == out->strlen << 1)
        out->encoding = Parrot_ucs2_encoding_ptr;

    return out;
}
示例#4
0
文件: ascii.c 项目: mpeters/parrot
/*
 * Returns an ASCII version of src.
 *
 * Every code point of src must be below 0x80; otherwise an
 * EXCEPTION_LOSSY_CONVERSION exception is thrown.
 *
 * A source whose encoding uses one byte per code point is validated byte
 * by byte and then cloned with the encoding switched to ASCII.  Any other
 * source is decoded with a string iterator and written into a newly
 * created ASCII string, one byte per code point.
 */
PARROT_CANNOT_RETURN_NULL
static STRING *
ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ascii_to_encoding)
    STRING *converted;

    if (STRING_max_bytes_per_codepoint(src) != 1) {
        /* Multi-byte source: decode code points and narrow each to a byte. */
        const UINTVAL  nchars = src->strlen;
        String_iter    reader;
        unsigned char *out;

        converted = Parrot_str_new_init(interp, NULL, nchars,
                Parrot_ascii_encoding_ptr, 0);
        out       = (unsigned char *)converted->strstart;
        STRING_ITER_INIT(interp, &reader);

        while (reader.charpos < nchars) {
            const UINTVAL cp = STRING_iter_get_and_advance(interp, src, &reader);

            if (cp >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                        "can't convert unicode string to ascii");

            *out = (unsigned char)cp;
            ++out;
        }

        /* One byte was written per code point. */
        converted->bufused = nchars;
        converted->strlen  = nchars;
    }
    else {
        /* Single-byte source: verify that no byte is outside ASCII, then
         * reuse the bytes unchanged under the ASCII encoding. */
        const unsigned char * const bytes = (const unsigned char *)src->strstart;
        UINTVAL                     i;

        for (i = 0; i < src->strlen; ++i) {
            if (bytes[i] >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                    "lossy conversion to ascii");
        }

        converted           = Parrot_str_clone(interp, src);
        converted->encoding = Parrot_ascii_encoding_ptr;
    }

    return converted;
}