Esempio n. 1
0
/*
 * encoding_substr
 *
 * Returns the part of `src` that starts at character index `offset` and
 * spans at most `length` characters.
 *
 *   - A negative `offset` has the character length of `src` added to it
 *     before any other check is made.
 *   - A `length` of zero or less, or an `offset` equal to the string
 *     length, yields the constant empty string.
 *   - An `offset` beyond the end of the string (with a positive `length`)
 *     throws EXCEPTION_SUBSTR_OUT_OF_STRING.
 *   - A `length` that runs past the end of the string is clipped to the
 *     characters that remain after `offset`.
 *
 * The result is obtained from Parrot_str_copy(src) and then narrowed by
 * adjusting its strstart / bufused / strlen fields.
 */
PARROT_CANNOT_RETURN_NULL
STRING *
encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
    ASSERT_ARGS(encoding_substr)
    const UINTVAL  total_chars = STRING_length(src);
    String_iter    cursor;
    STRING        *piece;
    UINTVAL        first_byte;

    /* Negative offsets are counted from the end of the string. */
    if (offset < 0)
        offset += total_chars;

    /* Asking for no characters always gives the empty string, wherever
     * the offset points. */
    if (length <= 0)
        return Parrot_str_new_constant(interp, "");

    if ((UINTVAL)offset >= total_chars) {
        /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
        if ((UINTVAL)offset == total_chars)
            return Parrot_str_new_constant(interp, "");

        Parrot_ex_throw_from_c_noargs(interp,
            EXCEPTION_SUBSTR_OUT_OF_STRING,
            "Cannot take substr outside string");
    }

    piece = Parrot_str_copy(interp, src);

    /* The request covers the whole string: the plain copy is the answer. */
    if (offset == 0 && (UINTVAL)length >= total_chars)
        return piece;

    /* Walk the iterator to the first requested character to learn the
     * byte position at which the substring begins. */
    STRING_ITER_INIT(interp, &cursor);

    if (offset != 0)
        STRING_iter_skip(interp, src, &cursor, offset);

    first_byte       = cursor.bytepos;
    piece->strstart += first_byte;

    if ((UINTVAL)length < total_chars - (UINTVAL)offset) {
        /* Substring ends inside the string: advance `length` characters
         * further to find where its bytes stop. */
        STRING_iter_skip(interp, src, &cursor, length);
        piece->bufused = cursor.bytepos - first_byte;
        piece->strlen  = length;
    }
    else {
        /* Substring reaches the end of the string: only the leading
         * part has to be dropped. */
        piece->bufused -= first_byte;
        piece->strlen  -= offset;
    }

    /* The copy's contents changed; reset its hashval field
     * (presumably a cached hash -- confirm against the STRING definition). */
    piece->hashval = 0;

    return piece;
}
Esempio n. 2
0
/*
 * ucs4_to_encoding
 *
 * Produces a UCS-4 encoded version of `src`.
 *
 * When `src` already uses Parrot_ucs4_encoding_ptr the result is simply
 * Parrot_str_copy(src). Otherwise a new string with room for four bytes
 * per character is created and filled with one utf32_t per character of
 * `src`; its strlen and bufused are set to match.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ucs4_to_encoding)
    const UINTVAL  n_chars = src->strlen;
    STRING        *converted;
    utf32_t       *out;

    if (src->encoding == Parrot_ucs4_encoding_ptr)
        return Parrot_str_copy(interp, src);

    converted = Parrot_str_new_init(interp, NULL, n_chars * 4,
                    Parrot_ucs4_encoding_ptr, 0);
    out       = (utf32_t *)converted->strstart;

    if (STRING_max_bytes_per_codepoint(src) != 1) {
        /* Characters may span several bytes: decode them one by one
         * through the string iterator. */
        String_iter walker;

        STRING_ITER_INIT(interp, &walker);

        while (walker.charpos < n_chars) {
            /* Read the position before the call below advances it. */
            const UINTVAL slot = walker.charpos;

            out[slot] = STRING_iter_get_and_advance(interp, src, &walker);
        }
    }
    else {
        /* One byte per character: widen each byte directly. */
        const unsigned char *in   = (unsigned char *)src->strstart;
        const unsigned char *stop = in + n_chars;
        utf32_t             *dst  = out;

        while (in < stop)
            *dst++ = *in++;
    }

    converted->bufused = n_chars * 4;
    converted->strlen  = n_chars;

    return converted;
}
Esempio n. 3
0
/*
 * to_encoding
 *
 * Converts `src` to UTF-16 (or UCS-2 when no surrogate pairs are needed).
 *
 * Parameters:
 *   src  - the string to convert.
 *   dest - where to put the result, or NULL to convert `src` in place.
 *
 * Returns:
 *   - `src` itself (in place) or Parrot_str_copy(src) (when `dest` is
 *     given) if `src` is already UTF-16 or UCS-2; `dest` is not touched
 *     in that case.
 *   - otherwise the converted string: `src` when converting in place,
 *     `dest` when one was supplied.
 *
 * Behaviour by input:
 *   - Empty strings are just relabelled as unicode / UCS-2.
 *   - ISO-8859-1 and ASCII strings are widened byte by byte.
 *   - Everything else is handed to ICU's u_strFromUTF8(), i.e. the bytes
 *     of `src` are interpreted as UTF-8.
 *
 * Throws EXCEPTION_LIBRARY_ERROR when a non-empty string needs converting
 * and Parrot was built without ICU, or when ICU rejects the input for any
 * reason other than a too-small output buffer.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
to_encoding(PARROT_INTERP, ARGIN(STRING *src), ARGIN_NULLOK(STRING *dest))
{
    ASSERT_ARGS(to_encoding)
#if PARROT_HAS_ICU
    UErrorCode err;
    int dest_len;
    UChar *p;
#endif
    int src_len;
    int in_place = dest == NULL;
    STRING *result;

    /* Already in a 16-bit encoding: nothing to convert. */
    if (src->encoding == Parrot_utf16_encoding_ptr ||
            src->encoding == Parrot_ucs2_encoding_ptr)
        return in_place ? src : Parrot_str_copy(interp, src);
    /*
     * TODO adapt string creation functions
     */
    src_len = src->strlen;
    if (in_place) {
        result = src;
    }
    else {
        result = dest;
    }
    if (!src_len) {
        /* Empty string: only the labels and lengths need updating. */
        result->charset  = Parrot_unicode_charset_ptr;
        result->encoding = Parrot_ucs2_encoding_ptr;
        result->strlen = result->bufused = 0;
        return result;
    }
    /*
       For reference, the ICU conversion routine used below:

       u_strFromUTF8(UChar *dest,
       int32_t destCapacity,
       int32_t *pDestLength,
       const char *src,
       int32_t srcLength,
       UErrorCode *pErrorCode);
       */
#if PARROT_HAS_ICU
    if (in_place) {
        /* need intermediate memory: the output cannot overwrite the
         * source bytes while they are still being read */
        p = (UChar *)mem_sys_allocate(src_len * sizeof (UChar));
    }
    else {
        Parrot_gc_reallocate_string_storage(interp, dest, sizeof (UChar) * src_len);
        p = (UChar *)dest->strstart;
    }
    if (src->charset == Parrot_iso_8859_1_charset_ptr ||
            src->charset == Parrot_ascii_charset_ptr) {
        /* Single-byte charsets: each byte becomes one UChar. */
        for (dest_len = 0; dest_len < (int)src->strlen; ++dest_len) {
            p[dest_len] = (UChar)((unsigned char*)src->strstart)[dest_len];
        }
    }
    else {
        /* First attempt assumes one UChar per source character. */
        err = U_ZERO_ERROR;
        u_strFromUTF8(p, src_len,
                &dest_len, src->strstart, src->bufused, &err);
        if (!U_SUCCESS(err)) {
            /*
             * Only a buffer overflow is recoverable, and only then is
             * dest_len guaranteed to hold the required length. Any other
             * failure (e.g. malformed input) is reported to the caller
             * instead of resizing with a meaningless length.
             */
            if (err != U_BUFFER_OVERFLOW_ERROR) {
                if (in_place)
                    mem_sys_free(p);
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
                    "ICU could not convert string to UTF-16");
            }
            /*
             * have to resize - required len in UChars is in dest_len
             */
            if (in_place)
                p = (UChar *)mem_sys_realloc(p, dest_len * sizeof (UChar));
            else {
                result->bufused = dest_len * sizeof (UChar);
                Parrot_gc_reallocate_string_storage(interp, dest,
                                         sizeof (UChar) * dest_len);
                p = (UChar *)dest->strstart;
            }
            /*
             * ICU functions return immediately when the incoming error
             * code already signals a failure, so the code must be cleared
             * before retrying or the second call would do nothing.
             */
            err = U_ZERO_ERROR;
            u_strFromUTF8(p, dest_len,
                    &dest_len, src->strstart, src->bufused, &err);
            PARROT_ASSERT(U_SUCCESS(err));
        }
    }
    result->bufused = dest_len * sizeof (UChar);
    if (in_place) {
        /* result == src here, so src->bufused is the new byte count:
         * resize src and move the converted data out of the scratch
         * buffer. */
        Parrot_gc_reallocate_string_storage(interp, src, src->bufused);
        memcpy(src->strstart, p, src->bufused);
        mem_sys_free(p);
    }
    result->charset  = Parrot_unicode_charset_ptr;
    result->encoding = Parrot_utf16_encoding_ptr;
    result->strlen = src_len;

    /* downgrade if possible: as many UChars as characters means no
     * surrogate pairs were produced */
    if (dest_len == (int)src->strlen)
        result->encoding = Parrot_ucs2_encoding_ptr;
    return result;
#else
    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
        "no ICU lib loaded");
#endif
}