Ejemplo n.º 1
0
/*
 * Tests two strings for equality.
 *
 * Returns 1 when the strings compare equal and 0 otherwise.  Inexpensive
 * checks (length, pointer identity, stored hash values, raw bytes when both
 * strings share an encoding) run before the element-by-element comparison
 * through string iterators.
 */
PARROT_WARN_UNUSED_RESULT
INTVAL
encoding_equal(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
{
    ASSERT_ARGS(encoding_equal)
    String_iter   lhs_pos, rhs_pos;
    const UINTVAL n_chars = STRING_length(lhs);

    /* Different lengths can never be equal. */
    if (STRING_length(rhs) != n_chars)
        return 0;

    /* Two empty strings, or the very same object, are trivially equal. */
    if (n_chars == 0 || lhs == rhs)
        return 1;

    /* Only use the hash values as evidence when both are non-zero
     * (presumably zero means "not computed yet" -- confirm). */
    if (lhs->hashval && rhs->hashval) {
        if (lhs->hashval != rhs->hashval)
            return 0;
    }

    /* Same encoding: the byte buffers can be compared directly. */
    if (lhs->encoding == rhs->encoding)
        return memcmp(lhs->strstart, rhs->strstart, STRING_byte_length(lhs)) ? 0 : 1;

    /* Different encodings: walk both strings in lockstep. */
    STRING_ITER_INIT(interp, &lhs_pos);
    STRING_ITER_INIT(interp, &rhs_pos);

    while (lhs_pos.charpos < n_chars) {
        const UINTVAL lhs_ch = STRING_iter_get_and_advance(interp, lhs, &lhs_pos);
        const UINTVAL rhs_ch = STRING_iter_get_and_advance(interp, rhs, &rhs_pos);

        if (lhs_ch != rhs_ch)
            return 0;
    }

    return 1;
}
Ejemplo n.º 2
0
/*
 * Searches backwards in src for an occurrence of search.
 *
 * The scan starts at character position min(offset, src->strlen -
 * search->strlen) and moves towards position 0.  Returns the character
 * position of the first match found that way, or -1 when offset is negative,
 * search is empty, search is longer than src, or nothing matches.
 */
PARROT_WARN_UNUSED_RESULT
INTVAL
encoding_rindex(PARROT_INTERP, ARGIN(const STRING *src),
        ARGIN(const STRING *search), INTVAL offset)
{
    ASSERT_ARGS(encoding_rindex)
    String_iter   needle_pos, after_first, candidate;
    const UINTVAL needle_len = search->strlen;
    UINTVAL       first_ch;
    INTVAL        skip;

    if (offset < 0)
        return -1;
    if (needle_len == 0)
        return -1;
    if (src->strlen < needle_len)
        return -1;

    /* Last position at which the whole needle still fits, capped by offset. */
    skip = src->strlen - needle_len;
    skip = offset < skip ? offset : skip;

    STRING_ITER_INIT(interp, &candidate);
    STRING_iter_skip(interp, src, &candidate, skip);

    /* Fetch the needle's first character once; after_first is left pointing
     * just behind it so each candidate comparison can resume from there. */
    STRING_ITER_INIT(interp, &after_first);
    first_ch = STRING_iter_get_and_advance(interp, search, &after_first);

    for (;;) {
        UINTVAL src_ch = STRING_iter_get(interp, src, &candidate, 0);

        if (src_ch == first_ch) {
            String_iter src_probe = candidate;
            UINTVAL     needle_ch;

            STRING_iter_skip(interp, src, &src_probe, 1);
            needle_pos = after_first;

            /* Compare the remaining characters of the needle. */
            for (;;) {
                if (needle_pos.charpos >= needle_len)
                    return candidate.charpos;

                src_ch    = STRING_iter_get_and_advance(interp, src, &src_probe);
                needle_ch = STRING_iter_get_and_advance(interp, search, &needle_pos);

                if (src_ch != needle_ch)
                    break;
            }
        }

        /* Nothing left in front of position 0. */
        if (candidate.charpos == 0)
            break;

        STRING_iter_skip(interp, src, &candidate, -1);
    }

    return -1;
}
Ejemplo n.º 3
0
/*
 * Produces a string in the requested encoding from src.
 *
 * When src already uses that encoding the result of Parrot_str_clone is
 * returned.  Otherwise a new string header is filled element by element via
 * string iterators.  avg_bytes is the caller's estimate of bytes per character
 * and sizes the initial buffer; the buffer is regrown whenever fewer than
 * encoding->max_bytes_per_codepoint bytes remain for the next write.
 */
PARROT_CANNOT_RETURN_NULL
PARROT_WARN_UNUSED_RESULT
STRING *
encoding_to_encoding(PARROT_INTERP, ARGIN(const STRING *src),
        ARGIN(const STR_VTABLE *encoding), double avg_bytes)
{
    ASSERT_ARGS(encoding_to_encoding)
    STRING        *converted;
    String_iter    reader, writer;
    UINTVAL        n_chars, capacity;
    const UINTVAL  widest = encoding->max_bytes_per_codepoint;

    if (src->encoding == encoding)
        return Parrot_str_clone(interp, src);

    n_chars             = src->strlen;
    converted           = Parrot_gc_new_string_header(interp, 0);
    converted->encoding = encoding;
    converted->strlen   = n_chars;

    /* Empty input: the bare header is already the answer. */
    if (n_chars == 0)
        return converted;

    /* Initial estimate, but never less than room for one widest character. */
    capacity = (UINTVAL)(n_chars * avg_bytes);
    if (capacity < widest)
        capacity = widest;

    Parrot_gc_allocate_string_storage(interp, converted, capacity);
    converted->bufused = capacity;

    STRING_ITER_INIT(interp, &reader);
    STRING_ITER_INIT(interp, &writer);

    while (reader.charpos < n_chars) {
        const UINTVAL ch       = STRING_iter_get_and_advance(interp, src, &reader);
        const UINTVAL required = writer.bytepos + widest;

        if (required > converted->bufused) {
            /* Grow to what is needed now plus an estimate for the
             * characters still unread. */
            const UINTVAL unread = n_chars - reader.charpos;

            capacity = (UINTVAL)(unread * avg_bytes) + required;
            Parrot_gc_reallocate_string_storage(interp, converted, capacity);
            converted->bufused = capacity;
        }

        STRING_iter_set_and_advance(interp, converted, &writer, ch);
    }

    /* Record the number of bytes actually written. */
    converted->bufused = writer.bytepos;

    return converted;
}
Ejemplo n.º 4
0
/*
 * Returns the substring of src beginning at character offset and spanning at
 * most length characters.
 *
 * A negative offset has the string length added to it first.  An empty
 * constant string is returned when length <= 0 or when offset equals the
 * string length; any other offset outside the string raises
 * EXCEPTION_SUBSTR_OUT_OF_STRING.  The result comes from Parrot_str_copy of
 * src with strstart, bufused and strlen adjusted and hashval reset to 0.
 */
PARROT_CANNOT_RETURN_NULL
STRING *
encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
    ASSERT_ARGS(encoding_substr)
    const UINTVAL  src_chars = STRING_length(src);
    STRING        *piece;
    String_iter    cursor;
    UINTVAL        first_byte;

    if (offset < 0)
        offset += src_chars;

    /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
    if (length <= 0 || (UINTVAL)offset == src_chars)
        return Parrot_str_new_constant(interp, "");

    if ((UINTVAL)offset > src_chars)
        Parrot_ex_throw_from_c_noargs(interp,
            EXCEPTION_SUBSTR_OUT_OF_STRING,
            "Cannot take substr outside string");

    piece = Parrot_str_copy(interp, src);

    /* The whole string was requested: the copy needs no adjustment. */
    if (offset == 0 && (UINTVAL)length >= src_chars)
        return piece;

    STRING_ITER_INIT(interp, &cursor);

    if (offset != 0)
        STRING_iter_skip(interp, src, &cursor, offset);

    first_byte       = cursor.bytepos;
    piece->strstart += first_byte;

    if ((UINTVAL)length < src_chars - (UINTVAL)offset) {
        /* Substring ends inside the string: locate its end in bytes. */
        STRING_iter_skip(interp, src, &cursor, length);
        piece->bufused = cursor.bytepos - first_byte;
        piece->strlen  = length;
    }
    else {
        /* Substring runs to the end: just drop the leading part. */
        piece->bufused -= first_byte;
        piece->strlen  -= offset;
    }

    /* Reset the hash value carried over from the copy. */
    piece->hashval = 0;

    return piece;
}
Ejemplo n.º 5
0
/*
 * Scans src from character position offset, over at most count characters,
 * for the first character that belongs to one of the classes in flags.
 *
 * Values below 256 are classified with Parrot_iso_8859_1_typetable, larger
 * ones with u_iscclass.  Returns the position of the match, or
 * min(src->strlen, offset + count) when none is found.
 *
 * The function keeps a function-level static cache of the last string and
 * iterator position (only refreshed for hits past position 128) so a later
 * call on the same string at an equal or greater offset can resume from there.
 * NOTE(review): the cache is keyed on the string pointer alone and is shared
 * by every caller -- confirm this is safe with respect to string reuse and
 * threads.
 * NOTE(review): offset is not checked against src->strlen before skipping --
 * confirm callers guarantee it is in range.
 */
PARROT_WARN_UNUSED_RESULT
INTVAL
encoding_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
        UINTVAL offset, UINTVAL count)
{
    ASSERT_ARGS(encoding_find_cclass)
    String_iter iter;
    UINTVAL     codepoint;
    UINTVAL     end     = offset + count;
    int         matched = 0;

    static UINTVAL last_char_offset;
    static String_iter cached_iter;
    static STRING *last_string = 0;

    if (last_string == src && offset >= last_char_offset) {
        /* Resume from the cached position, moving forward if necessary. */
        iter = cached_iter;
        if (offset > last_char_offset)
            STRING_iter_skip(interp, src, &iter, offset - last_char_offset);
    }
    else {
        STRING_ITER_INIT(interp, &iter);
        STRING_iter_skip(interp, src, &iter, offset);
    }

    /* Never scan past the end of the string. */
    if (src->strlen < end)
        end = src->strlen;

    while (iter.charpos < end) {
        codepoint = STRING_iter_get_and_advance(interp, src, &iter);

        if (codepoint >= 256) {
            if (u_iscclass(interp, codepoint, flags))
                matched = 1;
        }
        else if (Parrot_iso_8859_1_typetable[codepoint] & flags) {
            matched = 1;
        }

        if (matched)
            break;
    }

    if (!matched)
        return end;

    if (iter.charpos > 128) {
        last_char_offset = iter.charpos;
        cached_iter = iter;
        last_string = (STRING*)PTR2INTVAL(src);
    }

    /* The iterator has already moved past the matching character. */
    return iter.charpos - 1;
}
Ejemplo n.º 6
0
/*
 * Three-way comparison of two strings.
 *
 * Walks both strings in lockstep with string iterators and returns -1 or 1 at
 * the first position where the values differ.  When one string is a prefix of
 * the other the shorter one sorts first; 0 is returned when they match over
 * their full, equal length.
 */
PARROT_WARN_UNUSED_RESULT
INTVAL
encoding_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
{
    ASSERT_ARGS(encoding_compare)
    String_iter   lhs_pos, rhs_pos;
    const UINTVAL lhs_chars = STRING_length(lhs);
    const UINTVAL rhs_chars = STRING_length(rhs);
    UINTVAL       shared;

    /* An empty right side equals an empty left side and sorts before
     * anything else; an empty left side sorts before a non-empty right. */
    if (r_len_is_zero_guard: 0) { }
    if (rhs_chars == 0)
        return lhs_chars != 0;
    if (lhs_chars == 0)
        return -1;

    STRING_ITER_INIT(interp, &lhs_pos);
    STRING_ITER_INIT(interp, &rhs_pos);

    shared = lhs_chars;
    if (lhs_chars > rhs_chars)
        shared = rhs_chars;

    while (lhs_pos.charpos < shared) {
        const UINTVAL lhs_ch = STRING_iter_get_and_advance(interp, lhs, &lhs_pos);
        const UINTVAL rhs_ch = STRING_iter_get_and_advance(interp, rhs, &rhs_pos);

        if (lhs_ch == rhs_ch)
            continue;

        if (lhs_ch < rhs_ch)
            return -1;
        return 1;
    }

    /* Common prefix is identical: the lengths decide. */
    return lhs_chars < rhs_chars ? -1
         : lhs_chars > rhs_chars ?  1
         :                          0;
}
Ejemplo n.º 7
0
/*
 * Searches src for search, starting at character position offset.
 *
 * Returns -1 without searching when offset (viewed as unsigned, so negative
 * values are rejected too) is not smaller than the length of src, or when
 * search is empty.  Otherwise the result of Parrot_str_iter_index, run from an
 * iterator positioned at offset, is returned.
 */
PARROT_WARN_UNUSED_RESULT
INTVAL
encoding_index(PARROT_INTERP, ARGIN(const STRING *src),
        ARGIN(const STRING *search), INTVAL offset)
{
    ASSERT_ARGS(encoding_index)
    String_iter from, match_end;

    if ((UINTVAL)offset >= STRING_length(src))
        return -1;

    if (STRING_length(search) == 0)
        return -1;

    STRING_ITER_INIT(interp, &from);
    STRING_iter_skip(interp, src, &from, offset);

    /* match_end is handed over uninitialized; its value is not used here. */
    return Parrot_str_iter_index(interp, src, &from, &match_end, search);
}
Ejemplo n.º 8
0
/*
 * Converts src to the ascii encoding.
 *
 * Every value in src must be below 0x80; otherwise
 * EXCEPTION_LOSSY_CONVERSION is thrown.  Sources whose encoding uses one byte
 * per codepoint are validated in place and then cloned; all other sources are
 * copied value by value into a freshly created ascii string.
 */
PARROT_CANNOT_RETURN_NULL
static STRING *
ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ascii_to_encoding)
    STRING *converted;

    if (STRING_max_bytes_per_codepoint(src) != 1) {
        /* Multi-byte source: decode through an iterator and narrow. */
        String_iter    reader;
        unsigned char *out;
        const UINTVAL  n_chars = src->strlen;

        converted = Parrot_str_new_init(interp, NULL, n_chars,
                Parrot_ascii_encoding_ptr, 0);
        out       = (unsigned char *)converted->strstart;
        STRING_ITER_INIT(interp, &reader);

        while (reader.charpos < n_chars) {
            const UINTVAL ch = STRING_iter_get_and_advance(interp, src, &reader);

            if (ch >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                        "can't convert unicode string to ascii");

            *out = ch;
            ++out;
        }

        converted->bufused = n_chars;
        converted->strlen  = n_chars;
    }
    else {
        /* Single-byte source: check each byte, then reuse the bytes as-is. */
        const unsigned char * const bytes = (const unsigned char *)src->strstart;
        UINTVAL idx = 0;

        while (idx < src->strlen) {
            const UINTVAL ch = bytes[idx];

            if (ch >= 0x80)
                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
                    "lossy conversion to ascii");

            ++idx;
        }

        converted           = Parrot_str_clone(interp, src);
        converted->encoding = Parrot_ascii_encoding_ptr;
    }

    return converted;
}
Ejemplo n.º 9
0
/*
 * Converts src to the ucs4 encoding.
 *
 * A source that is already ucs4 is returned through Parrot_str_copy.
 * Otherwise a new string with room for four bytes per character is created
 * and filled with one utf32_t per character: widened directly from the bytes
 * when the source uses one byte per codepoint, or read through a string
 * iterator in all other cases.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
    ASSERT_ARGS(ucs4_to_encoding)
    const UINTVAL  n_chars = src->strlen;
    UINTVAL        idx;
    STRING        *converted;
    utf32_t       *out;

    if (src->encoding == Parrot_ucs4_encoding_ptr)
        return Parrot_str_copy(interp, src);

    converted = Parrot_str_new_init(interp, NULL, n_chars * 4,
            Parrot_ucs4_encoding_ptr, 0);
    out       = (utf32_t *)converted->strstart;

    if (STRING_max_bytes_per_codepoint(src) != 1) {
        String_iter reader;

        STRING_ITER_INIT(interp, &reader);

        while (reader.charpos < n_chars) {
            /* Capture the slot index before the iterator advances. */
            idx      = reader.charpos;
            out[idx] = STRING_iter_get_and_advance(interp, src, &reader);
        }
    }
    else {
        const unsigned char *bytes = (unsigned char *)src->strstart;

        idx = 0;
        while (idx < n_chars) {
            out[idx] = bytes[idx];
            idx++;
        }
    }

    converted->strlen  = n_chars;
    converted->bufused = n_chars * 4;

    return converted;
}
Ejemplo n.º 10
0
/*
 * Computes a hash over the characters of src, starting from the seed hashval.
 *
 * For each character the running value is multiplied by 33 (shift by five
 * plus itself) and the character value is added.  The result is both stored
 * in src->hashval (through a const cast) and returned.
 */
PARROT_WARN_UNUSED_RESULT
size_t
encoding_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t hashval)
{
    ASSERT_ARGS(encoding_hash)
    DECL_CONST_CAST;
    STRING * const str = PARROT_const_cast(STRING *, src);
    String_iter    reader;

    STRING_ITER_INIT(interp, &reader);

    for (; reader.charpos < str->strlen;) {
        const UINTVAL ch = STRING_iter_get_and_advance(interp, str, &reader);

        hashval = (hashval << 5) + hashval + ch;
    }

    /* Store the result on the string itself as well. */
    str->hashval = hashval;

    return hashval;
}
Ejemplo n.º 11
0
/*
 * Scans src from character position offset, over at most count characters,
 * for the first character that is NOT in the classes given by flags.
 *
 * Values below 256 are checked against Parrot_iso_8859_1_typetable.  For
 * larger values each single class bit from enum_cclass_uppercase up to
 * enum_cclass_word that is set in flags is tested with u_iscclass, and the
 * character counts as a miss as soon as one of those tests fails.
 *
 * Returns the position of that character, or min(src->strlen, offset + count)
 * when there is none or when flags is enum_cclass_any.  An offset beyond the
 * end of the string yields offset + count.
 *
 * The function keeps a function-level static cache of the last string and
 * iterator position (only refreshed for hits past position 128) so a later
 * call on the same string at an equal or greater offset can resume from there.
 * NOTE(review): the cache is keyed on the string pointer alone and is shared
 * by every caller -- confirm this is safe with respect to string reuse and
 * threads.
 */
PARROT_WARN_UNUSED_RESULT
INTVAL
encoding_find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
        UINTVAL offset, UINTVAL count)
{
    ASSERT_ARGS(encoding_find_not_cclass)
    String_iter iter;
    UINTVAL     codepoint;
    UINTVAL     end  = offset + count;
    int         bit;
    int         miss = 0;

    static UINTVAL last_char_offset;
    static String_iter cached_iter;
    static STRING *last_string = 0;

    if (offset > src->strlen) {
        /* XXX: Throw in this case? */
        return offset + count;
    }

    if (last_string == src && offset >= last_char_offset) {
        /* Resume from the cached position, moving forward if necessary. */
        iter = cached_iter;
        if (offset > last_char_offset)
            STRING_iter_skip(interp, src, &iter, offset - last_char_offset);
    }
    else {
        STRING_ITER_INIT(interp, &iter);
        if (offset)
            STRING_iter_skip(interp, src, &iter, offset);
    }

    /* Never scan past the end of the string. */
    if (src->strlen < end)
        end = src->strlen;

    /* Every character is in the "any" class, so nothing can be found. */
    if (flags == enum_cclass_any)
        return end;

    while (iter.charpos < end) {
        codepoint = STRING_iter_get_and_advance(interp, src, &iter);

        if (codepoint >= 256) {
            for (bit = enum_cclass_uppercase;
                    bit <= enum_cclass_word ; bit <<= 1) {
                if ((bit & flags) && !u_iscclass(interp, codepoint, bit)) {
                    miss = 1;
                    break;
                }
            }
        }
        else if (!(Parrot_iso_8859_1_typetable[codepoint] & flags)) {
            miss = 1;
        }

        if (miss)
            break;
    }

    if (!miss)
        return end;

    if (iter.charpos > 128) {
        last_char_offset = iter.charpos;
        cached_iter = iter;
        last_string = (STRING*)PTR2INTVAL(src);
    }

    /* The iterator has already moved past the character that was found. */
    return iter.charpos - 1;
}