Пример #1
0
/* Boolifies a string. Yields 0 for a NULL pointer, a non-concrete string, an
 * empty string, or a string consisting of the single grapheme 48 (ASCII '0');
 * yields 1 for everything else. */
MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) {
    /* Nothing to inspect: treat as false. */
    if (str == NULL || !IS_CONCRETE(str))
        return 0;

    /* The empty string is false. */
    if (MVM_string_graphs(tc, str) == 0)
        return 0;

    /* So is the one-grapheme string "0". */
    if (MVM_string_graphs(tc, str) == 1
            && MVM_string_get_grapheme_at_nocheck(tc, str, 0) == 48)
        return 0;

    return 1;
}
Пример #2
0
/* Takes an array of num_seps strings and sets them up as the separators of a
 * decode stream separator spec.
 *
 * sep_spec->sep_lengths receives the grapheme count of each separator and
 * sep_spec->sep_graphemes receives the graphemes of all separators laid out
 * back to back. Any arrays previously held by sep_spec are freed.
 *
 * Throws "Too many line separators" when num_seps exceeds 0xFFF and
 * "Line separator too long" when any separator exceeds 0xFFFF graphemes.
 * All validation happens before sep_spec is touched, so a rejected call
 * leaves the existing spec (and its pointers) intact instead of leaving
 * already-freed pointers behind. */
void MVM_string_decode_stream_sep_from_strings(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec,
                                                     MVMString **seps, MVMint32 num_seps) {
    MVMGraphemeIter gi;
    MVMint32 i, graph_length, graph_pos;

    if (num_seps > 0xFFF)
        MVM_exception_throw_adhoc(tc, "Too many line separators");

    /* First pass: validate each separator and total up the graphemes. With
     * at most 0xFFF separators of at most 0xFFFF graphemes each, the total
     * cannot overflow a 32-bit integer. */
    graph_length = 0;
    for (i = 0; i < num_seps; i++) {
        MVMuint32 num_graphs = MVM_string_graphs(tc, seps[i]);
        if (num_graphs > 0xFFFF)
            MVM_exception_throw_adhoc(tc, "Line separator too long");
        graph_length += num_graphs;
    }

    /* Everything checked out; only now discard the previous separator data. */
    MVM_free(sep_spec->sep_lengths);
    MVM_free(sep_spec->sep_graphemes);

    sep_spec->num_seps      = num_seps;
    sep_spec->sep_lengths   = MVM_malloc(num_seps * sizeof(MVMint32));
    sep_spec->sep_graphemes = MVM_malloc(graph_length * sizeof(MVMGrapheme32));

    /* Second pass: record each length and append its graphemes. */
    graph_pos = 0;
    for (i = 0; i < num_seps; i++) {
        sep_spec->sep_lengths[i] = MVM_string_graphs(tc, seps[i]);
        MVM_string_gi_init(tc, &gi, seps[i]);
        while (MVM_string_gi_has_more(tc, &gi))
            sep_spec->sep_graphemes[graph_pos++] = MVM_string_gi_get_grapheme(tc, &gi);
    }
}
Пример #3
0
/* Parses a run of digits in the given radix (at most 36) out of str, starting
 * at grapheme index offset, and builds a three-element result array holding
 * the boxed value, the boxed base (radix raised to the number of digits
 * counted) and the boxed position just past the last digit consumed (-1 when
 * no digit was consumed).
 *
 * Bits of flag, as used below:
 *   0x01  negate the parsed value
 *   0x02  accept a leading '+' or '-'
 *   0x04  only fold digits into value/base when a non-zero digit is seen, so
 *         a trailing run of zeros is left out of both
 *
 * NOTE(review): the end of this function (return and closing brace) is not
 * part of this excerpt. */
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
    MVMObject *result;
    /* zvalue/zbase accumulate every digit; value/base are the reported pair. */
    MVMint64 zvalue = 0;
    MVMint64 zbase  = 1;
    MVMint64 chars  = MVM_string_graphs(tc, str);
    MVMint64 value  = zvalue;
    MVMint64 base   = zbase;
    MVMint64   pos  = -1;
    MVMuint16  neg  = 0;
    MVMint64   ch;

    if (radix > 36) {
        /* NOTE(review): radix is an MVMint64 but is formatted with %d. */
        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %d (max 36)", radix);
    }

    /* Fetch the first grapheme (0 when offset is already past the end) and
     * consume an optional sign if the caller asked for one. */
    ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    if ((flag & 0x02) && (ch == '+' || ch == '-')) {
        neg = (ch == '-');
        offset++;
        ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    }

    while (offset < chars) {
        /* Map the grapheme to its digit value; anything else ends the number. */
        if (ch >= '0' && ch <= '9') ch = ch - '0';
        else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
        else break;
        /* A digit that is not valid in this radix also ends the number. */
        if (ch >= radix) break;
        zvalue = zvalue * radix + ch;
        zbase = zbase * radix;
        offset++; pos = offset;
        if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
        /* A single '_' between digits is skipped over. */
        if (ch != '_') continue;
        offset++;
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
    }

    if (neg || flag & 0x01) { value = -value; }

    /* Build the result array: [value, base, pos], each boxed with the
     * current HLL's int box type. */
    result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
    MVMROOT(tc, result, {
        MVMObject *box_type = MVM_hll_current(tc)->int_box_type;
        MVMROOT(tc, box_type, {
            MVMObject *boxed = MVM_repr_box_int(tc, box_type, value);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, base);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, pos);
            MVM_repr_push_o(tc, result, boxed);
        });
    });
Пример #4
0
/* Maps a calling convention name to an ID.
 *
 * A NULL or empty name selects DC_CALL_C_DEFAULT. Recognised names are
 * "cdecl", "stdcall" and "win64"; any other name throws an
 * "Unknown calling convention" exception. The temporary C string made from
 * name is released on every path (on the error path it is handed to
 * MVM_exception_throw_adhoc_free via the waste list). */
MVMint16 MVM_nativecall_get_calling_convention(MVMThreadContext *tc, MVMString *name) {
    MVMint16 result = DC_CALL_C_DEFAULT;
    if (name && MVM_string_graphs(tc, name) > 0) {
        char *cname = MVM_string_utf8_encode_C_string(tc, name);
        if (strcmp(cname, "cdecl") == 0)
            result = DC_CALL_C_X86_CDECL;
        else if (strcmp(cname, "stdcall") == 0)
            result = DC_CALL_C_X86_WIN32_STD;
        /* This branch used to test "stdcall" a second time, which made
         * DC_CALL_C_X64_WIN64 unreachable.
         * NOTE(review): "win64" is the presumed intended name - confirm. */
        else if (strcmp(cname, "win64") == 0)
            result = DC_CALL_C_X64_WIN64;
        else {
            char *waste[] = { cname, NULL };
            MVM_exception_throw_adhoc_free(tc, waste,
                "Unknown calling convention '%s' used for native call", cname);
        }
        MVM_free(cname);
    }
    return result;
}
Пример #5
0
/* Encodes the specified substring to latin-1. Anything outside of latin-1 range
 * will become a ?. The result string is NULL terminated, but the specified
 * size is the non-null part.
 *
 * start and length are measured in graphemes; a length of -1 means "through
 * the end of the string". Throws "start out of range" or "length out of
 * range" when the requested window does not lie inside str. The returned
 * buffer comes from MVM_malloc. If output_size is non-NULL it receives the
 * number of bytes written, excluding the terminator. */
char * MVM_string_latin1_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length) {
    /* Latin-1 is a single byte encoding, so each grapheme will just become
     * a single byte. */
    MVMStringIndex strgraphs = MVM_string_graphs(tc, str);
    MVMuint32 startu, lengthu, written;
    MVMuint8 *result;

    /* must check start first since it's used in the length check */
    if (start < 0 || start > strgraphs)
        MVM_exception_throw_adhoc(tc, "start out of range");
    /* Validate at full 64-bit width; narrowing first would let an oversized
     * length wrap around into an apparently valid one. */
    if (length < -1 || (length != -1 && length > (MVMint64)strgraphs - start))
        MVM_exception_throw_adhoc(tc, "length out of range");

    startu  = (MVMuint32)start;
    lengthu = length == -1 ? (MVMuint32)(strgraphs - startu) : (MVMuint32)length;

    result = MVM_malloc(lengthu + 1);
    if (str->body.storage_type == MVM_STRING_GRAPHEME_ASCII) {
        /* No encoding needed; directly copy the requested window. */
        memcpy(result, str->body.storage.blob_ascii + startu, lengthu);
        written = lengthu;
    }
    else {
        /* Walk graphemes so that exactly one byte is produced per grapheme
         * and the buffer of lengthu + 1 bytes can never be overrun. */
        MVMGraphemeIter gi;
        MVMuint32 skipped;
        MVM_string_gi_init(tc, &gi, str);

        /* Step over the graphemes that precede the window. */
        for (skipped = 0; skipped < startu && MVM_string_gi_has_more(tc, &gi); skipped++)
            MVM_string_gi_get_grapheme(tc, &gi);

        for (written = 0; written < lengthu && MVM_string_gi_has_more(tc, &gi); written++) {
            MVMGrapheme32 g = MVM_string_gi_get_grapheme(tc, &gi);
            result[written] = (g >= 0 && g <= 255) ? (MVMuint8)g : '?';
        }
    }
    result[written] = 0;
    if (output_size)
        *output_size = written;
    return (char *)result;
}
Пример #6
0
/* Decides whether obj counts as true, following the boolification spec found
 * on its STable (objects without a spec are true exactly when concrete; a
 * null obj is false).
 *
 * The outcome is inverted when flip is set. It is then delivered in one of
 * two ways: stored into res_reg->i64 when res_reg is given, otherwise by
 * pointing the interpreter's current op at true_addr or false_addr.
 *
 * In MVM_BOOL_MODE_CALL_METHOD the answer is not computed here: the spec's
 * method is invoked and this function returns straight away, with
 * special_return hooks (flip_return / boolify_return) installed on the
 * current frame where the result needs post-processing. */
void MVM_coerce_istrue(MVMThreadContext *tc, MVMObject *obj, MVMRegister *res_reg,
        MVMuint8 *true_addr, MVMuint8 *false_addr, MVMuint8 flip) {
    MVMint64 truthy = 0;

    if (!MVM_is_null(tc, obj)) {
        MVMBoolificationSpec *spec = obj->st->boolification_spec;
        switch (spec ? spec->mode : MVM_BOOL_MODE_NOT_TYPE_OBJECT) {
            case MVM_BOOL_MODE_CALL_METHOD: {
                MVMObject   *invokee = MVM_frame_find_invokee(tc, spec->method, NULL);
                MVMCallsite *cs      = MVM_callsite_get_common(tc, MVM_CALLSITE_ID_INV_ARG);
                if (!res_reg) {
                    /* Branching form: stash the jump targets for the
                     * special return hook, which receives the method's
                     * result in hook->res_reg. */
                    BoolMethReturnData *hook = MVM_malloc(sizeof(BoolMethReturnData));
                    hook->true_addr  = true_addr;
                    hook->false_addr = false_addr;
                    hook->flip       = flip;
                    tc->cur_frame->special_return      = boolify_return;
                    tc->cur_frame->special_return_data = hook;
                    MVM_args_setup_thunk(tc, &hook->res_reg, MVM_RETURN_INT, cs);
                    tc->cur_frame->args[0].o = obj;
                }
                else {
                    /* Register form: the method's result lands directly in
                     * res_reg; a hook is only needed to flip it. */
                    MVM_args_setup_thunk(tc, res_reg, MVM_RETURN_INT, cs);
                    tc->cur_frame->args[0].o = obj;
                    if (flip) {
                        tc->cur_frame->special_return      = flip_return;
                        tc->cur_frame->special_return_data = res_reg;
                    }
                }
                STABLE(invokee)->invoke(tc, invokee, cs, tc->cur_frame->args);
                return;
            }
            case MVM_BOOL_MODE_UNBOX_INT:
                /* True when concrete and the unboxed integer is non-zero. */
                truthy = IS_CONCRETE(obj)
                      && REPR(obj)->box_funcs.get_int(tc, STABLE(obj), obj, OBJECT_BODY(obj)) != 0;
                break;
            case MVM_BOOL_MODE_UNBOX_NUM:
                /* True when concrete and the unboxed number is not 0.0. */
                truthy = IS_CONCRETE(obj)
                      && REPR(obj)->box_funcs.get_num(tc, STABLE(obj), obj, OBJECT_BODY(obj)) != 0.0;
                break;
            case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY:
                /* Defer to the string boolification routine. */
                if (IS_CONCRETE(obj)) {
                    MVMString *unboxed = REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj));
                    truthy = MVM_coerce_istrue_s(tc, unboxed);
                }
                break;
            case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY_OR_ZERO: {
                /* False for the empty string and for the single grapheme
                 * 48 (ASCII '0'); true for any other concrete string. */
                MVMString *unboxed;
                MVMint64   graphs;
                if (!IS_CONCRETE(obj))
                    break;
                unboxed = REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj));
                if (unboxed == NULL || !IS_CONCRETE(unboxed))
                    break;
                graphs = MVM_string_graphs(tc, unboxed);
                if (graphs == 0)
                    truthy = 0;
                else if (graphs == 1)
                    truthy = MVM_string_get_grapheme_at_nocheck(tc, unboxed, 0) != 48;
                else
                    truthy = 1;
                break;
            }
            case MVM_BOOL_MODE_NOT_TYPE_OBJECT:
                if (IS_CONCRETE(obj))
                    truthy = 1;
                break;
            case MVM_BOOL_MODE_BIGINT:
                if (IS_CONCRETE(obj))
                    truthy = MVM_bigint_bool(tc, obj);
                break;
            case MVM_BOOL_MODE_ITER:
                if (IS_CONCRETE(obj))
                    truthy = MVM_iter_istrue(tc, (MVMIter *)obj);
                break;
            case MVM_BOOL_MODE_HAS_ELEMS:
                if (IS_CONCRETE(obj))
                    truthy = MVM_repr_elems(tc, obj) != 0;
                break;
            default:
                MVM_exception_throw_adhoc(tc, "Invalid boolification spec mode used");
        }
    }

    if (flip)
        truthy = !truthy;

    if (res_reg)
        res_reg->i64 = truthy;
    else
        *(tc->interp_cur_op) = truthy ? true_addr : false_addr;
}
Пример #7
0
/* Boolifies a string: 1 when str is a non-NULL, concrete string with at least
 * one grapheme, 0 otherwise. */
MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) {
    if (str != NULL && IS_CONCRETE(str) && MVM_string_graphs(tc, str) != 0)
        return 1;
    return 0;
}
Пример #8
0
/* Encodes the specified substring to latin-1. A codepoint outside of the
 * latin-1 range is substituted with the (latin-1 encoded) replacement string
 * when one is given; without a replacement an exception is thrown. The
 * result string is NULL terminated, but the specified size is the non-null
 * part.
 *
 * start and length are measured in graphemes; a length of -1 means "through
 * the end of the string". Throws "start out of range" or "length out of
 * range" when the requested window does not lie inside str. The returned
 * buffer comes from MVM_malloc/MVM_realloc. If output_size is non-NULL it
 * receives the number of bytes written, excluding the terminator. */
char * MVM_string_latin1_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length,
        MVMString *replacement, MVMint32 translate_newlines) {
    /* Latin-1 is a single byte encoding, but \r\n is a 2-byte grapheme, so we
     * may have to resize as we go. */
    MVMStringIndex strgraphs = MVM_string_graphs(tc, str);
    MVMuint32 startu, lengthu;
    MVMuint8 *result;
    size_t result_alloc;
    MVMuint8 *repl_bytes = NULL;
    MVMuint64 repl_length = 0;

    /* must check start first since it's used in the length check */
    if (start < 0 || start > strgraphs)
        MVM_exception_throw_adhoc(tc, "start out of range");
    /* Validate at full 64-bit width; narrowing first would let an oversized
     * length wrap around into an apparently valid one. */
    if (length < -1 || (length != -1 && length > (MVMint64)strgraphs - start))
        MVM_exception_throw_adhoc(tc, "length out of range");

    startu  = (MVMuint32)start;
    lengthu = length == -1 ? (MVMuint32)(strgraphs - startu) : (MVMuint32)length;

    /* Encode the replacement up front (with no replacement of its own, so an
     * unencodable replacement throws before anything else is allocated). */
    if (replacement)
        repl_bytes = (MVMuint8 *) MVM_string_latin1_encode_substr(tc,
            replacement, &repl_length, 0, -1, NULL, translate_newlines);

    result_alloc = lengthu;
    result = MVM_malloc(result_alloc + 1);
    if (str->body.storage_type == MVM_STRING_GRAPHEME_ASCII) {
        /* No encoding needed; directly copy the requested window. */
        memcpy(result, str->body.storage.blob_ascii + startu, lengthu);
        result[lengthu] = 0;
        if (output_size)
            *output_size = lengthu;
    }
    else {
        /* NOTE(review): this path walks every codepoint of str and does not
         * apply start/length; the buffer grows on demand so it is memory
         * safe, but a proper substring request on a non-ASCII-storage string
         * yields the whole string. Left unchanged here - confirm intent. */
        MVMuint32 i = 0;
        MVMCodepointIter ci;
        MVM_string_ci_init(tc, &ci, str, translate_newlines);
        while (MVM_string_ci_has_more(tc, &ci)) {
            MVMCodepoint ord = MVM_string_ci_get_codepoint(tc, &ci);
            /* Make sure there is room for at least one more byte. */
            if (i == result_alloc) {
                result_alloc += 8;
                result = MVM_realloc(result, result_alloc + 1);
            }
            if (ord >= 0 && ord <= 255) {
                result[i] = (MVMuint8)ord;
                i++;
            }
            else if (replacement) {
                /* Grow when the replacement would not fit in what remains. */
                if (repl_length >= result_alloc || i >= result_alloc - repl_length) {
                    result_alloc += repl_length;
                    result = MVM_realloc(result, result_alloc + 1);
                }
                memcpy(result + i, repl_bytes, repl_length);
                i += repl_length;
            }
            else {
                /* Release both buffers before reporting the failure. */
                MVM_free(result);
                MVM_free(repl_bytes);
                MVM_exception_throw_adhoc(tc,
                    "Error encoding Latin-1 string: could not encode codepoint %d",
                    ord);
            }
        }
        result[i] = 0;
        if (output_size)
            *output_size = i;
    }
    MVM_free(repl_bytes);
    return (char *)result;
}
Пример #9
0
/* Set the line separator. Only the final grapheme of sep is kept, stored in
 * the handle's file data. */
static void set_separator(MVMThreadContext *tc, MVMOSHandle *h, MVMString *sep) {
    MVMIOFileData *file_data = (MVMIOFileData *)h->body.data;
    MVMGrapheme32  last      = (MVMGrapheme32)MVM_string_get_grapheme_at(tc, sep,
        MVM_string_graphs(tc, sep) - 1);

    file_data->sep = last;
}
Пример #10
0
/* Parses the integer, fractional and exponent parts of a number, starting at
 * the codepoint currently held in *cp and pulling further codepoints from ci.
 * s is the whole source string (used to size the scratch buffer and for
 * error messages), radix bounds the digit values that are accepted, and
 * leading_zero tells us a zero was already consumed by the caller so a
 * digit-less remainder is acceptable. Returns the parsed value; on return
 * *cp holds the first codepoint that is not part of the number. Malformed
 * input is reported through parse_error. */
static double parse_int_frac_exp(MVMThreadContext *tc, MVMCodepointIter *ci, MVMCodepoint *cp, MVMString* s, double radix, int leading_zero) {
    /*
     * What we do here is extract the digits from the original string,
     * effectively stripping off underscores and converting fancy Unicode
     * digits to regular ones. We then ASCII-fy those digits and stuff
     * them into digits_buf (along with double-ish things like the dot
     * and 'e'). At the end we give the resultant string to strtod() to
     * do all the dirty work for us, so we don't have to worry about
     * handling denormals or picking closest representable double
     *
     * digits_buf is heap allocated, and parse_error is expected not to
     * return to us (NOTE(review): presumably it throws - confirm), so the
     * buffer is released before every parse_error call to avoid leaking it.
     */
    int digits = 0;
    int frac_digits = 0;
    int digit;
    int ends_with_underscore = 0;
    char *digits_buf = (char *)MVM_malloc(1 + MVM_string_graphs(tc, s));
    char *digits_buf_tail = digits_buf;
    double result;

    if (*cp == '_') {
        MVM_free(digits_buf);
        parse_error(tc, s, "number can't start with _");
    }

    /* Integer part (absent when the number starts with the radix point). */
    if (*cp != '.') {
        while (*cp == '_' || (digit = cp_value(tc, *cp)) != -1) {
            ends_with_underscore = *cp == '_';
            if (*cp != '_') {
                if (digit >= radix) break;
                *digits_buf_tail++ = '0' + digit;
                digits++;
            }
            get_cp(tc, ci, cp);
        }
        if (ends_with_underscore) {
            MVM_free(digits_buf);
            parse_error(tc, s, "a number can't end in underscore");
        }
    }


    /* Fractional part. */
    if (*cp == '.') {
        *digits_buf_tail++ = '.';
        get_cp(tc, ci, cp);
        if (*cp == '_') {
            MVM_free(digits_buf);
            parse_error(tc, s, "radix point can't be followed by _");
        }
        while (*cp == '_' || (digit = cp_value(tc, *cp)) != -1) {
            ends_with_underscore = *cp == '_';
            if (*cp != '_') {
                if (digit >= radix) break;
                *digits_buf_tail++ = '0' + digit;
                frac_digits++;
            }
            get_cp(tc, ci, cp);
        }
        if (frac_digits == 0) {
            MVM_free(digits_buf);
            parse_error(tc, s,
                "radix point must be followed by one or more valid digits");
        }
        if (ends_with_underscore) {
            MVM_free(digits_buf);
            parse_error(tc, s, "a number can't end in underscore");
        }
    }

    if (digits == 0 && frac_digits == 0 && !leading_zero) {
        MVM_free(digits_buf);
        parse_error(tc, s, "expecting a number");
    }

    /* Optional exponent, with an optional sign. */
    if (*cp == 'E' || *cp == 'e') {
        int e_digits = 0;

        *digits_buf_tail++ = 'e';
        get_cp(tc, ci, cp);

        if (parse_sign(tc, ci, cp) == -1)
            *digits_buf_tail++ = '-';
        if (*cp == '_') {
            MVM_free(digits_buf);
            parse_error(tc, s, "'e' or 'E' can't be followed by _");
        }
        while (*cp == '_' || (digit = cp_value(tc, *cp)) != -1) {
            if (*cp != '_') {
                if (digit >= radix) break;
                *digits_buf_tail++ = '0' + digit;
                e_digits++;
            }
            get_cp(tc, ci, cp);
        }
        if (e_digits == 0) {
            MVM_free(digits_buf);
            parse_error(tc, s,
                "'e' or 'E' must be followed by one or more valid digits");
        }
    }

    /* Terminate the ASCII rendition and let strtod do the conversion. */
    *digits_buf_tail = '\0';
    result = strtod(digits_buf, NULL);
    MVM_free(digits_buf);
    return result;
}
Пример #11
0
/* Parses a run of digits in the given radix (at most 36) out of str, starting
 * at grapheme index offset, and builds a three-element result array holding
 * the boxed value, the boxed base (radix raised to the number of digits
 * counted) and the boxed position just past the last digit consumed (-1 when
 * no digit was consumed). Besides ASCII digits and letters, fullwidth Latin
 * letters and codepoints in the Unicode "Nd" general category are accepted
 * as digits.
 *
 * Bits of flag, as used below:
 *   0x01  negate the parsed value
 *   0x02  accept a leading '+' or '-'
 *   0x04  only fold digits into value/base when a non-zero digit is seen, so
 *         a trailing run of zeros is left out of both
 *
 * NOTE(review): the end of this function (return and closing brace) is not
 * part of this excerpt. */
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
    MVMObject *result;
    /* zvalue/zbase accumulate every digit; value/base are the reported pair. */
    MVMint64 zvalue = 0;
    MVMint64 zbase  = 1;
    MVMint64 chars  = MVM_string_graphs(tc, str);
    MVMint64 value  = zvalue;
    MVMint64 base   = zbase;
    MVMint64   pos  = -1;
    MVMuint16  neg  = 0;
    MVMint64   ch;

    if (radix > 36) {
        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix);
    }

    /* Fetch the first grapheme (0 when offset is already past the end) and
     * consume an optional sign if the caller asked for one. */
    ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    if ((flag & 0x02) && (ch == '+' || ch == '-')) {
        neg = (ch == '-');
        offset++;
        ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    }

    while (offset < chars) {
        /* Map the grapheme to its digit value; anything else ends the number. */
        if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */
        else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
        else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */
        else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */
        else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, 
                MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
            /* As of Unicode 6.0.0, we know that Nd category numerals are within
             * the range 0..9
             */

            /* the string returned for NUMERIC_VALUE contains a floating point
             * value, so atoi will stop on the . in the string. This is fine
             * though, since we'd have to truncate the float regardless.
             */
            ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
        }
        else break;
        /* A digit that is not valid in this radix also ends the number. */
        if (ch >= radix) break;
        zvalue = zvalue * radix + ch;
        zbase = zbase * radix;
        offset++; pos = offset;
        if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
        /* A single '_' between digits is skipped over. */
        if (ch != '_') continue;
        offset++;
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
    }

    if (neg || flag & 0x01) { value = -value; }

    /* Build the result array: [value, base, pos], each boxed with the
     * current HLL's int box type. */
    result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
    MVMROOT(tc, result, {
        MVMObject *box_type = MVM_hll_current(tc)->int_box_type;
        MVMROOT(tc, box_type, {
            MVMObject *boxed = MVM_repr_box_int(tc, box_type, value);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, base);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, pos);
            MVM_repr_push_o(tc, result, boxed);
        });
    });
Пример #12
0
/* Set the line separator. For now only the final grapheme of sep is kept,
 * stored in the handle's sync stream data. */
void MVM_io_syncstream_set_separator(MVMThreadContext *tc, MVMOSHandle *h, MVMString *sep) {
    MVMIOSyncStreamData *stream_data = (MVMIOSyncStreamData *)h->body.data;
    MVMGrapheme32        last        = (MVMGrapheme32)MVM_string_get_grapheme_at(tc, sep,
                MVM_string_graphs(tc, sep) - 1);

    stream_data->sep = last;
}