示例#1
0
文件: nfg.c 项目: baby-gnu/MoarVM
MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b) {
    MVMGrapheme32 last_a;
    MVMGrapheme32 first_b;

    /* If either string is empty, we're good. */
    if (a->body.num_graphs == 0 || b->body.num_graphs == 0)
        return 1;

    /* Get first and last graphemes of the strings. */
    last_a = MVM_string_get_grapheme_at_nocheck(tc, a, a->body.num_graphs - 1);
    first_b = MVM_string_get_grapheme_at_nocheck(tc, b, 0);

    /* If either is synthetic, assume we'll have to re-normalize (this is an
     * over-estimate, most likely). Note if you optimize this that it serves
     * as a guard for what follows. */
    if (last_a < 0 || first_b < 0)
        return 0;

    /* If both less than the first significant char for NFC, and the first is
     * not \r, we're good. */
    if (last_a != 0x0D && last_a < MVM_NORMALIZE_FIRST_SIG_NFC
                       && first_b < MVM_NORMALIZE_FIRST_SIG_NFC)
        return 1;

    /* If either fail quickcheck or have ccc > 0, have to re-normalize. */
    return passes_quickcheck_and_zero_ccc(tc, last_a) && passes_quickcheck_and_zero_ccc(tc, first_b);
}
示例#2
0
文件: nfg.c 项目: MasterDuke17/MoarVM
/* Returns non-zero if the result of concatenating the two strings will freely
 * leave us in NFG without any further effort. */
MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b) {
    MVMGrapheme32 last_a;
    MVMGrapheme32 first_b;
    MVMGrapheme32 crlf;

    /* If either string is empty, we're good. */
    if (a->body.num_graphs == 0 || b->body.num_graphs == 0)
        return 1;

    /* Get first and last graphemes of the strings. */
    last_a  = MVM_string_get_grapheme_at_nocheck(tc, a, a->body.num_graphs - 1);
    first_b = MVM_string_get_grapheme_at_nocheck(tc, b, 0);
    /* Put the case where we are adding a lf or crlf line ending */
    if (first_b == '\n')
        /* If we see \r + \n we need to renormalize. Otherwise we're good */
        return last_a == '\r' ? 0 : 1;

    crlf = MVM_nfg_crlf_grapheme(tc);
    /* As a control code we are always going to break if we see one of these.
     * Check first_b for speeding up line endings */
    if (first_b == crlf || last_a == crlf)
        return 0;
    /* If either is synthetic other than "\r\n", assume we'll have to re-normalize
     * (this is an over-estimate, most likely). Note if you optimize this that it
     * serves as a guard for what follows.
     * TODO get the last codepoint of last_a and first codepoint of first_b and call
     * MVM_unicode_normalize_should_break */
    if (last_a < 0 || first_b < 0)
        return 0;

    /* If both less than the first significant char for NFC we are good */
    if (last_a < MVM_NORMALIZE_FIRST_SIG_NFC && first_b < MVM_NORMALIZE_FIRST_SIG_NFC) {
        return 1;
    }
    else {
        /* Check if the two codepoints would be joined during normalization.
         * Returns 1 if they would break and thus is safe under concat, or 0 if
         * they would be joined. */
        MVMNormalizer norm;
        int rtrn;
        MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFG);
        /* Since we are only looking at two codepoints, we don't know what came
         * before. Because of special rules with Regional Indicators, pretend
         * the previous codepoint was a regional indicator. This will return the
         * special value of 2 from MVM_unicode_normalize_should_break and trigger
         * re_nfg if last_a and first_b are both regional indicators and we will
         * never break NFG regardless of what the codepoint before last_a is. */
        norm.regional_indicator = 1;
        rtrn = MVM_unicode_normalize_should_break(tc, last_a, first_b, &norm);
        MVM_unicode_normalizer_cleanup(tc, &norm);
        /* If both CCC are non-zero then it may need to be reordered. For now return 0.
         * This can be optimized. */
        if (MVM_unicode_relative_ccc(tc, last_a) != 0 && MVM_unicode_relative_ccc(tc, first_b) != 0)
            return 0;
        return rtrn;
    }
}
示例#3
0
文件: coerce.c 项目: krunen/MoarVM
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
    MVMObject *result;
    MVMint64 zvalue = 0;
    MVMint64 zbase  = 1;
    MVMint64 chars  = MVM_string_graphs(tc, str);
    MVMint64 value  = zvalue;
    MVMint64 base   = zbase;
    MVMint64   pos  = -1;
    MVMuint16  neg  = 0;
    MVMint64   ch;

    if (radix > 36) {
        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %d (max 36)", radix);
    }

    ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    if ((flag & 0x02) && (ch == '+' || ch == '-')) {
        neg = (ch == '-');
        offset++;
        ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    }

    while (offset < chars) {
        if (ch >= '0' && ch <= '9') ch = ch - '0';
        else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
        else break;
        if (ch >= radix) break;
        zvalue = zvalue * radix + ch;
        zbase = zbase * radix;
        offset++; pos = offset;
        if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
        if (ch != '_') continue;
        offset++;
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
    }

    if (neg || flag & 0x01) { value = -value; }

    /* initialize the object */
    result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
    MVMROOT(tc, result, {
        MVMObject *box_type = MVM_hll_current(tc)->int_box_type;
        MVMROOT(tc, box_type, {
            MVMObject *boxed = MVM_repr_box_int(tc, box_type, value);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, base);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, pos);
            MVM_repr_push_o(tc, result, boxed);
        });
    });
示例#4
0
文件: coerce.c 项目: krunen/MoarVM
MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) {
    return str == NULL ||
           !IS_CONCRETE(str) ||
           MVM_string_graphs(tc, str) == 0 ||
           (MVM_string_graphs(tc, str) == 1 && MVM_string_get_grapheme_at_nocheck(tc, str, 0) == 48)
           ? 0 : 1;
}
示例#5
0
文件: coerce.c 项目: niner/MoarVM
void MVM_coerce_istrue(MVMThreadContext *tc, MVMObject *obj, MVMRegister *res_reg,
        MVMuint8 *true_addr, MVMuint8 *false_addr, MVMuint8 flip) {
    MVMint64 result = 0;
    if (!MVM_is_null(tc, obj)) {
        MVMBoolificationSpec *bs = obj->st->boolification_spec;
        switch (bs == NULL ? MVM_BOOL_MODE_NOT_TYPE_OBJECT : bs->mode) {
            case MVM_BOOL_MODE_CALL_METHOD: {
                MVMObject *code = MVM_frame_find_invokee(tc, bs->method, NULL);
                MVMCallsite *inv_arg_callsite = MVM_callsite_get_common(tc, MVM_CALLSITE_ID_INV_ARG);
                if (res_reg) {
                    /* We need to do the invocation, and set this register
                     * the result. Then we just do the call. For the flip
                     * case, just set up special return handler to flip
                     * the register. */
                    MVM_args_setup_thunk(tc, res_reg, MVM_RETURN_INT, inv_arg_callsite);
                    tc->cur_frame->args[0].o = obj;
                    if (flip) {
                        tc->cur_frame->special_return      = flip_return;
                        tc->cur_frame->special_return_data = res_reg;
                    }
                    STABLE(code)->invoke(tc, code, inv_arg_callsite, tc->cur_frame->args);
                }
                else {
                    /* Need to set up special return hook. */
                    BoolMethReturnData *data = MVM_malloc(sizeof(BoolMethReturnData));
                    data->true_addr  = true_addr;
                    data->false_addr = false_addr;
                    data->flip       = flip;
                    tc->cur_frame->special_return      = boolify_return;
                    tc->cur_frame->special_return_data = data;
                    MVM_args_setup_thunk(tc, &data->res_reg, MVM_RETURN_INT, inv_arg_callsite);
                    tc->cur_frame->args[0].o = obj;
                    STABLE(code)->invoke(tc, code, inv_arg_callsite, tc->cur_frame->args);
                }
                return;
            }
            case MVM_BOOL_MODE_UNBOX_INT:
                result = !IS_CONCRETE(obj) || REPR(obj)->box_funcs.get_int(tc, STABLE(obj), obj, OBJECT_BODY(obj)) == 0 ? 0 : 1;
                break;
            case MVM_BOOL_MODE_UNBOX_NUM:
                result = !IS_CONCRETE(obj) || REPR(obj)->box_funcs.get_num(tc, STABLE(obj), obj, OBJECT_BODY(obj)) == 0.0 ? 0 : 1;
                break;
            case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY: {
                MVMString *str;
                if (!IS_CONCRETE(obj)) {
                    result = 0;
                    break;
                }
                str = REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj));
                result = MVM_coerce_istrue_s(tc, str);
                break;
            }
            case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY_OR_ZERO: {
                MVMString *str;
                MVMint64 chars;
                if (!IS_CONCRETE(obj)) {
                    result = 0;
                    break;
                }
                str = REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj));

                if (str == NULL || !IS_CONCRETE(str)) {
                    result = 0;
                    break;
                }

                chars = MVM_string_graphs(tc, str);

                result = chars == 0 ||
                        (chars == 1 && MVM_string_get_grapheme_at_nocheck(tc, str, 0) == 48)
                        ? 0 : 1;
                break;
            }
            case MVM_BOOL_MODE_NOT_TYPE_OBJECT:
                result = !IS_CONCRETE(obj) ? 0 : 1;
                break;
            case MVM_BOOL_MODE_BIGINT:
                result = IS_CONCRETE(obj) ? MVM_bigint_bool(tc, obj) : 0;
                break;
            case MVM_BOOL_MODE_ITER:
                result = IS_CONCRETE(obj) ? MVM_iter_istrue(tc, (MVMIter *)obj) : 0;
                break;
            case MVM_BOOL_MODE_HAS_ELEMS:
                result = IS_CONCRETE(obj) ? MVM_repr_elems(tc, obj) != 0 : 0;
                break;
            default:
                MVM_exception_throw_adhoc(tc, "Invalid boolification spec mode used");
        }
    }

    if (flip)
        result = result ? 0 : 1;

    if (res_reg) {
        res_reg->i64 = result;
    }
    else {
        if (result)
            *(tc->interp_cur_op) = true_addr;
        else
            *(tc->interp_cur_op) = false_addr;
    }
}
示例#6
0
文件: coerce.c 项目: baby-gnu/MoarVM
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
    MVMObject *result;
    MVMint64 zvalue = 0;
    MVMint64 zbase  = 1;
    MVMint64 chars  = MVM_string_graphs(tc, str);
    MVMint64 value  = zvalue;
    MVMint64 base   = zbase;
    MVMint64   pos  = -1;
    MVMuint16  neg  = 0;
    MVMint64   ch;

    if (radix > 36) {
        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix);
    }

    ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    if ((flag & 0x02) && (ch == '+' || ch == '-')) {
        neg = (ch == '-');
        offset++;
        ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    }

    while (offset < chars) {
        if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */
        else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
        else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */
        else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */
        else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, 
                MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
            /* As of Unicode 6.0.0, we know that Nd category numerals are within
             * the range 0..9
             */

            /* the string returned for NUMERIC_VALUE contains a floating point
             * value, so atoi will stop on the . in the string. This is fine
             * though, since we'd have to truncate the float regardless.
             */
            ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
        }
        else break;
        if (ch >= radix) break;
        zvalue = zvalue * radix + ch;
        zbase = zbase * radix;
        offset++; pos = offset;
        if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
        if (ch != '_') continue;
        offset++;
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
    }

    if (neg || flag & 0x01) { value = -value; }

    /* initialize the object */
    result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
    MVMROOT(tc, result, {
        MVMObject *box_type = MVM_hll_current(tc)->int_box_type;
        MVMROOT(tc, box_type, {
            MVMObject *boxed = MVM_repr_box_int(tc, box_type, value);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, base);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, pos);
            MVM_repr_push_o(tc, result, boxed);
        });
    });