/* Tells the caller whether strings a and b can be concatenated without any
 * NFG re-normalization at the seam.
 *
 * Returns 1 when the join is known to be stable and 0 when the seam may need
 * to be re-normalized. Only the last grapheme of a and the first grapheme of
 * b are inspected. */
MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b) {
    MVMGrapheme32 tail;
    MVMGrapheme32 head;

    /* Nothing can interact across the seam when one side has no graphemes. */
    if (a->body.num_graphs == 0 || b->body.num_graphs == 0)
        return 1;

    /* The only graphemes that matter are the ones touching the seam. */
    tail = MVM_string_get_grapheme_at_nocheck(tc, a, a->body.num_graphs - 1);
    head = MVM_string_get_grapheme_at_nocheck(tc, b, 0);

    /* Negative values are synthetics; conservatively ask for a
     * re-normalization. This test also guards the checks below, which are
     * only applied to non-negative values. */
    if (tail < 0 || head < 0)
        return 0;

    /* Both sides below the first NFC-significant code point cannot combine,
     * with the exception of a trailing \r (0x0D), which is excluded here. */
    if (tail != 0x0D
            && tail < MVM_NORMALIZE_FIRST_SIG_NFC
            && head < MVM_NORMALIZE_FIRST_SIG_NFC)
        return 1;

    /* Otherwise both sides must pass the quick check with zero CCC. The
     * second check is skipped when the first one fails. */
    if (!passes_quickcheck_and_zero_ccc(tc, tail))
        return 0;
    return passes_quickcheck_and_zero_ccc(tc, head) ? 1 : 0;
}
/* Decides whether concatenating a and b leaves the result in NFG for free.
 *
 * Returns non-zero when no further work is needed at the seam and 0 when the
 * caller has to re-normalize. Only the last grapheme of a and the first
 * grapheme of b are examined. */
MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b) {
    MVMGrapheme32 tail;
    MVMGrapheme32 head;
    MVMGrapheme32 crlf_grapheme;
    MVMNormalizer norm;
    int           should_break;

    /* An empty operand cannot disturb the other one. */
    if (a->body.num_graphs == 0 || b->body.num_graphs == 0)
        return 1;

    /* Fetch the two graphemes that meet at the seam. */
    tail = MVM_string_get_grapheme_at_nocheck(tc, a, a->body.num_graphs - 1);
    head = MVM_string_get_grapheme_at_nocheck(tc, b, 0);

    /* Fast path for appending a line ending: "\r" followed by "\n" has to be
     * re-normalized; anything else followed by "\n" is stable. */
    if (head == '\n')
        return tail != '\r';

    /* A "\r\n" grapheme on either side of the seam is reported as not
     * stable. */
    crlf_grapheme = MVM_nfg_crlf_grapheme(tc);
    if (head == crlf_grapheme || tail == crlf_grapheme)
        return 0;

    /* Any other synthetic (negative value): assume re-normalization is
     * required. This is likely an over-estimate, but it also guards the
     * code-point based checks that follow.
     * TODO: take the last code point of tail and the first code point of
     * head and hand those to MVM_unicode_normalize_should_break instead. */
    if (tail < 0 || head < 0)
        return 0;

    /* Both below the first code point that is significant for NFC. */
    if (tail < MVM_NORMALIZE_FIRST_SIG_NFC && head < MVM_NORMALIZE_FIRST_SIG_NFC)
        return 1;

    /* Ask the normalizer whether it would put a break between the two code
     * points. Nothing is known about what precedes tail, so the normalizer
     * is told that the previous code point was a Regional Indicator; per the
     * original author's note, MVM_unicode_normalize_should_break then yields
     * its special value 2 when tail and head are both Regional Indicators.
     * That value is passed through to the caller unchanged. */
    MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFG);
    norm.regional_indicator = 1;
    should_break = MVM_unicode_normalize_should_break(tc, tail, head, &norm);
    MVM_unicode_normalizer_cleanup(tc, &norm);

    /* Two non-zero CCC values may call for reordering; for now treat that as
     * not stable. This can be optimized. */
    if (MVM_unicode_relative_ccc(tc, tail) != 0
            && MVM_unicode_relative_ccc(tc, head) != 0)
        return 0;

    return should_break;
}
/* Parses an integer written in the given radix out of str, starting at
 * grapheme index offset.
 *
 * Accepted digits are ASCII 0-9, a-z and A-Z; parsing stops at the first
 * grapheme that is not a digit or whose value is >= radix. A single '_'
 * following a digit is skipped.
 *
 * flag bits, as used below:
 *   0x01  negate the resulting value
 *   0x02  accept one leading '+' or '-' at offset
 *   0x04  a zero digit does not update the reported value/base; they are
 *         only refreshed when a non-zero digit is consumed
 *
 * The result is an array of the current HLL's slurpy array type holding
 * three boxed integers: the value, radix raised to the number of digits
 * accounted for, and the position just past the last digit consumed (-1 if
 * no digit was consumed).
 *
 * Throws an ad-hoc exception when radix is greater than 36. */
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
    MVMObject *result;
    MVMint64 zvalue = 0;
    MVMint64 zbase  = 1;
    MVMint64 chars  = MVM_string_graphs(tc, str);
    MVMint64 value  = zvalue;
    MVMint64 base   = zbase;
    MVMint64   pos  = -1;
    MVMuint16  neg  = 0;
    MVMint64   ch;

    if (radix > 36) {
        /* radix is an MVMint64, so it must be formatted with PRId64; "%d"
         * would read an int from the variadic arguments. */
        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix);
    }

    ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;

    /* Optional sign, only honoured when flag bit 0x02 is set. */
    if ((flag & 0x02) && (ch == '+' || ch == '-')) {
        neg = (ch == '-');
        offset++;
        ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
    }

    while (offset < chars) {
        /* Map the grapheme to its digit value, or stop. */
        if (ch >= '0' && ch <= '9') ch = ch - '0';
        else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
        else break;
        if (ch >= radix) break;

        zvalue = zvalue * radix + ch;
        zbase = zbase * radix;
        offset++; pos = offset;

        /* With flag bit 0x04 set, zero digits are accumulated in
         * zvalue/zbase but only published once a non-zero digit follows. */
        if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }

        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);

        /* Skip one '_' separator between digits. */
        if (ch != '_') continue;
        offset++;
        if (offset >= chars) break;
        ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
    }

    if (neg || flag & 0x01) { value = -value; }

    /* initialize the object */
    result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
    MVMROOT(tc, result, {
        MVMObject *box_type = MVM_hll_current(tc)->int_box_type;
        MVMROOT(tc, box_type, {
            MVMObject *boxed = MVM_repr_box_int(tc, box_type, value);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, base);
            MVM_repr_push_o(tc, result, boxed);
            boxed = MVM_repr_box_int(tc, box_type, pos);
            MVM_repr_push_o(tc, result, boxed);
        });
    });
    /* NOTE(review): the remainder of this function (return statement and
     * closing brace) is not present in the reviewed text and is left
     * untouched. */
/* Truthiness of a string: returns 0 for NULL, for a non-concrete string, for
 * a string with no graphemes, and for the one-grapheme string whose only
 * grapheme is 48 (ASCII '0'). Every other string yields 1. */
MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) {
    MVMint64 graphs;

    if (str == NULL || !IS_CONCRETE(str))
        return 0;

    graphs = MVM_string_graphs(tc, str);
    if (graphs == 0)
        return 0;

    /* A lone "0" counts as false. */
    if (graphs == 1 && MVM_string_get_grapheme_at_nocheck(tc, str, 0) == 48)
        return 0;

    return 1;
}
/* Boolifies obj according to the boolification spec found on its STable.
 *
 * When res_reg is non-NULL the 0/1 outcome is stored in res_reg->i64;
 * otherwise the interpreter's current op pointer is set to true_addr or
 * false_addr. A non-zero flip inverts the outcome.
 *
 * A null obj is false. An object without a boolification spec is handled as
 * MVM_BOOL_MODE_NOT_TYPE_OBJECT. In MVM_BOOL_MODE_CALL_METHOD the spec's
 * method is invoked and this function returns right after starting that
 * call; the outcome is then delivered through the thunk / special-return
 * handlers installed below rather than by the code at the end of this
 * function.
 *
 * Throws an ad-hoc exception for an unrecognised mode. */
void MVM_coerce_istrue(MVMThreadContext *tc, MVMObject *obj, MVMRegister *res_reg,
        MVMuint8 *true_addr, MVMuint8 *false_addr, MVMuint8 flip) {
    MVMint64 truth = 0;

    if (!MVM_is_null(tc, obj)) {
        MVMBoolificationSpec *spec = obj->st->boolification_spec;
        switch (spec == NULL ? MVM_BOOL_MODE_NOT_TYPE_OBJECT : spec->mode) {
            case MVM_BOOL_MODE_CALL_METHOD: {
                MVMObject   *code     = MVM_frame_find_invokee(tc, spec->method, NULL);
                MVMCallsite *callsite = MVM_callsite_get_common(tc, MVM_CALLSITE_ID_INV_ARG);
                if (!res_reg) {
                    /* Branching form: remember both targets and the flip
                     * setting so the special return hook can use them once
                     * the method has returned. */
                    BoolMethReturnData *data = MVM_malloc(sizeof(BoolMethReturnData));
                    data->true_addr  = true_addr;
                    data->false_addr = false_addr;
                    data->flip       = flip;
                    tc->cur_frame->special_return      = boolify_return;
                    tc->cur_frame->special_return_data = data;
                    MVM_args_setup_thunk(tc, &data->res_reg, MVM_RETURN_INT, callsite);
                    tc->cur_frame->args[0].o = obj;
                }
                else {
                    /* Register form: the call writes its integer result
                     * into res_reg. For flip, a special return handler is
                     * installed to flip that register afterwards. */
                    MVM_args_setup_thunk(tc, res_reg, MVM_RETURN_INT, callsite);
                    tc->cur_frame->args[0].o = obj;
                    if (flip) {
                        tc->cur_frame->special_return      = flip_return;
                        tc->cur_frame->special_return_data = res_reg;
                    }
                }
                STABLE(code)->invoke(tc, code, callsite, tc->cur_frame->args);
                return;
            }

            case MVM_BOOL_MODE_UNBOX_INT:
                /* True for a concrete object whose unboxed integer is non-zero. */
                if (IS_CONCRETE(obj)
                        && REPR(obj)->box_funcs.get_int(tc, STABLE(obj), obj, OBJECT_BODY(obj)) != 0)
                    truth = 1;
                break;

            case MVM_BOOL_MODE_UNBOX_NUM:
                /* True for a concrete object whose unboxed number differs from 0.0. */
                if (IS_CONCRETE(obj)
                        && REPR(obj)->box_funcs.get_num(tc, STABLE(obj), obj, OBJECT_BODY(obj)) != 0.0)
                    truth = 1;
                break;

            case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY:
                /* Delegates to MVM_coerce_istrue_s for concrete objects.
                 * NOTE(review): as that helper is written in this file, a
                 * lone "0" is also reported as false here - confirm that
                 * this is intended for the "not empty" mode. */
                if (IS_CONCRETE(obj)) {
                    MVMString *unboxed = REPR(obj)->box_funcs.get_str(tc,
                        STABLE(obj), obj, OBJECT_BODY(obj));
                    truth = MVM_coerce_istrue_s(tc, unboxed);
                }
                break;

            case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY_OR_ZERO:
                /* False for type objects, NULL or non-concrete strings, the
                 * empty string and the single grapheme 48 (ASCII '0'). */
                if (IS_CONCRETE(obj)) {
                    MVMString *unboxed = REPR(obj)->box_funcs.get_str(tc,
                        STABLE(obj), obj, OBJECT_BODY(obj));
                    if (unboxed != NULL && IS_CONCRETE(unboxed)) {
                        MVMint64 graphs = MVM_string_graphs(tc, unboxed);
                        if (graphs != 0
                                && (graphs != 1
                                    || MVM_string_get_grapheme_at_nocheck(tc, unboxed, 0) != 48))
                            truth = 1;
                    }
                }
                break;

            case MVM_BOOL_MODE_NOT_TYPE_OBJECT:
                /* Concrete objects are true, type objects false. */
                truth = IS_CONCRETE(obj) ? 1 : 0;
                break;

            case MVM_BOOL_MODE_BIGINT:
                if (IS_CONCRETE(obj))
                    truth = MVM_bigint_bool(tc, obj);
                break;

            case MVM_BOOL_MODE_ITER:
                if (IS_CONCRETE(obj))
                    truth = MVM_iter_istrue(tc, (MVMIter *)obj);
                break;

            case MVM_BOOL_MODE_HAS_ELEMS:
                if (IS_CONCRETE(obj))
                    truth = MVM_repr_elems(tc, obj) != 0;
                break;

            default:
                MVM_exception_throw_adhoc(tc, "Invalid boolification spec mode used");
        }
    }

    /* Normalise to 0/1 while applying the requested inversion. */
    if (flip)
        truth = !truth;

    /* Deliver the outcome: store it, or branch. */
    if (res_reg)
        res_reg->i64 = truth;
    else
        *(tc->interp_cur_op) = truth ? true_addr : false_addr;
}
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) { MVMObject *result; MVMint64 zvalue = 0; MVMint64 zbase = 1; MVMint64 chars = MVM_string_graphs(tc, str); MVMint64 value = zvalue; MVMint64 base = zbase; MVMint64 pos = -1; MVMuint16 neg = 0; MVMint64 ch; if (radix > 36) { MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix); } ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0; if ((flag & 0x02) && (ch == '+' || ch == '-')) { neg = (ch == '-'); offset++; ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0; } while (offset < chars) { if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10; else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10; else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) { /* As of Unicode 6.0.0, we know that Nd category numerals are within * the range 0..9 */ /* the string returned for NUMERIC_VALUE contains a floating point * value, so atoi will stop on the . in the string. This is fine * though, since we'd have to truncate the float regardless. 
*/ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE)); } else break; if (ch >= radix) break; zvalue = zvalue * radix + ch; zbase = zbase * radix; offset++; pos = offset; if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; } if (offset >= chars) break; ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset); if (ch != '_') continue; offset++; if (offset >= chars) break; ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset); } if (neg || flag & 0x01) { value = -value; } /* initialize the object */ result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type); MVMROOT(tc, result, { MVMObject *box_type = MVM_hll_current(tc)->int_box_type; MVMROOT(tc, box_type, { MVMObject *boxed = MVM_repr_box_int(tc, box_type, value); MVM_repr_push_o(tc, result, boxed); boxed = MVM_repr_box_int(tc, box_type, base); MVM_repr_push_o(tc, result, boxed); boxed = MVM_repr_box_int(tc, box_type, pos); MVM_repr_push_o(tc, result, boxed); }); });