MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) { return str == NULL || !IS_CONCRETE(str) || MVM_string_graphs(tc, str) == 0 || (MVM_string_graphs(tc, str) == 1 && MVM_string_get_grapheme_at_nocheck(tc, str, 0) == 48) ? 0 : 1; }
/* Takes a string and sets it up as a decode stream separator. */ void MVM_string_decode_stream_sep_from_strings(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec, MVMString **seps, MVMint32 num_seps) { MVMGraphemeIter gi; MVMint32 i, graph_length, graph_pos; if (num_seps > 0xFFF) MVM_exception_throw_adhoc(tc, "Too many line separators"); MVM_free(sep_spec->sep_lengths); MVM_free(sep_spec->sep_graphemes); sep_spec->num_seps = num_seps; sep_spec->sep_lengths = MVM_malloc(num_seps * sizeof(MVMint32)); graph_length = 0; for (i = 0; i < num_seps; i++) { MVMuint32 num_graphs = MVM_string_graphs(tc, seps[i]); if (num_graphs > 0xFFFF) MVM_exception_throw_adhoc(tc, "Line separator too long"); sep_spec->sep_lengths[i] = num_graphs; graph_length += num_graphs; } sep_spec->sep_graphemes = MVM_malloc(graph_length * sizeof(MVMGrapheme32)); graph_pos = 0; for (i = 0; i < num_seps; i++) { MVM_string_gi_init(tc, &gi, seps[i]); while (MVM_string_gi_has_more(tc, &gi)) sep_spec->sep_graphemes[graph_pos++] = MVM_string_gi_get_grapheme(tc, &gi); } }
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) { MVMObject *result; MVMint64 zvalue = 0; MVMint64 zbase = 1; MVMint64 chars = MVM_string_graphs(tc, str); MVMint64 value = zvalue; MVMint64 base = zbase; MVMint64 pos = -1; MVMuint16 neg = 0; MVMint64 ch; if (radix > 36) { MVM_exception_throw_adhoc(tc, "Cannot convert radix of %d (max 36)", radix); } ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0; if ((flag & 0x02) && (ch == '+' || ch == '-')) { neg = (ch == '-'); offset++; ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0; } while (offset < chars) { if (ch >= '0' && ch <= '9') ch = ch - '0'; else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10; else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10; else break; if (ch >= radix) break; zvalue = zvalue * radix + ch; zbase = zbase * radix; offset++; pos = offset; if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; } if (offset >= chars) break; ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset); if (ch != '_') continue; offset++; if (offset >= chars) break; ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset); } if (neg || flag & 0x01) { value = -value; } /* initialize the object */ result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type); MVMROOT(tc, result, { MVMObject *box_type = MVM_hll_current(tc)->int_box_type; MVMROOT(tc, box_type, { MVMObject *boxed = MVM_repr_box_int(tc, box_type, value); MVM_repr_push_o(tc, result, boxed); boxed = MVM_repr_box_int(tc, box_type, base); MVM_repr_push_o(tc, result, boxed); boxed = MVM_repr_box_int(tc, box_type, pos); MVM_repr_push_o(tc, result, boxed); }); });
/* Maps a calling convention name to an ID. */ MVMint16 MVM_nativecall_get_calling_convention(MVMThreadContext *tc, MVMString *name) { MVMint16 result = DC_CALL_C_DEFAULT; if (name && MVM_string_graphs(tc, name) > 0) { char *cname = MVM_string_utf8_encode_C_string(tc, name); if (strcmp(cname, "cdecl") == 0) result = DC_CALL_C_X86_CDECL; else if (strcmp(cname, "stdcall") == 0) result = DC_CALL_C_X86_WIN32_STD; else if (strcmp(cname, "stdcall") == 0) result = DC_CALL_C_X64_WIN64; else { char *waste[] = { cname, NULL }; MVM_exception_throw_adhoc_free(tc, waste, "Unknown calling convention '%s' used for native call", cname); } MVM_free(cname); } return result; }
/* Encodes the specified substring to latin-1. Anything outside of latin-1 range * will become a ?. The result string is NULL terminated, but the specified * size is the non-null part. */ char * MVM_string_latin1_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length) { /* Latin-1 is a single byte encoding, so each grapheme will just become * a single byte. */ MVMuint32 startu = (MVMuint32)start; MVMStringIndex strgraphs = MVM_string_graphs(tc, str); MVMuint32 lengthu = (MVMuint32)(length == -1 ? strgraphs - startu : length); MVMuint8 *result; size_t i; /* must check start first since it's used in the length check */ if (start < 0 || start > strgraphs) MVM_exception_throw_adhoc(tc, "start out of range"); if (length < -1 || start + lengthu > strgraphs) MVM_exception_throw_adhoc(tc, "length out of range"); result = MVM_malloc(lengthu + 1); if (str->body.storage_type == MVM_STRING_GRAPHEME_ASCII) { /* No encoding needed; directly copy. */ memcpy(result, str->body.storage.blob_ascii, lengthu); result[lengthu] = 0; } else { MVMuint32 i = 0; MVMCodepointIter ci; MVM_string_ci_init(tc, &ci, str); while (MVM_string_ci_has_more(tc, &ci)) { MVMCodepoint ord = MVM_string_ci_get_codepoint(tc, &ci); if (ord >= 0 && ord <= 255) result[i] = (MVMuint8)ord; else result[i] = '?'; i++; } result[i] = 0; } if (output_size) *output_size = lengthu; return (char *)result; }
void MVM_coerce_istrue(MVMThreadContext *tc, MVMObject *obj, MVMRegister *res_reg, MVMuint8 *true_addr, MVMuint8 *false_addr, MVMuint8 flip) { MVMint64 result = 0; if (!MVM_is_null(tc, obj)) { MVMBoolificationSpec *bs = obj->st->boolification_spec; switch (bs == NULL ? MVM_BOOL_MODE_NOT_TYPE_OBJECT : bs->mode) { case MVM_BOOL_MODE_CALL_METHOD: { MVMObject *code = MVM_frame_find_invokee(tc, bs->method, NULL); MVMCallsite *inv_arg_callsite = MVM_callsite_get_common(tc, MVM_CALLSITE_ID_INV_ARG); if (res_reg) { /* We need to do the invocation, and set this register * the result. Then we just do the call. For the flip * case, just set up special return handler to flip * the register. */ MVM_args_setup_thunk(tc, res_reg, MVM_RETURN_INT, inv_arg_callsite); tc->cur_frame->args[0].o = obj; if (flip) { tc->cur_frame->special_return = flip_return; tc->cur_frame->special_return_data = res_reg; } STABLE(code)->invoke(tc, code, inv_arg_callsite, tc->cur_frame->args); } else { /* Need to set up special return hook. */ BoolMethReturnData *data = MVM_malloc(sizeof(BoolMethReturnData)); data->true_addr = true_addr; data->false_addr = false_addr; data->flip = flip; tc->cur_frame->special_return = boolify_return; tc->cur_frame->special_return_data = data; MVM_args_setup_thunk(tc, &data->res_reg, MVM_RETURN_INT, inv_arg_callsite); tc->cur_frame->args[0].o = obj; STABLE(code)->invoke(tc, code, inv_arg_callsite, tc->cur_frame->args); } return; } case MVM_BOOL_MODE_UNBOX_INT: result = !IS_CONCRETE(obj) || REPR(obj)->box_funcs.get_int(tc, STABLE(obj), obj, OBJECT_BODY(obj)) == 0 ? 0 : 1; break; case MVM_BOOL_MODE_UNBOX_NUM: result = !IS_CONCRETE(obj) || REPR(obj)->box_funcs.get_num(tc, STABLE(obj), obj, OBJECT_BODY(obj)) == 0.0 ? 0 : 1; break; case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY: { MVMString *str; if (!IS_CONCRETE(obj)) { result = 0; break; } str = REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj)); result = MVM_coerce_istrue_s(tc, str); break; } case MVM_BOOL_MODE_UNBOX_STR_NOT_EMPTY_OR_ZERO: { MVMString *str; MVMint64 chars; if (!IS_CONCRETE(obj)) { result = 0; break; } str = REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj)); if (str == NULL || !IS_CONCRETE(str)) { result = 0; break; } chars = MVM_string_graphs(tc, str); result = chars == 0 || (chars == 1 && MVM_string_get_grapheme_at_nocheck(tc, str, 0) == 48) ? 0 : 1; break; } case MVM_BOOL_MODE_NOT_TYPE_OBJECT: result = !IS_CONCRETE(obj) ? 0 : 1; break; case MVM_BOOL_MODE_BIGINT: result = IS_CONCRETE(obj) ? MVM_bigint_bool(tc, obj) : 0; break; case MVM_BOOL_MODE_ITER: result = IS_CONCRETE(obj) ? MVM_iter_istrue(tc, (MVMIter *)obj) : 0; break; case MVM_BOOL_MODE_HAS_ELEMS: result = IS_CONCRETE(obj) ? MVM_repr_elems(tc, obj) != 0 : 0; break; default: MVM_exception_throw_adhoc(tc, "Invalid boolification spec mode used"); } } if (flip) result = result ? 0 : 1; if (res_reg) { res_reg->i64 = result; } else { if (result) *(tc->interp_cur_op) = true_addr; else *(tc->interp_cur_op) = false_addr; } }
MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) { return str == NULL || !IS_CONCRETE(str) || MVM_string_graphs(tc, str) == 0 ? 0 : 1; }
/* Encodes the specified substring to latin-1. Anything outside of latin-1 range * will become a ?. The result string is NULL terminated, but the specified * size is the non-null part. */ char * MVM_string_latin1_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length, MVMString *replacement, MVMint32 translate_newlines) { /* Latin-1 is a single byte encoding, but \r\n is a 2-byte grapheme, so we * may have to resize as we go. */ MVMuint32 startu = (MVMuint32)start; MVMStringIndex strgraphs = MVM_string_graphs(tc, str); MVMuint32 lengthu = (MVMuint32)(length == -1 ? strgraphs - startu : length); MVMuint8 *result; size_t result_alloc; MVMuint8 *repl_bytes = NULL; MVMuint64 repl_length; /* must check start first since it's used in the length check */ if (start < 0 || start > strgraphs) MVM_exception_throw_adhoc(tc, "start out of range"); if (length < -1 || start + lengthu > strgraphs) MVM_exception_throw_adhoc(tc, "length out of range"); if (replacement) repl_bytes = (MVMuint8 *) MVM_string_latin1_encode_substr(tc, replacement, &repl_length, 0, -1, NULL, translate_newlines); result_alloc = lengthu; result = MVM_malloc(result_alloc + 1); if (str->body.storage_type == MVM_STRING_GRAPHEME_ASCII) { /* No encoding needed; directly copy. */ memcpy(result, str->body.storage.blob_ascii, lengthu); result[lengthu] = 0; if (output_size) *output_size = lengthu; } else { MVMuint32 i = 0; MVMCodepointIter ci; MVM_string_ci_init(tc, &ci, str, translate_newlines); while (MVM_string_ci_has_more(tc, &ci)) { MVMCodepoint ord = MVM_string_ci_get_codepoint(tc, &ci); if (i == result_alloc) { result_alloc += 8; result = MVM_realloc(result, result_alloc + 1); } if (ord >= 0 && ord <= 255) { result[i] = (MVMuint8)ord; i++; } else if (replacement) { if (repl_length >= result_alloc || i >= result_alloc - repl_length) { result_alloc += repl_length; result = MVM_realloc(result, result_alloc + 1); } memcpy(result + i, repl_bytes, repl_length); i += repl_length; } else { MVM_free(result); MVM_free(repl_bytes); MVM_exception_throw_adhoc(tc, "Error encoding Latin-1 string: could not encode codepoint %d", ord); } } result[i] = 0; if (output_size) *output_size = i; } MVM_free(repl_bytes); return (char *)result; }
/* Set the line separator. */ static void set_separator(MVMThreadContext *tc, MVMOSHandle *h, MVMString *sep) { MVMIOFileData *data = (MVMIOFileData *)h->body.data; data->sep = (MVMGrapheme32)MVM_string_get_grapheme_at(tc, sep, MVM_string_graphs(tc, sep) - 1); }
static double parse_int_frac_exp(MVMThreadContext *tc, MVMCodepointIter *ci, MVMCodepoint *cp, MVMString* s, double radix, int leading_zero) { /* * What we do here is extract the digits from the original string, * effectively stripping off underscores and converting fancy Unicode * digits to regular ones. We then ASCII-fy those digits and stuff * them into digits_buf (along with double-ish things like the dot * and 'e'). At the end we give the resultant string to strtod() to * do all the dirty work for us, so we don't have to worry about * handling denormals or picking closest representable double */ int digits = 0; int frac_digits = 0; int digit; int ends_with_underscore = 0; char *digits_buf = (char *)MVM_malloc(1 + MVM_string_graphs(tc, s)); char *digits_buf_tail = digits_buf; double result; if (*cp == '_') parse_error(tc, s, "number can't start with _"); if (*cp != '.') { while (*cp == '_' || (digit = cp_value(tc, *cp)) != -1) { ends_with_underscore = *cp == '_'; if (*cp != '_') { if (digit >= radix) break; *digits_buf_tail++ = '0' + digit; digits++; } get_cp(tc, ci, cp); } if (ends_with_underscore) parse_error(tc, s, "a number can't end in underscore"); } if (*cp == '.') { *digits_buf_tail++ = '.'; get_cp(tc, ci, cp); if (*cp == '_') parse_error(tc, s, "radix point can't be followed by _"); while (*cp == '_' || (digit = cp_value(tc, *cp)) != -1) { ends_with_underscore = *cp == '_'; if (*cp != '_') { if (digit >= radix) break; *digits_buf_tail++ = '0' + digit; frac_digits++; } get_cp(tc, ci, cp); } if (frac_digits == 0) parse_error(tc, s, "radix point must be followed by one or more valid digits"); if (ends_with_underscore) parse_error(tc, s, "a number can't end in underscore"); } if (digits == 0 && frac_digits == 0 && !leading_zero) parse_error(tc, s, "expecting a number"); if (*cp == 'E' || *cp == 'e') { int e_digits = 0; *digits_buf_tail++ = 'e'; get_cp(tc, ci, cp); if (parse_sign(tc, ci, cp) == -1) *digits_buf_tail++ = '-'; if (*cp == '_') parse_error(tc, s, "'e' or 'E' can't be followed by _"); while (*cp == '_' || (digit = cp_value(tc, *cp)) != -1) { if (*cp != '_') { if (digit >= radix) break; *digits_buf_tail++ = '0' + digit; e_digits++; } get_cp(tc, ci, cp); } if (e_digits == 0) parse_error(tc, s, "'e' or 'E' must be followed by one or more valid digits"); } *digits_buf_tail = '\0'; result = strtod(digits_buf, NULL); MVM_free(digits_buf); return result; }
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) { MVMObject *result; MVMint64 zvalue = 0; MVMint64 zbase = 1; MVMint64 chars = MVM_string_graphs(tc, str); MVMint64 value = zvalue; MVMint64 base = zbase; MVMint64 pos = -1; MVMuint16 neg = 0; MVMint64 ch; if (radix > 36) { MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix); } ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0; if ((flag & 0x02) && (ch == '+' || ch == '-')) { neg = (ch == '-'); offset++; ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0; } while (offset < chars) { if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10; else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10; else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) { /* As of Unicode 6.0.0, we know that Nd category numerals are within * the range 0..9 */ /* the string returned for NUMERIC_VALUE contains a floating point * value, so atoi will stop on the . in the string. This is fine * though, since we'd have to truncate the float regardless. */ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE)); } else break; if (ch >= radix) break; zvalue = zvalue * radix + ch; zbase = zbase * radix; offset++; pos = offset; if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; } if (offset >= chars) break; ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset); if (ch != '_') continue; offset++; if (offset >= chars) break; ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset); } if (neg || flag & 0x01) { value = -value; } /* initialize the object */ result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type); MVMROOT(tc, result, { MVMObject *box_type = MVM_hll_current(tc)->int_box_type; MVMROOT(tc, box_type, { MVMObject *boxed = MVM_repr_box_int(tc, box_type, value); MVM_repr_push_o(tc, result, boxed); boxed = MVM_repr_box_int(tc, box_type, base); MVM_repr_push_o(tc, result, boxed); boxed = MVM_repr_box_int(tc, box_type, pos); MVM_repr_push_o(tc, result, boxed); }); });
/* Set the line separator. */ void MVM_io_syncstream_set_separator(MVMThreadContext *tc, MVMOSHandle *h, MVMString *sep) { /* For now, take last character. */ MVMIOSyncStreamData *data = (MVMIOSyncStreamData *)h->body.data; data->sep = (MVMGrapheme32)MVM_string_get_grapheme_at(tc, sep, MVM_string_graphs(tc, sep) - 1); }