/* Parses digits of the given radix out of str, starting at offset.
 *
 * Digits are 0-9, then a-z / A-Z (case-insensitive) for values 10..35. A
 * single '_' between digits is skipped. Parsing stops at the first character
 * that is not a digit valid for the radix, or at the end of the string.
 *
 * flag bits, as used below:
 *   0x01 - negate the resulting value
 *   0x02 - accept a leading '+' or '-' sign
 *   0x04 - a zero digit does not update the reported value/base; they only
 *          advance when a non-zero digit is seen
 *
 * Returns a BOOTNumArray holding three numbers:
 *   [0] the parsed value,
 *   [1] radix raised to the number of digits counted towards the value,
 *   [2] the offset just past the last digit consumed, or -1 if no digit
 *       was consumed.
 *
 * Throws if radix is greater than 36.
 *
 * NOTE(review): a negative offset is not rejected here and would be passed to
 * the unchecked codepoint accessor - confirm callers never pass one. */
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
    MVMObject *result;
    /* zvalue/zbase track every digit seen; value/base are what gets reported
     * (they can lag behind when flag 0x04 is set and zeros are being read). */
    MVMnum64 zvalue = 0.0;
    MVMnum64 zbase  = 1.0;
    MVMint64 chars  = NUM_GRAPHS(str);
    MVMnum64 value  = zvalue;
    MVMnum64 base   = zbase;
    MVMint64 pos    = -1;
    MVMuint16 neg   = 0;
    MVMint64 ch;

    if (radix > 36) {
        /* radix is 64-bit; the conversion specifier and the argument type
         * must agree in a varargs call. */
        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %lld (max 36)", (long long)radix);
    }

    ch = (offset < chars) ? MVM_string_get_codepoint_at_nocheck(tc, str, offset) : 0;
    if ((flag & 0x02) && (ch == '+' || ch == '-')) {
        neg = (ch == '-');
        offset++;
        ch = (offset < chars) ? MVM_string_get_codepoint_at_nocheck(tc, str, offset) : 0;
    }

    while (offset < chars) {
        /* Map the character to its digit value, or stop if it isn't one. */
        if (ch >= '0' && ch <= '9') ch = ch - '0';
        else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
        else break;
        if (ch >= radix) break;

        zvalue = zvalue * radix + ch;
        zbase  = zbase * radix;
        offset++;
        pos = offset;
        if (ch != 0 || !(flag & 0x04)) {
            value = zvalue;
            base  = zbase;
        }

        if (offset >= chars) break;
        ch = MVM_string_get_codepoint_at_nocheck(tc, str, offset);
        if (ch != '_') continue;

        /* Skip a single underscore separator and load what follows it. */
        offset++;
        if (offset >= chars) break;
        ch = MVM_string_get_codepoint_at_nocheck(tc, str, offset);
    }

    if (neg || flag & 0x01) {
        value = -value;
    }

    /* initialize the object */
    result = MVM_repr_alloc_init(tc, tc->instance->boot_types->BOOTNumArray);
    MVM_repr_push_n(tc, result, value);
    MVM_repr_push_n(tc, result, base);
    MVM_repr_push_n(tc, result, pos);

    return result;
}
/* Encodes the specified substring to ASCII. Anything outside of ASCII range
 * will become a ?. The result string is NULL terminated, but the specified
 * size is the non-null part.
 *
 * start  - index of the first grapheme to encode; must be in 0..NUM_GRAPHS(str).
 * length - number of graphemes to encode, or -1 for "everything from start
 *          to the end of the string".
 * output_size - if non-NULL, receives the number of bytes written, not
 *          counting the terminator.
 *
 * Throws if start or length are out of range, or if the buffer cannot be
 * allocated. The returned buffer comes from malloc. */
MVMuint8 * MVM_string_ascii_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length) {
    /* ASCII is a single byte encoding, so each grapheme will just become
     * a single byte. */
    MVMStringIndex strgraphs = NUM_GRAPHS(str);
    size_t count;
    MVMuint8 *result;
    size_t i;

    /* must check start first since it's used in the length check */
    if (start < 0 || start > strgraphs)
        MVM_exception_throw_adhoc(tc, "start out of range");

    /* Validate length at full 64-bit width before narrowing it, so an
     * oversized value cannot wrap into something that looks in range. The
     * subtraction cannot go negative because start <= strgraphs here. */
    if (length < -1 || (length != -1 && length > (MVMint64)strgraphs - start))
        MVM_exception_throw_adhoc(tc, "length out of range");

    count = (size_t)(length == -1 ? (MVMint64)strgraphs - start : length);

    result = malloc(count + 1);
    if (!result)
        MVM_exception_throw_adhoc(tc, "failed to allocate ASCII encoding buffer");

    for (i = 0; i < count; i++) {
        MVMCodepoint32 ord = MVM_string_get_codepoint_at_nocheck(tc, str, start + i);
        if (ord >= 0 && ord <= 127)
            result[i] = (MVMuint8)ord;
        else
            result[i] = '?';
    }
    result[i] = 0;

    if (output_size)
        *output_size = count;
    return result;
}
/* Encodes the specified substring to Windows-1252. Anything outside of Windows-1252 range
 * will become a ?. The result string is NULL terminated, but the specified
 * size is the non-null part.
 *
 * start  - index of the first grapheme to encode; must be in 0..NUM_GRAPHS(str).
 * length - number of graphemes to encode, or -1 for "everything from start
 *          to the end of the string" (same convention as the ASCII encoder).
 * output_size - if non-NULL, receives the number of bytes written, not
 *          counting the terminator.
 *
 * Throws if start or length are out of range, or if the buffer cannot be
 * allocated. The returned buffer comes from malloc. */
MVMuint8 * MVM_string_windows1252_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length) {
    /* Windows-1252 is a single byte encoding, so each grapheme will just become
     * a single byte. */
    MVMStringIndex strgraphs = NUM_GRAPHS(str);
    size_t count;
    MVMuint8 *result;
    size_t i;

    /* must check start first since it's used in the length check */
    if (start < 0 || start > strgraphs)
        MVM_exception_throw_adhoc(tc, "start out of range");

    /* -1 is a valid "to end of string" request; anything else is checked at
     * full 64-bit width. The subtraction cannot go negative because
     * start <= strgraphs here. */
    if (length < -1 || (length != -1 && length > (MVMint64)strgraphs - start))
        MVM_exception_throw_adhoc(tc, "length out of range");

    count = (size_t)(length == -1 ? (MVMint64)strgraphs - start : length);

    result = malloc(count + 1);
    if (!result)
        MVM_exception_throw_adhoc(tc, "failed to allocate Windows-1252 encoding buffer");

    for (i = 0; i < count; i++) {
        MVMint32 codepoint = MVM_string_get_codepoint_at_nocheck(tc, str, start + i);
        /* NOTE(review): codepoints 152..159 are passed through as raw bytes,
         * although the code page assigns most of bytes 0x98..0x9F to other
         * characters - confirm this is intended. */
        if ((codepoint >= 0 && codepoint < 128) || (codepoint >= 152 && codepoint < 256)) {
            result[i] = (MVMuint8)codepoint;
        }
        /* U+2122 (8482, byte 0x99) is the highest codepoint the code page
         * can represent; anything above it, or negative, has no encoding. */
        else if (codepoint > 8482 || codepoint < 0) {
            result[i] = '?';
        }
        else {
            /* Remaining candidates are resolved by the lookup helper. */
            result[i] = windows1252_cp_to_char(codepoint);
        }
    }
    result[i] = 0;

    if (output_size)
        *output_size = count;
    return result;
}
/* Scans a string front to back for a codepoint and reports the index of its
 * first occurrence, or -1 when the string does not contain it. Intended for
 * lookups against small character classes. */
MVMint64 MVM_string_index_of_codepoint(MVMThreadContext *tc, MVMString *a, MVMint64 codepoint) {
    size_t pos;

    /* XXX make this use the traversal function */
    for (pos = 0; pos < NUM_GRAPHS(a); pos++) {
        if (MVM_string_get_codepoint_at_nocheck(tc, a, pos) == codepoint) {
            return pos;
        }
    }

    return -1;
}
/* returns the codepoint (could be a negative synthetic) at a given index of the string.
 *
 * Throws if a is not a concrete string, or if index is negative or not less
 * than the number of graphemes in the string. */
MVMint64 MVM_string_get_codepoint_at(MVMThreadContext *tc, MVMString *a, MVMint64 index) {
    MVMStringIndex agraphs;

    if (!IS_CONCRETE((MVMObject *)a)) {
        MVM_exception_throw_adhoc(tc, "codepoint_at needs a concrete string");
    }

    agraphs = NUM_GRAPHS(a);

    if (index < 0 || index >= agraphs) {
        /* Cast both arguments so they match %lld in the varargs call; doing
         * the subtraction after the cast reports max as -1 for an empty
         * string rather than a wrapped value. */
        MVM_exception_throw_adhoc(tc, "Invalid string index: max %lld, got %lld",
            (long long)agraphs - 1, (long long)index);
    }

    return (MVMint64)MVM_string_get_codepoint_at_nocheck(tc, a, index);
}
/* Fetches the codepoint at an index with no validation of the string or the
 * index; for internal VM use only. Flat int32/uint8 strings are indexed
 * directly; a rope is resolved by descending into the strand that covers the
 * index until a flat string is reached. */
MVMCodepoint32 MVM_string_get_codepoint_at_nocheck(MVMThreadContext *tc, MVMString *a, MVMint64 index) {
    MVMString      *cur = a;
    MVMStringIndex  pos = (MVMStringIndex)index;

    for (;;) {
        switch (STR_FLAGS(cur)) {
            case MVM_STRING_TYPE_INT32:
                return cur->body.int32s[pos];
            case MVM_STRING_TYPE_UINT8:
                return (MVMCodepoint32)cur->body.uint8s[pos];
            case MVM_STRING_TYPE_ROPE: {
                MVMStrand *strand = cur->body.strands + find_strand_index(cur, pos);
                /* Translate the index into the strand's own string, going
                 * through MVMint64 exactly as passing it as an argument would. */
                pos = (MVMStringIndex)(MVMint64)(pos - strand->compare_offset + strand->string_offset);
                cur = strand->string;
                continue;
            }
        }

        /* Unknown storage type. */
        MVM_exception_throw_adhoc(tc, "internal string corruption");
        return 0;
    }
}
/* Boolifies a string: yields 0 for a NULL string, a non-concrete string, an
 * empty string, or a one-grapheme string whose only codepoint is 48 ('0' in
 * ASCII); yields 1 for everything else. */
MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) {
    if (str == NULL || !IS_CONCRETE(str)) {
        return 0;
    }

    if (NUM_GRAPHS(str) == 0) {
        return 0;
    }

    if (NUM_GRAPHS(str) == 1 && MVM_string_get_codepoint_at_nocheck(tc, str, 0) == 48) {
        return 0;
    }

    return 1;
}