Example #1
0
/* Parses digits in the given radix out of str, starting at grapheme index
 * offset. Digits are 0-9, a-z and A-Z (letters count from 10 upwards); a
 * single '_' is skipped after each digit.
 *
 * Bits of flag, as used below:
 *   0x01 - negate the resulting value
 *   0x02 - accept one leading '+' or '-' before the digits
 *   0x04 - a zero digit does not update the reported value/base on its own,
 *          so trailing zeros are left out of both
 *
 * Returns a BOOTNumArray with three numbers pushed in this order:
 *   value - the number parsed
 *   base  - radix raised to the count of digits that went into value
 *   pos   - the offset just past the last digit consumed, or -1 if no digit
 *           was consumed
 *
 * Throws if radix is greater than 36.
 * NOTE(review): offset is only compared against the upper bound; a negative
 * offset is passed straight to the unchecked codepoint lookup - confirm that
 * callers never pass one. */
MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
    MVMObject *result;
    MVMnum64 zvalue = 0.0;
    MVMnum64 zbase  = 1.0;
    MVMint64 chars  = NUM_GRAPHS(str);
    MVMnum64 value  = zvalue;
    MVMnum64 base   = zbase;
    MVMint64   pos  = -1;
    MVMuint16  neg  = 0;
    MVMint64   ch;

    if (radix > 36) {
        /* radix is 64 bits wide, so it must not be printed with %d. */
        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %lld (max 36)", (long long)radix);
    }

    ch = (offset < chars) ? MVM_string_get_codepoint_at_nocheck(tc, str, offset) : 0;
    if ((flag & 0x02) && (ch == '+' || ch == '-')) {
        neg = (ch == '-');
        offset++;
        ch = (offset < chars) ? MVM_string_get_codepoint_at_nocheck(tc, str, offset) : 0;
    }

    while (offset < chars) {
        /* Turn the character into its digit value, or stop at a non-digit. */
        if (ch >= '0' && ch <= '9') ch = ch - '0';
        else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
        else break;
        if (ch >= radix) break;

        /* zvalue/zbase always include every digit seen; value/base only
         * catch up on a non-zero digit when flag 0x04 is set. */
        zvalue = zvalue * radix + ch;
        zbase = zbase * radix;
        offset++; pos = offset;
        if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
        if (offset >= chars) break;
        ch = MVM_string_get_codepoint_at_nocheck(tc, str, offset);

        /* Step over a single underscore separating digits. */
        if (ch != '_') continue;
        offset++;
        if (offset >= chars) break;
        ch = MVM_string_get_codepoint_at_nocheck(tc, str, offset);
    }

    if (neg || (flag & 0x01)) { value = -value; }

    /* initialize the object */
    result = MVM_repr_alloc_init(tc, tc->instance->boot_types->BOOTNumArray);

    MVM_repr_push_n(tc, result, value);
    MVM_repr_push_n(tc, result, base);
    MVM_repr_push_n(tc, result, pos);

    return result;
}
Example #2
0
/* Produces an ASCII encoding of the substring of str that begins at grapheme
 * index start and spans length graphemes; a length of -1 means "up to the
 * end of the string". Every codepoint outside 0..127 is written as '?'.
 * The returned buffer comes from malloc and carries a terminating zero byte
 * that is not counted in *output_size (which is only written when
 * output_size is non-NULL). Throws if start or length is out of range. */
MVMuint8 * MVM_string_ascii_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length) {
    MVMStringIndex total = NUM_GRAPHS(str);
    MVMuint32 count;
    MVMuint8 *buffer;
    MVMuint8 *out;
    MVMint64 pos;
    MVMint64 stop;

    /* One output byte per grapheme, so the grapheme count is the byte count. */
    if (length == -1)
        count = (MVMuint32)(total - (MVMuint32)start);
    else
        count = (MVMuint32)length;

    /* start has to be validated before it takes part in the length check */
    if (start < 0 || start > total)
        MVM_exception_throw_adhoc(tc, "start out of range");
    if (length < -1 || start + count > total)
        MVM_exception_throw_adhoc(tc, "length out of range");

    buffer = malloc(count + 1);
    out    = buffer;
    stop   = start + count;
    for (pos = start; pos < stop; pos++) {
        MVMCodepoint32 cp = MVM_string_get_codepoint_at_nocheck(tc, str, pos);
        *out++ = (cp >= 0 && cp <= 127) ? (MVMuint8)cp : '?';
    }
    *out = 0;

    if (output_size)
        *output_size = count;
    return buffer;
}
Example #3
0
/* Encodes the specified substring to Windows-1252. The substring starts at
 * grapheme index start and covers length graphemes; a length of -1 means
 * "everything from start to the end of the string". Anything that cannot be
 * represented becomes a ?. The result buffer is allocated with malloc and is
 * NUL terminated, but the size stored in *output_size (when output_size is
 * non-NULL) is the non-NUL part. Throws if start or length is out of range. */
MVMuint8 * MVM_string_windows1252_encode_substr(MVMThreadContext *tc, MVMString *str, MVMuint64 *output_size, MVMint64 start, MVMint64 length) {
    /* Windows-1252 is a single byte encoding, so each grapheme will just become
     * a single byte. */
    MVMuint32 startu = (MVMuint32)start;
    MVMStringIndex strgraphs = NUM_GRAPHS(str);
    MVMuint32 lengthu = (MVMuint32)(length == -1 ? strgraphs - startu : length);
    MVMuint8 *result;
    size_t i;

    /* must check start first since it's used in the length check */
    if (start < 0 || start > strgraphs)
        MVM_exception_throw_adhoc(tc, "start out of range");
    /* -1 is the "to the end" sentinel already folded into lengthu above, so
     * only lengths below -1 are rejected here. */
    if (length < -1 || start + lengthu > strgraphs)
        MVM_exception_throw_adhoc(tc, "length out of range");

    /* Widen before adding the terminator so the size cannot wrap. */
    result = malloc((size_t)lengthu + 1);
    for (i = 0; i < lengthu; i++) {
        MVMint32 codepoint = MVM_string_get_codepoint_at_nocheck(tc, str, start + i);
        if ((codepoint >= 0 && codepoint < 128) || (codepoint >= 152 && codepoint < 256)) {
            /* These codepoints are emitted as the byte of the same value. */
            result[i] = (MVMuint8)codepoint;
        }
        else if (codepoint > 8364 || codepoint < 0) {
            /* Negative codepoints and anything above 8364 (U+20AC) are
             * replaced without consulting the lookup. */
            result[i] = '?';
        }
        else {
            /* The remaining codepoints go through the lookup helper. */
            result[i] = windows1252_cp_to_char(codepoint);
        }
    }
    result[i] = 0;
    if (output_size)
        *output_size = lengthu;
    return result;
}
Example #4
0
File: ops.c Project: bingos/MoarVM
/* Scans string a from the front and returns the index of the first grapheme
 * equal to codepoint, or -1 when there is none. Handy for looking up
 * membership in a small character class. */
MVMint64 MVM_string_index_of_codepoint(MVMThreadContext *tc, MVMString *a, MVMint64 codepoint) {
    size_t pos;

    for (pos = 0; pos < NUM_GRAPHS(a); pos++) {
        /* XXX make this use the traversal function */
        if (MVM_string_get_codepoint_at_nocheck(tc, a, pos) == codepoint)
            return pos;
    }
    return -1;
}
Example #5
0
File: ops.c Project: bingos/MoarVM
/* Returns the codepoint (could be a negative synthetic) at a given index of
 * the string. Throws if a is not a concrete string, or if index is negative
 * or not below the string's grapheme count. */
MVMint64 MVM_string_get_codepoint_at(MVMThreadContext *tc, MVMString *a, MVMint64 index) {
    MVMStringIndex agraphs;

    if (!IS_CONCRETE((MVMObject *)a)) {
        MVM_exception_throw_adhoc(tc, "codepoint_at needs a concrete string");
    }

    agraphs = NUM_GRAPHS(a);

    /* Both arguments are cast to long long so they match %lld exactly; doing
     * the subtraction after the cast also makes an empty string report a
     * maximum of -1 instead of a wrapped-around value. */
    if (index < 0 || index >= agraphs)
        MVM_exception_throw_adhoc(tc, "Invalid string index: max %lld, got %lld",
            (long long)agraphs - 1, (long long)index);
    return (MVMint64)MVM_string_get_codepoint_at_nocheck(tc, a, index);
}
Example #6
0
File: ops.c Project: bingos/MoarVM
/* Fetches the codepoint at index with no validation at all; meant for
 * internal VM use only. Ropes are resolved by descending into the strand
 * that covers the index until a flat (int32 or uint8) string is reached.
 * Throws if the string's type flags match none of the known kinds. */
MVMCodepoint32 MVM_string_get_codepoint_at_nocheck(MVMThreadContext *tc, MVMString *a, MVMint64 index) {
    MVMString      *cur = a;
    MVMStringIndex  pos = (MVMStringIndex)index;

    for (;;) {
        switch (STR_FLAGS(cur)) {
            case MVM_STRING_TYPE_INT32:
                return cur->body.int32s[pos];
            case MVM_STRING_TYPE_UINT8:
                return (MVMCodepoint32)cur->body.uint8s[pos];
            case MVM_STRING_TYPE_ROPE: {
                /* Translate the position into the covering strand's own
                 * string and look again from there. */
                MVMStrand *strand = cur->body.strands + find_strand_index(cur, pos);
                MVMint64   inner  = pos - strand->compare_offset + strand->string_offset;
                cur = strand->string;
                pos = (MVMStringIndex)inner;
                continue;
            }
            default:
                MVM_exception_throw_adhoc(tc, "internal string corruption");
                return 0;
        }
    }
}
Example #7
0
/* Boolifies a string: yields 0 for a NULL or non-concrete string, for an
 * empty string, and for a one-grapheme string whose codepoint is 48 (the
 * digit zero); yields 1 for everything else. */
MVMint64 MVM_coerce_istrue_s(MVMThreadContext *tc, MVMString *str) {
    if (str == NULL || !IS_CONCRETE(str))
        return 0;
    if (NUM_GRAPHS(str) == 0)
        return 0;
    if (NUM_GRAPHS(str) == 1 && MVM_string_get_codepoint_at_nocheck(tc, str, 0) == 48)
        return 0;
    return 1;
}