Esempio n. 1
0
//
//  Compare_Vector: C
//
REBINT Compare_Vector(const RELVAL *v1, const RELVAL *v2)
{
    REBCNT l1 = VAL_LEN_AT(v1);
    REBCNT l2 = VAL_LEN_AT(v2);
    REBCNT len = MIN(l1, l2);
    REBCNT n;
    REBU64 i1;
    REBU64 i2;
    REBYTE *d1 = SER_DATA_RAW(VAL_SERIES(v1));
    REBYTE *d2 = SER_DATA_RAW(VAL_SERIES(v2));
    REBCNT b1 = VECT_TYPE(VAL_SERIES(v1));
    REBCNT b2 = VECT_TYPE(VAL_SERIES(v2));

    if ((b1 >= VTSF08 && b2 < VTSF08) || (b2 >= VTSF08 && b1 < VTSF08))
        fail (Error(RE_NOT_SAME_TYPE));

    for (n = 0; n < len; n++) {
        i1 = get_vect(b1, d1, n + VAL_INDEX(v1));
        i2 = get_vect(b2, d2, n + VAL_INDEX(v2));
        if (i1 != i2) break;
    }

    if (n != len) {
        if (i1 > i2) return 1;
        return -1;
    }

    return l1 - l2;
}
Esempio n. 2
0
//
//  MAKE_Function: C
// 
// For REB_FUNCTION and "make spec", there is a function spec block and then
// a block of Rebol code implementing that function.  In that case we expect
// that `def` should be:
// 
//     [[spec] [body]]
// 
// With REB_COMMAND, the code is implemented via a C DLL, under a system of
// APIs that pre-date Rebol's open sourcing and hence Ren/C:
// 
//     [[spec] extension command-num]
// 
// See notes in Make_Command() regarding that mechanism and meaning.
//
void MAKE_Function(REBVAL *out, enum Reb_Kind kind, const REBVAL *arg)
{
    assert(kind == REB_FUNCTION);

    if (
        !IS_BLOCK(arg)
        || VAL_LEN_AT(arg) != 2
        || !IS_BLOCK(VAL_ARRAY_AT(arg))
        || !IS_BLOCK(VAL_ARRAY_AT(arg) + 1)
    ){
        fail (Error_Bad_Make(kind, arg));
    }

    REBVAL spec;
    COPY_VALUE(&spec, VAL_ARRAY_AT(arg), VAL_SPECIFIER(arg));

    REBVAL body;
    COPY_VALUE(&body, VAL_ARRAY_AT(arg) + 1, VAL_SPECIFIER(arg));

    // Spec-constructed functions do *not* have definitional returns
    // added automatically.  They are part of the generators.  So the
    // behavior comes--as with any other generator--from the projected
    // code (though round-tripping it via text is not possible in
    // general in any case due to loss of bindings.)
    //
    REBFUN *fun = Make_Interpreted_Function_May_Fail(
        &spec, &body, MKF_ANY_VALUE
    );

    *out = *FUNC_VALUE(fun);
}
Esempio n. 3
0
//
//  Partial1: C
// 
// Process the /part (or /skip) and other length modifying
// arguments.
//
REBINT Partial1(REBVAL *sval, REBVAL *lval)
{
    REBI64 len;
    REBINT maxlen;
    REBINT is_ser = ANY_SERIES(sval);

    // If lval is not set or is BAR!, use the current len of the target value:
    if (IS_UNSET(lval) || IS_BAR(lval)) {
        if (!is_ser) return 1;
        if (VAL_INDEX(sval) >= VAL_LEN_HEAD(sval)) return 0;
        return (VAL_LEN_HEAD(sval) - VAL_INDEX(sval));
    }
    if (IS_INTEGER(lval) || IS_DECIMAL(lval)) len = Int32(lval);
    else {
        if (is_ser && VAL_TYPE(sval) == VAL_TYPE(lval) && VAL_SERIES(sval) == VAL_SERIES(lval))
            len = (REBINT)VAL_INDEX(lval) - (REBINT)VAL_INDEX(sval);
        else
            fail (Error(RE_INVALID_PART, lval));
    }

    if (is_ser) {
        // Restrict length to the size available:
        if (len >= 0) {
            maxlen = (REBINT)VAL_LEN_AT(sval);
            if (len > maxlen) len = maxlen;
        } else {
            len = -len;
            if (len > (REBINT)VAL_INDEX(sval)) len = (REBINT)VAL_INDEX(sval);
            VAL_INDEX(sval) -= (REBCNT)len;
        }
    }

    return (REBINT)len;
}
Esempio n. 4
0
static int Check_Char_Range(REBVAL *val, REBINT limit)
{
    REBCNT len;

    if (IS_CHAR(val)) {
        if (VAL_CHAR(val) > limit) return R_FALSE;
        return R_TRUE;
    }

    if (IS_INTEGER(val)) {
        if (VAL_INT64(val) > limit) return R_FALSE;
        return R_TRUE;
    }

    len = VAL_LEN_AT(val);
    if (VAL_BYTE_SIZE(val)) {
        REBYTE *bp = VAL_BIN_AT(val);
        if (limit == 0xff) return R_TRUE; // by definition
        for (; len > 0; len--, bp++)
            if (*bp > limit) return R_FALSE;
    } else {
        REBUNI *up = VAL_UNI_AT(val);
        for (; len > 0; len--, up++)
            if (*up > limit) return R_FALSE;
    }

    return R_TRUE;
}
Esempio n. 5
0
//
//  Find_Max_Bit: C
// 
// Return integer number for the maximum bit number defined by
// the value. Used to determine how much space to allocate.
//
REBINT Find_Max_Bit(REBVAL *val)
{
    REBINT maxi = 0;
    REBINT n;

    switch (VAL_TYPE(val)) {

    case REB_CHAR:
        maxi = VAL_CHAR(val)+1;
        break;

    case REB_INTEGER:
        maxi = Int32s(val, 0);
        break;

    case REB_STRING:
    case REB_FILE:
    case REB_EMAIL:
    case REB_URL:
    case REB_TAG:
//  case REB_ISSUE:
        n = VAL_INDEX(val);
        if (VAL_BYTE_SIZE(val)) {
            REBYTE *bp = VAL_BIN(val);
            for (; n < cast(REBINT, VAL_LEN_HEAD(val)); n++)
                if (bp[n] > maxi) maxi = bp[n];
        }
        else {
            REBUNI *up = VAL_UNI(val);
            for (; n < cast(REBINT, VAL_LEN_HEAD(val)); n++)
                if (up[n] > maxi) maxi = up[n];
        }
        maxi++;
        break;

    case REB_BINARY:
        maxi = VAL_LEN_AT(val) * 8 - 1;
        if (maxi < 0) maxi = 0;
        break;

    case REB_BLOCK:
        for (val = VAL_ARRAY_AT(val); NOT_END(val); val++) {
            n = Find_Max_Bit(val);
            if (n > maxi) maxi = n;
        }
        //maxi++;
        break;

    case REB_NONE:
        maxi = 0;
        break;

    default:
        return -1;
    }

    return maxi;
}
Esempio n. 6
0
//
//  MAKE_String: C
//
void MAKE_String(REBVAL *out, enum Reb_Kind kind, const REBVAL *def) {
    REBSER *ser; // goto would cross initialization

    if (IS_INTEGER(def)) {
        //
        // !!! R3-Alpha tolerated decimal, e.g. `make string! 3.14`, which
        // is semantically nebulous (round up, down?) and generally bad.
        //
        ser = Make_Binary(Int32s(def, 0));
        Val_Init_Series(out, kind, ser);
        return;
    }
    else if (IS_BLOCK(def)) {
        //
        // The construction syntax for making strings or binaries that are
        // preloaded with an offset into the data is #[binary [#{0001} 2]].
        // In R3-Alpha make definitions didn't have to be a single value
        // (they are for compatibility between construction syntax and MAKE
        // in Ren-C).  So the positional syntax was #[binary! #{0001} 2]...
        // while #[binary [#{0001} 2]] would join the pieces together in order
        // to produce #{000102}.  That behavior is not available in Ren-C.

        if (VAL_ARRAY_LEN_AT(def) != 2)
            goto bad_make;

        RELVAL *any_binstr = VAL_ARRAY_AT(def);
        if (!ANY_BINSTR(any_binstr))
            goto bad_make;
        if (IS_BINARY(any_binstr) != LOGICAL(kind == REB_BINARY))
            goto bad_make;

        RELVAL *index = VAL_ARRAY_AT(def) + 1;
        if (!IS_INTEGER(index))
            goto bad_make;

        REBINT i = Int32(index) - 1 + VAL_INDEX(any_binstr);
        if (i < 0 || i > cast(REBINT, VAL_LEN_AT(any_binstr)))
            goto bad_make;

        Val_Init_Series_Index(out, kind, VAL_SERIES(any_binstr), i);
        return;
    }

    if (kind == REB_BINARY)
        ser = make_binary(def, TRUE);
    else
        ser = MAKE_TO_String_Common(def);

    if (!ser)
        goto bad_make;

    Val_Init_Series_Index(out, kind, ser, 0);
    return;

bad_make:
    fail (Error_Bad_Make(kind, def));
}
Esempio n. 7
0
//
//  Partial: C
// 
// Args:
//     aval: target value
//     bval: argument to modify target (optional)
//     lval: length value (or none)
// 
// Determine the length of a /PART value. It can be:
//     1. integer or decimal
//     2. relative to A value (bval is null)
//     3. relative to B value
// 
// NOTE: Can modify the value's index!
// The result can be negative. ???
//
REBINT Partial(REBVAL *aval, REBVAL *bval, REBVAL *lval)
{
    REBVAL *val;
    REBINT len;
    REBINT maxlen;

    // If lval is unset, use the current len of the target value:
    if (IS_UNSET(lval)) {
        val = (bval && ANY_SERIES(bval)) ? bval : aval;
        if (VAL_INDEX(val) >= VAL_LEN_HEAD(val)) return 0;
        return (VAL_LEN_HEAD(val) - VAL_INDEX(val));
    }

    if (IS_INTEGER(lval) || IS_DECIMAL(lval)) {
        len = Int32(lval);
        val = bval;
    }
    else {
        // So, lval must be relative to aval or bval series:
        if (
            VAL_TYPE(aval) == VAL_TYPE(lval)
            && VAL_SERIES(aval) == VAL_SERIES(lval)
        ) {
            val = aval;
        }
        else if (
            bval
            && VAL_TYPE(bval) == VAL_TYPE(lval)
            && VAL_SERIES(bval) == VAL_SERIES(lval)
        ) {
            val = bval;
        }
        else
            fail (Error(RE_INVALID_PART, lval));

        len = cast(REBINT, VAL_INDEX(lval)) - cast(REBINT, VAL_INDEX(val));
    }

    if (!val) val = aval;

    // Restrict length to the size available
    //
    if (len >= 0) {
        maxlen = (REBINT)VAL_LEN_AT(val);
        if (len > maxlen) len = maxlen;
    }
    else {
        len = -len;
        if (len > cast(REBINT, VAL_INDEX(val)))
            len = cast(REBINT, VAL_INDEX(val));
        VAL_INDEX(val) -= (REBCNT)len;
    }

    return len;
}
Esempio n. 8
0
//
//  Cloak: C
// 
// Simple data scrambler. Quality depends on the key length.
// Result is made in place (data string).
// 
// The key (kp) is passed as a REBVAL or REBYTE (when klen is !0).
//
REBOOL Cloak(REBOOL decode, REBYTE *cp, REBCNT dlen, REBYTE *kp, REBCNT klen, REBOOL as_is)
{
    REBCNT i, n;
    REBYTE src[20];
    REBYTE dst[20];

    if (dlen == 0) return TRUE;

    // Decode KEY as VALUE field (binary, string, or integer)
    if (klen == 0) {
        REBVAL *val = (REBVAL*)kp;
        REBSER *ser;

        switch (VAL_TYPE(val)) {
        case REB_BINARY:
            kp = VAL_BIN_AT(val);
            klen = VAL_LEN_AT(val);
            break;
        case REB_STRING:
            ser = Temp_Bin_Str_Managed(val, &i, &klen);
            kp = BIN_AT(ser, i);
            break;
        case REB_INTEGER:
            INT_TO_STR(VAL_INT64(val), dst);
            klen = LEN_BYTES(dst);
            as_is = FALSE;
            break;
        }

        if (klen == 0) return FALSE;
    }

    if (!as_is) {
        for (i = 0; i < 20; i++) src[i] = kp[i % klen];
        SHA1(src, 20, dst);
        klen = 20;
        kp = dst;
    }

    if (decode)
        for (i = dlen-1; i > 0; i--) cp[i] ^= cp[i-1] ^ kp[i % klen];

    // Change starting byte based all other bytes.
    n = 0xa5;
    for (i = 1; i < dlen; i++) n += cp[i];
    cp[0] ^= (REBYTE)n;

    if (!decode)
        for (i = 1; i < dlen; i++) cp[i] ^= cp[i-1] ^ kp[i % klen];

    return TRUE;
}
Esempio n. 9
0
static REBSER *MAKE_TO_String_Common(const REBVAL *arg)
{
    REBSER *ser = 0;

    // MAKE/TO <type> <binary!>
    if (IS_BINARY(arg)) {
        REBYTE *bp = VAL_BIN_AT(arg);
        REBCNT len = VAL_LEN_AT(arg);
        switch (What_UTF(bp, len)) {
        case 0:
            break;
        case 8: // UTF-8 encoded
            bp  += 3;
            len -= 3;
            break;
        default:
            fail (Error(RE_BAD_UTF8));
        }
        ser = Decode_UTF_String(bp, len, 8); // UTF-8
    }
    // MAKE/TO <type> <any-string>
    else if (ANY_BINSTR(arg)) {
        ser = Copy_String_Slimming(VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN_AT(arg));
    }
    // MAKE/TO <type> <any-word>
    else if (ANY_WORD(arg)) {
        ser = Copy_Mold_Value(arg, 0 /* opts... MOPT_0? */);
    }
    // MAKE/TO <type> #"A"
    else if (IS_CHAR(arg)) {
        ser = (VAL_CHAR(arg) > 0xff) ? Make_Unicode(2) : Make_Binary(2);
        Append_Codepoint_Raw(ser, VAL_CHAR(arg));
    }
    else
        ser = Copy_Form_Value(arg, 1 << MOPT_TIGHT);

    return ser;
}
Esempio n. 10
0
//
//  Shuffle_String: C
//
// Randomize a string. Return a new string series.
// Handles both BYTE and UNICODE strings.
//
void Shuffle_String(REBVAL *value, bool secure)
{
    REBSTR *s = VAL_STRING(value);
    REBCNT idx = VAL_INDEX(value);

    REBCNT n;
    for (n = VAL_LEN_AT(value); n > 1;) {
        REBCNT k = idx + cast(REBCNT, Random_Int(secure)) % n;
        n--;
        REBUNI swap = GET_CHAR_AT(s, k);
        SET_CHAR_AT(s, k, GET_CHAR_AT(s, n + idx));
        SET_CHAR_AT(s, n + idx, swap);
    }
}
Esempio n. 11
0
//
//  Binary_To_Decimal: C
//
static void Binary_To_Decimal(const REBVAL *bin, REBVAL *out)
{
    REBI64 n = 0;
    REBSER *ser = VAL_SERIES(bin);
    REBCNT idx = VAL_INDEX(bin);
    REBCNT len = VAL_LEN_AT(bin);

    if (len > 8) len = 8;

    for (; len; len--, idx++) n = (n << 8) | (REBI64)(GET_ANY_CHAR(ser, idx));

    VAL_RESET_HEADER(out, REB_DECIMAL);
    INIT_DECIMAL_BITS(out, n);
}
Esempio n. 12
0
//
//  Complement_Binary: C
//
// Only valid for BINARY data.
//
REBSER *Complement_Binary(REBVAL *value)
{
    const REBYTE *bp = VAL_BIN_AT(value);
    REBCNT len = VAL_LEN_AT(value);

    REBSER *bin = Make_Binary(len);
    TERM_SEQUENCE_LEN(bin, len);

    REBYTE *dp = BIN_HEAD(bin);
    for (; len > 0; len--, ++bp, ++dp)
        *dp = ~(*bp);

    return bin;
}
Esempio n. 13
0
//
//  Split_Lines: C
//
// Given a string series, split lines on CR-LF.  Give back array of strings.
//
// Note: The definition of "line" in POSIX is a sequence of characters that
// end with a newline.  Hence, the last line of a file should have a newline
// marker, or it's not a "line")
//
// https://stackoverflow.com/a/729795
//
// This routine does not require it.
//
// !!! CR support is likely to be removed...and CR will be handled as a normal
// character, with special code needed to process it.
//
REBARR *Split_Lines(const REBVAL *str)
{
    REBDSP dsp_orig = DSP;

    REBCNT len = VAL_LEN_AT(str);
    REBCNT i = VAL_INDEX(str);
    if (i == len)
        return Make_Array(0);

    DECLARE_MOLD (mo);
    Push_Mold(mo);

    REBCHR(const*) cp = VAL_STRING_AT(str);

    REBUNI c;
    cp = NEXT_CHR(&c, cp);

    for (; i < len; ++i, cp = NEXT_CHR(&c, cp)) {
        if (c != LF && c != CR) {
            Append_Codepoint(mo->series, c);
            continue;
        }

        Init_Text(DS_PUSH(), Pop_Molded_String(mo));
        SET_CELL_FLAG(DS_TOP, NEWLINE_BEFORE);

        Push_Mold(mo);

        if (c == CR) {
            REBCHR(const*) tp = NEXT_CHR(&c, cp);
            if (c == LF) {
                ++i;
                cp = tp; // treat CR LF as LF, lone CR as LF
            }
        }
    }

    // If there's any remainder we pushed in the buffer, consider the end of
    // string to be an implicit line-break

    if (STR_SIZE(mo->series) == mo->offset)
        Drop_Mold(mo);
    else {
        Init_Text(DS_PUSH(), Pop_Molded_String(mo));
        SET_CELL_FLAG(DS_TOP, NEWLINE_BEFORE);
    }

    return Pop_Stack_Values_Core(dsp_orig, ARRAY_FLAG_NEWLINE_AT_TAIL);
}
Esempio n. 14
0
//
//  Shuffle_String: C
// 
// Randomize a string. Return a new string series.
// Handles both BYTE and UNICODE strings.
//
void Shuffle_String(REBVAL *value, REBOOL secure)
{
    REBCNT n;
    REBCNT k;
    REBSER *series = VAL_SERIES(value);
    REBCNT idx     = VAL_INDEX(value);
    REBUNI swap;

    for (n = VAL_LEN_AT(value); n > 1;) {
        k = idx + (REBCNT)Random_Int(secure) % n;
        n--;
        swap = GET_ANY_CHAR(series, k);
        SET_ANY_CHAR(series, k, GET_ANY_CHAR(series, n + idx));
        SET_ANY_CHAR(series, n + idx, swap);
    }
}
Esempio n. 15
0
//
//  Complement_Binary: C
// 
// Only valid for BINARY data.
//
REBSER *Complement_Binary(REBVAL *value)
{
        REBSER *series;
        REBYTE *str = VAL_BIN_AT(value);
        REBINT len = VAL_LEN_AT(value);
        REBYTE *out;

        series = Make_Binary(len);
        SET_SERIES_LEN(series, len);
        out = BIN_HEAD(series);
        for (; len > 0; len--) {
            *out++ = ~(*str);
            ++str;
        }

        return series;
}
Esempio n. 16
0
//
//  Bin_To_Money_May_Fail: C
//
// Will successfully convert or fail (longjmp) with an error.
//
void Bin_To_Money_May_Fail(REBVAL *result, REBVAL *val)
{
    REBCNT len;
    REBYTE buf[MAX_HEX_LEN+4] = {0}; // binary to convert

    if (IS_BINARY(val)) {
        len = VAL_LEN_AT(val);
        if (len > 12) len = 12;
        memcpy(buf, VAL_BIN_AT(val), len);
    }
    else
        fail (Error_Invalid_Arg(val));

    memcpy(buf + 12 - len, buf, len); // shift to right side
    memset(buf, 0, 12 - len);
    VAL_MONEY_AMOUNT(result) = binary_to_deci(buf);
}
Esempio n. 17
0
//
//  Shuffle_Vector: C
//
void Shuffle_Vector(REBVAL *vect, REBOOL secure)
{
    REBCNT n;
    REBCNT k;
    REBU64 swap;
    REBYTE *data = SER_DATA_RAW(VAL_SERIES(vect));
    REBCNT type = VECT_TYPE(VAL_SERIES(vect));
    REBCNT idx = VAL_INDEX(vect);

    // We can do it as INTS, because we just deal with the bits:
    if (type == VTSF32) type = VTUI32;
    else if (type == VTSF64) type = VTUI64;

    for (n = VAL_LEN_AT(vect); n > 1;) {
        k = idx + (REBCNT)Random_Int(secure) % n;
        n--;
        swap = get_vect(type, data, k);
        set_vect(type, data, k, get_vect(type, data, n + idx), 0);
        set_vect(type, data, n + idx, swap, 0);
    }
}
Esempio n. 18
0
//
//  Vector_To_Array: C
// 
// Convert a vector to a block.
//
REBARR *Vector_To_Array(const REBVAL *vect)
{
    REBCNT len = VAL_LEN_AT(vect);
    REBYTE *data = SER_DATA_RAW(VAL_SERIES(vect));
    REBCNT type = VECT_TYPE(VAL_SERIES(vect));
    REBARR *array = NULL;
    REBCNT n;
    RELVAL *val;

    if (len <= 0)
        fail (Error_Invalid_Arg(vect));

    array = Make_Array(len);
    val = ARR_HEAD(array);
    for (n = VAL_INDEX(vect); n < VAL_LEN_HEAD(vect); n++, val++) {
        VAL_RESET_HEADER(val, (type >= VTSF08) ? REB_DECIMAL : REB_INTEGER);
        VAL_INT64(val) = get_vect(type, data, n); // can be int or decimal
    }

    TERM_ARRAY_LEN(array, len);
    assert(IS_END(val));

    return array;
}
Esempio n. 19
0
void Set_Vector_Row(REBSER *ser, REBVAL *blk)
{
    REBCNT idx = VAL_INDEX(blk);
    REBCNT len = VAL_LEN_AT(blk);
    RELVAL *val;
    REBCNT n = 0;
    REBCNT bits = VECT_TYPE(ser);
    REBI64 i = 0;
    REBDEC f = 0;

    if (IS_BLOCK(blk)) {
        val = VAL_ARRAY_AT(blk);

        for (; NOT_END(val); val++) {
            if (IS_INTEGER(val)) {
                i = VAL_INT64(val);
                if (bits > VTUI64) f = (REBDEC)(i);
            }
            else if (IS_DECIMAL(val)) {
                f = VAL_DECIMAL(val);
                if (bits <= VTUI64) i = (REBINT)(f);
            }
            else fail (Error_Invalid_Arg_Core(val, VAL_SPECIFIER(blk)));
            //if (n >= ser->tail) Expand_Vector(ser);
            set_vect(bits, SER_DATA_RAW(ser), n++, i, f);
        }
    }
    else {
        REBYTE *data = VAL_BIN_AT(blk);
        for (; len > 0; len--, idx++) {
            set_vect(
                bits, SER_DATA_RAW(ser), n++, cast(REBI64, data[idx]), f
            );
        }
    }
}
Esempio n. 20
0
//
//  Split_Lines: C
// 
// Given a string series, split lines on CR-LF.
// Series can be bytes or Unicode.
//
REBARR *Split_Lines(REBVAL *val)
{
    REBARR *array = BUF_EMIT; // GC protected (because it is emit buffer)
    REBSER *str = VAL_SERIES(val);
    REBCNT len = VAL_LEN_AT(val);
    REBCNT idx = VAL_INDEX(val);
    REBCNT start = idx;
    REBSER *out;
    REBUNI c;

    RESET_ARRAY(array);

    while (idx < len) {
        c = GET_ANY_CHAR(str, idx);
        if (c == LF || c == CR) {
            out = Copy_String_Slimming(str, start, idx - start);
            val = Alloc_Tail_Array(array);
            Val_Init_String(val, out);
            SET_VAL_FLAG(val, VALUE_FLAG_LINE);
            idx++;
            if (c == CR && GET_ANY_CHAR(str, idx) == LF)
                idx++;
            start = idx;
        }
        else idx++;
    }
    // Possible remainder (no terminator)
    if (idx > start) {
        out = Copy_String_Slimming(str, start, idx - start);
        val = Alloc_Tail_Array(array);
        Val_Init_String(val, out);
        SET_VAL_FLAG(val, VALUE_FLAG_LINE);
    }

    return Copy_Array_Shallow(array, SPECIFIED); // no relative values
}
Esempio n. 21
0
//
//  Temp_Bin_Str_Managed: C
// 
// Determines if UTF8 conversion is needed for a series before it
// is used with a byte-oriented function.
// 
// If conversion is needed, a UTF8 series will be created.  Otherwise,
// the source series is returned as-is.
// 
// Note: This routine should only be used to generate a value used
// for temporary purposes, because it has a "surprising variance"
// regarding its input.  If the value's series can be reused, it is--
// and this depends on an implementation detail of internal encoding
// that the user should not be aware of (they need not know if the
// internal representation of an ASCII string uses 1, 2, or however
// many bytes).  But copying vs. non-copying means the resulting
// data might or might not have previous values available to step
// back into from the originating series!
// 
// !!! Should performance dictate it, the callsites could be
// adapted to know whether this produced a new series or not, and
// instead of managing a created result they could be responsible
// for freeing it if so.
//
REBSER *Temp_Bin_Str_Managed(const REBVAL *val, REBCNT *index, REBCNT *length)
{
    REBCNT len = (length && *length) ? *length : VAL_LEN_AT(val);
    REBSER *series;

    assert(IS_BINARY(val) || ANY_STRING(val));

    // !!! This used to check `len == 0` and reuse a zero length string.
    // However, the zero length string could have the wrong width.  We are
    // expected to be returning a BYTE_SIZE() string, and that confused
    // things.  It's not a good idea to mutate the source string (e.g.
    // reallocate under a new width) so consider having an EMPTY_BYTE_STRING
    // like EMPTY_ARRAY which is protected to hand back.
    //
    if (
        IS_BINARY(val)
        || (
            VAL_BYTE_SIZE(val)
            && All_Bytes_ASCII(VAL_BIN_AT(val), VAL_LEN_AT(val))
        )
    ){
        //
        // It's BINARY!, or an ANY-STRING! whose codepoints are all values in
        // ASCII (0x00 => 0x7F), hence not needing any UTF-8 encoding.
        //
        series = VAL_SERIES(val);
        ASSERT_SERIES_MANAGED(series);

        if (index)
            *index = VAL_INDEX(val);
        if (length)
            *length = len;
    }
    else {
        // UTF-8 conversion is required, and we manage the result.

        series = Make_UTF8_From_Any_String(val, len, OPT_ENC_CRLF_MAYBE);
        MANAGE_SERIES(series);

    #if !defined(NDEBUG)
        //
        // Also, PROTECT the result in the debug build...because since the
        // caller doesn't know if a new series was created or if the initial
        // data is being used, they should not be modifying it!  (We don't
        // want to protect the original data, because we wouldn't know when
        // we were allowed to unlock it...there's no later call in this
        // model to clean up the series.)
        {
            REBVAL protect;
            Val_Init_String(&protect, series);

            Protect_Value(&protect, FLAGIT(PROT_SET));

            // just a string...not /DEEP...shouldn't need to Unmark()
        }
    #endif

        if (index)
            *index = 0;
        if (length)
            *length = SER_LEN(series);
    }

    assert(BYTE_SIZE(series));
    return series;
}
Esempio n. 22
0
//
//  Serial_Actor: C
//
static REB_R Serial_Actor(REBFRM *frame_, REBCTX *port, REBSYM action)
{
    REBREQ *req;    // IO request
    REBVAL *spec;   // port spec
    REBVAL *arg;    // action argument value
    REBVAL *val;    // e.g. port number value
    REBINT result;  // IO result
    REBCNT refs;    // refinement argument flags
    REBCNT len;     // generic length
    REBSER *ser;    // simplifier
    REBVAL *path;

    Validate_Port(port, action);

    *D_OUT = *D_ARG(1);

    // Validate PORT fields:
    spec = CTX_VAR(port, STD_PORT_SPEC);
    if (!IS_OBJECT(spec)) fail (Error(RE_INVALID_PORT));
    path = Obj_Value(spec, STD_PORT_SPEC_HEAD_REF);
    if (!path) fail (Error(RE_INVALID_SPEC, spec));

    //if (!IS_FILE(path)) fail (Error(RE_INVALID_SPEC, path));

    req = cast(REBREQ*, Use_Port_State(port, RDI_SERIAL, sizeof(*req)));

    // Actions for an unopened serial port:
    if (!IS_OPEN(req)) {

        switch (action) {

        case SYM_OPEN:
            arg = Obj_Value(spec, STD_PORT_SPEC_SERIAL_PATH);
            if (! (IS_FILE(arg) || IS_STRING(arg) || IS_BINARY(arg)))
                fail (Error(RE_INVALID_PORT_ARG, arg));

            req->special.serial.path = ALLOC_N(REBCHR, MAX_SERIAL_DEV_PATH);
            OS_STRNCPY(
                req->special.serial.path,
                //
                // !!! This is assuming VAL_DATA contains native chars.
                // Should it? (2 bytes on windows, 1 byte on linux/mac)
                //
                SER_AT(REBCHR, VAL_SERIES(arg), VAL_INDEX(arg)),
                MAX_SERIAL_DEV_PATH
            );
            arg = Obj_Value(spec, STD_PORT_SPEC_SERIAL_SPEED);
            if (! IS_INTEGER(arg))
                fail (Error(RE_INVALID_PORT_ARG, arg));

            req->special.serial.baud = VAL_INT32(arg);
            //Secure_Port(SYM_SERIAL, ???, path, ser);
            arg = Obj_Value(spec, STD_PORT_SPEC_SERIAL_DATA_SIZE);
            if (!IS_INTEGER(arg)
                || VAL_INT64(arg) < 5
                || VAL_INT64(arg) > 8
            ) {
                fail (Error(RE_INVALID_PORT_ARG, arg));
            }
            req->special.serial.data_bits = VAL_INT32(arg);

            arg = Obj_Value(spec, STD_PORT_SPEC_SERIAL_STOP_BITS);
            if (!IS_INTEGER(arg)
                || VAL_INT64(arg) < 1
                || VAL_INT64(arg) > 2
            ) {
                fail (Error(RE_INVALID_PORT_ARG, arg));
            }
            req->special.serial.stop_bits = VAL_INT32(arg);

            arg = Obj_Value(spec, STD_PORT_SPEC_SERIAL_PARITY);
            if (IS_BLANK(arg)) {
                req->special.serial.parity = SERIAL_PARITY_NONE;
            } else {
                if (!IS_WORD(arg))
                    fail (Error(RE_INVALID_PORT_ARG, arg));

                switch (VAL_WORD_SYM(arg)) {
                    case SYM_ODD:
                        req->special.serial.parity = SERIAL_PARITY_ODD;
                        break;
                    case SYM_EVEN:
                        req->special.serial.parity = SERIAL_PARITY_EVEN;
                        break;
                    default:
                        fail (Error(RE_INVALID_PORT_ARG, arg));
                }
            }

            arg = Obj_Value(spec, STD_PORT_SPEC_SERIAL_FLOW_CONTROL);
            if (IS_BLANK(arg)) {
                req->special.serial.flow_control = SERIAL_FLOW_CONTROL_NONE;
            } else {
                if (!IS_WORD(arg))
                    fail (Error(RE_INVALID_PORT_ARG, arg));

                switch (VAL_WORD_SYM(arg)) {
                    case SYM_HARDWARE:
                        req->special.serial.flow_control = SERIAL_FLOW_CONTROL_HARDWARE;
                        break;
                    case SYM_SOFTWARE:
                        req->special.serial.flow_control = SERIAL_FLOW_CONTROL_SOFTWARE;
                        break;
                    default:
                        fail (Error(RE_INVALID_PORT_ARG, arg));
                }
            }

            if (OS_DO_DEVICE(req, RDC_OPEN))
                fail (Error_On_Port(RE_CANNOT_OPEN, port, -12));
            SET_OPEN(req);
            return R_OUT;

        case SYM_CLOSE:
            return R_OUT;

        case SYM_OPEN_Q:
            return R_FALSE;

        default:
            fail (Error_On_Port(RE_NOT_OPEN, port, -12));
        }
    }

    // Actions for an open socket:
    switch (action) {

    case SYM_READ:
        refs = Find_Refines(frame_, ALL_READ_REFS);

        // Setup the read buffer (allocate a buffer if needed):
        arg = CTX_VAR(port, STD_PORT_DATA);
        if (!IS_STRING(arg) && !IS_BINARY(arg)) {
            Val_Init_Binary(arg, Make_Binary(32000));
        }
        ser = VAL_SERIES(arg);
        req->length = SER_AVAIL(ser); // space available
        if (req->length < 32000/2) Extend_Series(ser, 32000);
        req->length = SER_AVAIL(ser);

        // This used STR_TAIL (obsolete, equivalent to BIN_TAIL) but was it
        // sure the series was byte sized?  Added in a check.
        assert(BYTE_SIZE(ser));
        req->common.data = BIN_TAIL(ser); // write at tail

        //if (SER_LEN(ser) == 0)
        req->actual = 0;  // Actual for THIS read, not for total.
#ifdef DEBUG_SERIAL
        printf("(max read length %d)", req->length);
#endif
        result = OS_DO_DEVICE(req, RDC_READ); // recv can happen immediately
        if (result < 0) fail (Error_On_Port(RE_READ_ERROR, port, req->error));
#ifdef DEBUG_SERIAL
        for (len = 0; len < req->actual; len++) {
            if (len % 16 == 0) printf("\n");
            printf("%02x ", req->common.data[len]);
        }
        printf("\n");
#endif
        *D_OUT = *arg;
        return R_OUT;

    case SYM_WRITE:
        refs = Find_Refines(frame_, ALL_WRITE_REFS);

        // Determine length. Clip /PART to size of string if needed.
        spec = D_ARG(2);
        len = VAL_LEN_AT(spec);
        if (refs & AM_WRITE_PART) {
            REBCNT n = Int32s(D_ARG(ARG_WRITE_LIMIT), 0);
            if (n <= len) len = n;
        }

        // Setup the write:
        *CTX_VAR(port, STD_PORT_DATA) = *spec;  // keep it GC safe
        req->length = len;
        req->common.data = VAL_BIN_AT(spec);
        req->actual = 0;

        //Print("(write length %d)", len);
        result = OS_DO_DEVICE(req, RDC_WRITE); // send can happen immediately
        if (result < 0) fail (Error_On_Port(RE_WRITE_ERROR, port, req->error));
        break;

    case SYM_UPDATE:
        // Update the port object after a READ or WRITE operation.
        // This is normally called by the WAKE-UP function.
        arg = CTX_VAR(port, STD_PORT_DATA);
        if (req->command == RDC_READ) {
            if (ANY_BINSTR(arg)) {
                SET_SERIES_LEN(
                    VAL_SERIES(arg),
                    VAL_LEN_HEAD(arg) + req->actual
                );
            }
        }
        else if (req->command == RDC_WRITE) {
            SET_BLANK(arg);  // Write is done.
        }
        return R_BLANK;

    case SYM_OPEN_Q:
        return R_TRUE;

    case SYM_CLOSE:
        if (IS_OPEN(req)) {
            OS_DO_DEVICE(req, RDC_CLOSE);
            SET_CLOSED(req);
        }
        break;

    default:
        fail (Error_Illegal_Action(REB_PORT, action));
    }

    return R_OUT;
}
Esempio n. 23
0
//
//  MAKE_Tuple: C
//
REB_R MAKE_Tuple(
    REBVAL *out,
    enum Reb_Kind kind,
    const REBVAL *opt_parent,
    const REBVAL *arg
){
    assert(kind == REB_TUPLE);
    if (opt_parent)
        fail (Error_Bad_Make_Parent(kind, opt_parent));

    if (IS_TUPLE(arg))
        return Move_Value(out, arg);

    RESET_CELL(out, REB_TUPLE, CELL_MASK_NONE);
    REBYTE *vp = VAL_TUPLE(out);

    // !!! Net lookup parses IP addresses out of `tcp://93.184.216.34` or
    // similar URL!s.  In Rebol3 these captures come back the same type
    // as the input instead of as STRING!, which was a latent bug in the
    // network code of the 12-Dec-2012 release:
    //
    // https://github.com/rebol/rebol/blob/master/src/mezz/sys-ports.r#L110
    //
    // All attempts to convert a URL!-flavored IP address failed.  Taking
    // URL! here fixes it, though there are still open questions.
    //
    if (IS_TEXT(arg) or IS_URL(arg)) {
        REBSIZ size;
        const REBYTE *bp
            = Analyze_String_For_Scan(&size, arg, MAX_SCAN_TUPLE);

        if (Scan_Tuple(out, bp, size) == nullptr)
            fail (arg);
        return out;
    }

    if (ANY_ARRAY(arg)) {
        REBCNT len = 0;
        REBINT n;

        RELVAL *item = VAL_ARRAY_AT(arg);

        for (; NOT_END(item); ++item, ++vp, ++len) {
            if (len >= MAX_TUPLE)
                goto bad_make;
            if (IS_INTEGER(item)) {
                n = Int32(item);
            }
            else if (IS_CHAR(item)) {
                n = VAL_CHAR(item);
            }
            else
                goto bad_make;

            if (n > 255 || n < 0)
                goto bad_make;
            *vp = n;
        }

        VAL_TUPLE_LEN(out) = len;

        for (; len < MAX_TUPLE; len++) *vp++ = 0;
        return out;
    }

    REBCNT alen;

    if (IS_ISSUE(arg)) {
        REBSTR *spelling = VAL_STRING(arg);
        const REBYTE *ap = STR_HEAD(spelling);
        size_t size = STR_SIZE(spelling); // UTF-8 len
        if (size & 1)
            fail (arg); // must have even # of chars
        size /= 2;
        if (size > MAX_TUPLE)
            fail (arg); // valid even for UTF-8
        VAL_TUPLE_LEN(out) = size;
        for (alen = 0; alen < size; alen++) {
            REBYTE decoded;
            if ((ap = Scan_Hex2(&decoded, ap)) == NULL)
                fail (arg);
            *vp++ = decoded;
        }
    }
    else if (IS_BINARY(arg)) {
        REBYTE *ap = VAL_BIN_AT(arg);
        REBCNT len = VAL_LEN_AT(arg);
        if (len > MAX_TUPLE) len = MAX_TUPLE;
        VAL_TUPLE_LEN(out) = len;
        for (alen = 0; alen < len; alen++) *vp++ = *ap++;
    }
    else
        fail (arg);

    for (; alen < MAX_TUPLE; alen++) *vp++ = 0;
    return out;

  bad_make:
    fail (Error_Bad_Make(REB_TUPLE, arg));
}
Esempio n. 24
0
//
//  Xandor_Binary: C
//
// Only valid for BINARY data.
//
REBSER *Xandor_Binary(const REBVAL *verb, REBVAL *value, REBVAL *arg)
{
    REBYTE *p0 = VAL_BIN_AT(value);
    REBYTE *p1 = VAL_BIN_AT(arg);

    REBCNT t0 = VAL_LEN_AT(value);
    REBCNT t1 = VAL_LEN_AT(arg);

    REBCNT mt = MIN(t0, t1); // smaller array size

    // !!! This used to say "For AND - result is size of shortest input:" but
    // the code was commented out
    /*
        if (verb == A_AND || (verb == 0 && t1 >= t0))
            t2 = mt;
        else
            t2 = MAX(t0, t1);
    */

    REBCNT t2 = MAX(t0, t1);

    REBSER *series;
    if (IS_BITSET(value)) {
        //
        // Although bitsets and binaries share some implementation here,
        // they have distinct allocation functions...and bitsets need
        // to set the REBSER.misc.negated union field (BITS_NOT) as
        // it would be illegal to read it if it were cleared via another
        // element of the union.
        //
        assert(IS_BITSET(arg));
        series = Make_Bitset(t2 * 8);
    }
    else {
        // Ordinary binary
        //
        series = Make_Binary(t2);
        TERM_SEQUENCE_LEN(series, t2);
    }

    REBYTE *p2 = BIN_HEAD(series);

    switch (VAL_WORD_SYM(verb)) {
    case SYM_INTERSECT: { // and
        REBCNT i;
        for (i = 0; i < mt; i++)
            *p2++ = *p0++ & *p1++;
        CLEAR(p2, t2 - mt);
        return series; }

    case SYM_UNION: { // or
        REBCNT i;
        for (i = 0; i < mt; i++)
            *p2++ = *p0++ | *p1++;
        break; }

    case SYM_DIFFERENCE: { // xor
        REBCNT i;
        for (i = 0; i < mt; i++)
            *p2++ = *p0++ ^ *p1++;
        break; }

    case SYM_EXCLUDE: { // !!! not a "type action", word manually in %words.r
        REBCNT i;
        for (i = 0; i < mt; i++)
            *p2++ = *p0++ & ~*p1++;
        if (t0 > t1)
            memcpy(p2, p0, t0 - t1); // residual from first only
        return series; }

    default:
        fail (Error_Cannot_Use_Raw(verb, Datatype_From_Kind(REB_BINARY)));
    }

    // Copy the residual
    //
    memcpy(p2, ((t0 > t1) ? p0 : p1), t2 - mt);
    return series;
}
Esempio n. 25
0
//
//  Analyze_String_For_Scan: C
//
// Locate beginning byte pointer and number of bytes to prepare a string
// into a form that can be used with a Scan_XXX routine.  Used for instance
// to MAKE DATE! from a STRING!.  Rules are:
//
//     1. it's actual content (less space, newlines) <= max len
//     2. it does not contain other values ("123 456")
//     3. it's not empty or only whitespace
//
const REBYTE *Analyze_String_For_Scan(
    REBSIZ *opt_size_out,
    const REBVAL *any_string,
    REBCNT max_len // maximum length in *codepoints*
){
    REBCHR(const*) up = VAL_STRING_AT(any_string);
    REBCNT index = VAL_INDEX(any_string);
    REBCNT len = VAL_LEN_AT(any_string);
    if (len == 0)
        fail (Error_Past_End_Raw());

    REBUNI c;

    // Skip leading whitespace
    //
    for (; index < len; ++index, --len) {
        up = NEXT_CHR(&c, up);
        if (not IS_SPACE(c))
            break;
    }

    // Skip up to max_len non-space characters.
    //
    REBCNT num_chars = 0;
    for (; len > 0;) {
        ++num_chars;
        --len;

        // The R3-Alpha code would fail with Error_Invalid_Chars_Raw() if
        // there were UTF-8 characters in most calls.  Only ANY-WORD! from
        // ANY-STRING! allowed it.  Though it's not clear why it wouldn't be
        // better to delegate to the scanning routine itself to give a
        // more pointed error... allow c >= 0x80 for now.

        if (num_chars > max_len)
            fail (Error_Too_Long_Raw());

        up = NEXT_CHR(&c, up);
        if (IS_SPACE(c)) {
            --len;
            break;
        }
    }

    // Rest better be just spaces
    //
    for (; len > 0; --len) {
        up = NEXT_CHR(&c, up);
        if (!IS_SPACE(c))
            fail (Error_Invalid_Chars_Raw());
    }

    if (num_chars == 0)
        fail (Error_Past_End_Raw());

    DECLARE_LOCAL (reindexed);
    Move_Value(reindexed, any_string);
    VAL_INDEX(reindexed) = index;

    return VAL_UTF8_AT(opt_size_out, reindexed);
}
Esempio n. 26
0
//
//  Modify_Array: C
//
// Returns new dst_idx
//
REBCNT Modify_Array(
    REBCNT action,          // INSERT, APPEND, CHANGE
    REBARR *dst_arr,        // target
    REBCNT dst_idx,         // position
    const REBVAL *src_val,  // source
    REBCNT flags,           // AN_ONLY, AN_PART
    REBINT dst_len,         // length to remove
    REBINT dups             // dup count
) {
    REBCNT tail = ARR_LEN(dst_arr);

    REBINT ilen = 1; // length to be inserted

    const RELVAL *src_rel;
    REBCTX *specifier;

    if (IS_VOID(src_val) || dups < 0) {
        // If they are effectively asking for "no action" then all we have
        // to do is return the natural index result for the operation.
        // (APPEND will return 0, insert the tail of the insertion...so index)

        return (action == SYM_APPEND) ? 0 : dst_idx;
    }

    if (action == SYM_APPEND || dst_idx > tail) dst_idx = tail;

    // Check /PART, compute LEN:
    if (!GET_FLAG(flags, AN_ONLY) && ANY_ARRAY(src_val)) {
        // Adjust length of insertion if changing /PART:
        if (action != SYM_CHANGE && GET_FLAG(flags, AN_PART))
            ilen = dst_len;
        else
            ilen = VAL_LEN_AT(src_val);

        // Are we modifying ourselves? If so, copy src_val block first:
        if (dst_arr == VAL_ARRAY(src_val)) {
            REBARR *copy = Copy_Array_At_Shallow(
                VAL_ARRAY(src_val), VAL_INDEX(src_val), VAL_SPECIFIER(src_val)
            );
            MANAGE_ARRAY(copy); // !!! Review: worth it to not manage and free?
            src_rel = ARR_HEAD(copy);
            specifier = SPECIFIED; // copy already specified it
        }
        else {
            src_rel = VAL_ARRAY_AT(src_val); // skips by VAL_INDEX values
            specifier = VAL_SPECIFIER(src_val);
        }
    }
    else {
        // use passed in RELVAL and specifier
        src_rel = src_val;
        specifier = SPECIFIED; // it's a REBVAL, not a RELVAL, so specified
    }

    REBINT size = dups * ilen; // total to insert

    if (action != SYM_CHANGE) {
        // Always expand dst_arr for INSERT and APPEND actions:
        Expand_Series(ARR_SERIES(dst_arr), dst_idx, size);
    }
    else {
        if (size > dst_len)
            Expand_Series(ARR_SERIES(dst_arr), dst_idx, size-dst_len);
        else if (size < dst_len && GET_FLAG(flags, AN_PART))
            Remove_Series(ARR_SERIES(dst_arr), dst_idx, dst_len-size);
        else if (size + dst_idx > tail) {
            EXPAND_SERIES_TAIL(ARR_SERIES(dst_arr), size - (tail - dst_idx));
        }
    }

    tail = (action == SYM_APPEND) ? 0 : size + dst_idx;

#if !defined(NDEBUG)
    if (IS_ARRAY_MANAGED(dst_arr)) {
        REBINT i;
        for (i = 0; i < ilen; ++i)
            ASSERT_VALUE_MANAGED(&src_rel[i]);
    }
#endif

    for (; dups > 0; dups--) {
        REBINT index = 0;
        for (; index < ilen; ++index, ++dst_idx) {
            COPY_VALUE(
                SINK(ARR_HEAD(dst_arr) + dst_idx),
                src_rel + index,
                specifier
            );
        }
    }
    TERM_ARRAY_LEN(dst_arr, ARR_LEN(dst_arr));

    ASSERT_ARRAY(dst_arr);

    return tail;
}
Esempio n. 27
0
//
//  Modify_String: C
// 
// Returns new dst_idx.
//
REBCNT Modify_String(
    REBCNT action,          // INSERT, APPEND, CHANGE
    REBSER *dst_ser,        // target
    REBCNT dst_idx,         // position
    const REBVAL *src_val,  // source
    REBFLGS flags,          // AN_PART
    REBINT dst_len,         // length to remove
    REBINT dups             // dup count
) {
    REBSER *src_ser = 0;
    REBCNT src_idx = 0;
    REBCNT src_len;
    REBCNT tail  = SER_LEN(dst_ser);
    REBINT size;        // total to insert
    REBOOL needs_free;
    REBINT limit;

    // For INSERT/PART and APPEND/PART
    if (action != SYM_CHANGE && GET_FLAG(flags, AN_PART))
        limit = dst_len; // should be non-negative
    else
        limit = -1;

    if (limit == 0 || dups < 0) return (action == SYM_APPEND) ? 0 : dst_idx;
    if (action == SYM_APPEND || dst_idx > tail) dst_idx = tail;

    // If the src_val is not a string, then we need to create a string:
    if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series
        if (IS_INTEGER(src_val)) {
            src_ser = Make_Series_Codepoint(Int8u(src_val));
            needs_free = TRUE;
            limit = -1;
        }
        else if (IS_BLOCK(src_val)) {
            src_ser = Join_Binary(src_val, limit); // NOTE: it's the shared FORM buffer!
            needs_free = FALSE;
            limit = -1;
        }
        else if (IS_CHAR(src_val)) {
            //
            // "UTF-8 was originally specified to allow codepoints with up to
            // 31 bits (or 6 bytes). But with RFC3629, this was reduced to 4
            // bytes max. to be more compatible to UTF-16."  So depending on
            // which RFC you consider "the UTF-8", max size is either 4 or 6.
            //
            src_ser = Make_Binary(6);
            SET_SERIES_LEN(
                src_ser,
                Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val))
            );
            needs_free = TRUE;
            limit = -1;
        }
        else if (ANY_STRING(src_val)) {
            src_len = VAL_LEN_AT(src_val);
            if (limit >= 0 && src_len > cast(REBCNT, limit))
                src_len = limit;
            src_ser = Make_UTF8_From_Any_String(src_val, src_len, 0);
            needs_free = TRUE;
            limit = -1;
        }
        else if (!IS_BINARY(src_val))
            fail (Error_Invalid_Arg(src_val));
    }
    else if (IS_CHAR(src_val)) {
        src_ser = Make_Series_Codepoint(VAL_CHAR(src_val));
        needs_free = TRUE;
    }
    else if (IS_BLOCK(src_val)) {
        src_ser = Form_Tight_Block(src_val);
        needs_free = TRUE;
    }
    else if (!ANY_STRING(src_val) || IS_TAG(src_val)) {
        src_ser = Copy_Form_Value(src_val, 0);
        needs_free = TRUE;
    }

    // Use either new src or the one that was passed:
    if (src_ser) {
        src_len = SER_LEN(src_ser);
    }
    else {
        src_ser = VAL_SERIES(src_val);
        src_idx = VAL_INDEX(src_val);
        src_len = VAL_LEN_AT(src_val);
        needs_free = FALSE;
    }

    if (limit >= 0) src_len = limit;

    // If Source == Destination we need to prevent possible conflicts.
    // Clone the argument just to be safe.
    // (Note: It may be possible to optimize special cases like append !!)
    if (dst_ser == src_ser) {
        assert(!needs_free);
        src_ser = Copy_Sequence_At_Len(src_ser, src_idx, src_len);
        needs_free = TRUE;
        src_idx = 0;
    }

    // Total to insert:
    size = dups * src_len;

    if (action != SYM_CHANGE) {
        // Always expand dst_ser for INSERT and APPEND actions:
        Expand_Series(dst_ser, dst_idx, size);
    } else {
        if (size > dst_len)
            Expand_Series(dst_ser, dst_idx, size - dst_len);
        else if (size < dst_len && GET_FLAG(flags, AN_PART))
            Remove_Series(dst_ser, dst_idx, dst_len - size);
        else if (size + dst_idx > tail) {
            EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx));
        }
    }

    // For dup count:
    for (; dups > 0; dups--) {
        Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE);
        dst_idx += src_len;
    }

    TERM_SEQUENCE(dst_ser);

    if (needs_free) {
        // If we did not use the series that was passed in, but rather
        // created an internal temporary one, we need to free it.
        Free_Series(src_ser);
    }

    return (action == SYM_APPEND) ? 0 : dst_idx;
}
Esempio n. 28
0
//
//  MAKE_Tuple: C
//
void MAKE_Tuple(REBVAL *out, enum Reb_Kind type, const REBVAL *arg)
{
    if (IS_TUPLE(arg)) {
        *out = *arg;
        return;
    }

    VAL_RESET_HEADER(out, REB_TUPLE);
    REBYTE *vp = VAL_TUPLE(out);

    // !!! Net lookup parses IP addresses out of `tcp://93.184.216.34` or
    // similar URL!s.  In Rebol3 these captures come back the same type
    // as the input instead of as STRING!, which was a latent bug in the
    // network code of the 12-Dec-2012 release:
    //
    // https://github.com/rebol/rebol/blob/master/src/mezz/sys-ports.r#L110
    //
    // All attempts to convert a URL!-flavored IP address failed.  Taking
    // URL! here fixes it, though there are still open questions.
    //
    if (IS_STRING(arg) || IS_URL(arg)) {
        REBCNT len;
        REBYTE *ap = Temp_Byte_Chars_May_Fail(arg, MAX_SCAN_TUPLE, &len, FALSE);
        if (Scan_Tuple(ap, len, out))
            return;
        goto bad_arg;
    }

    if (ANY_ARRAY(arg)) {
        REBCNT len = 0;
        REBINT n;

        RELVAL *item = VAL_ARRAY_AT(arg);

        for (; NOT_END(item); ++item, ++vp, ++len) {
            if (len >= MAX_TUPLE) goto bad_make;
            if (IS_INTEGER(item)) {
                n = Int32(item);
            }
            else if (IS_CHAR(item)) {
                n = VAL_CHAR(item);
            }
            else
                goto bad_make;

            if (n > 255 || n < 0) goto bad_make;
            *vp = n;
        }

        VAL_TUPLE_LEN(out) = len;

        for (; len < MAX_TUPLE; len++) *vp++ = 0;
        return;
    }

    REBCNT alen;

    if (IS_ISSUE(arg)) {
        REBUNI c;
        const REBYTE *ap = VAL_WORD_HEAD(arg);
        REBCNT len = LEN_BYTES(ap);  // UTF-8 len
        if (len & 1) goto bad_arg; // must have even # of chars
        len /= 2;
        if (len > MAX_TUPLE) goto bad_arg; // valid even for UTF-8
        VAL_TUPLE_LEN(out) = len;
        for (alen = 0; alen < len; alen++) {
            const REBOOL unicode = FALSE;
            if (!Scan_Hex2(ap, &c, unicode)) goto bad_arg;
            *vp++ = cast(REBYTE, c);
            ap += 2;
        }
    }
    else if (IS_BINARY(arg)) {
        REBYTE *ap = VAL_BIN_AT(arg);
        REBCNT len = VAL_LEN_AT(arg);
        if (len > MAX_TUPLE) len = MAX_TUPLE;
        VAL_TUPLE_LEN(out) = len;
        for (alen = 0; alen < len; alen++) *vp++ = *ap++;
    }
    else goto bad_arg;

    for (; alen < MAX_TUPLE; alen++) *vp++ = 0;
    return;

bad_arg:
    fail (Error_Invalid_Arg(arg));

bad_make:
    fail (Error_Bad_Make(REB_TUPLE, arg));
}
Esempio n. 29
0
//
//  Make_Set_Operation_Series: C
// 
// Do set operations on a series.  Case-sensitive if `cased` is TRUE.
// `skip` is the record size.
//
static REBSER *Make_Set_Operation_Series(
    const REBVAL *val1,
    const REBVAL *val2,
    REBFLGS flags,
    REBOOL cased,
    REBCNT skip
) {
    REBCNT i;
    REBINT h = 1; // used for both logic true/false and hash check
    REBOOL first_pass = TRUE; // are we in the first pass over the series?
    REBSER *out_ser;

    assert(ANY_SERIES(val1));

    if (val2) {
        assert(ANY_SERIES(val2));

        if (ANY_ARRAY(val1)) {
            if (!ANY_ARRAY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));

            // As long as they're both arrays, we're willing to do:
            //
            //     >> union quote (a b c) 'b/d/e
            //     (a b c d e)
            //
            // The type of the result will match the first value.
        }
        else if (!IS_BINARY(val1)) {

            // We will similarly do any two ANY-STRING! types:
            //
            //      >> union <abc> "bde"
            //      <abcde>

            if (IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
        else {
            // Binaries only operate with other binaries

            if (!IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
    }

    // Calculate `i` as maximum length of result block.  The temporary buffer
    // will be allocated at this size, but copied out at the exact size of
    // the actual result.
    //
    i = VAL_LEN_AT(val1);
    if (flags & SOP_FLAG_BOTH) i += VAL_LEN_AT(val2);

    if (ANY_ARRAY(val1)) {
        REBSER *hser = 0;   // hash table for series
        REBSER *hret;       // hash table for return series

        // The buffer used for building the return series.  Currently it
        // reuses BUF_EMIT, because that buffer is not likely to be in
        // use (emit doesn't call set operations, nor vice versa).  However,
        // other routines may get the same idea and start recursing so it
        // may be better to use something more similar to the mold stack
        // approach of marking off successive ranges in the array.
        //
        REBSER *buffer = ARR_SERIES(BUF_EMIT);
        Resize_Series(buffer, i);
        hret = Make_Hash_Sequence(i);   // allocated

        // Optimization note: !!
        // This code could be optimized for small blocks by not hashing them
        // and extending Find_Key to FIND on the value itself w/o the hash.

        do {
            REBARR *array1 = VAL_ARRAY(val1); // val1 and val2 swapped 2nd pass!

            // Check what is in series1 but not in series2
            //
            if (flags & SOP_FLAG_CHECK)
                hser = Hash_Block(val2, skip, cased);

            // Iterate over first series
            //
            i = VAL_INDEX(val1);
            for (; i < ARR_LEN(array1); i += skip) {
                RELVAL *item = ARR_AT(array1, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = Find_Key_Hashed(
                        VAL_ARRAY(val2),
                        hser,
                        item,
                        VAL_SPECIFIER(val1),
                        skip,
                        cased,
                        1
                    );
                    h = (h >= 0);
                    if (flags & SOP_FLAG_INVERT) h = !h;
                }
                if (h) {
                    Find_Key_Hashed(
                        AS_ARRAY(buffer),
                        hret,
                        item,
                        VAL_SPECIFIER(val1),
                        skip,
                        cased,
                        2
                    );
                }
            }

            if (i != ARR_LEN(array1)) {
                //
                // In the current philosophy, the semantics of what to do
                // with things like `intersect/skip [1 2 3] [7] 2` is too
                // shaky to deal with, so an error is reported if it does
                // not work out evenly to the skip size.
                //
                fail (Error(RE_BLOCK_SKIP_WRONG));
            }

            if (flags & SOP_FLAG_CHECK)
                Free_Series(hser);

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            //
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        if (hret)
            Free_Series(hret);

        out_ser = ARR_SERIES(Copy_Array_Shallow(AS_ARRAY(buffer), SPECIFIED));
        SET_SERIES_LEN(buffer, 0); // required - allow reuse
    }
    else {
        REB_MOLD mo;
        CLEARS(&mo);

        if (IS_BINARY(val1)) {
            //
            // All binaries use "case-sensitive" comparison (e.g. each byte
            // is treated distinctly)
            //
            cased = TRUE;
        }

        // ask mo.series to have at least `i` capacity beyond mo.start
        //
        mo.opts = MOPT_RESERVE;
        mo.reserve = i;
        Push_Mold(&mo);

        do {
            REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass!
            REBUNI uc;

            // Iterate over first series
            //
            i = VAL_INDEX(val1);
            for (; i < SER_LEN(ser); i += skip) {
                uc = GET_ANY_CHAR(ser, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = (NOT_FOUND != Find_Str_Char(
                        uc,
                        VAL_SERIES(val2),
                        0,
                        VAL_INDEX(val2),
                        VAL_LEN_HEAD(val2),
                        skip,
                        cased ? AM_FIND_CASE : 0
                    ));

                    if (flags & SOP_FLAG_INVERT) h = !h;
                }

                if (!h) continue;

                if (
                    NOT_FOUND == Find_Str_Char(
                        uc, // c2 (the character to find)
                        mo.series, // ser
                        mo.start, // head
                        mo.start, // index
                        SER_LEN(mo.series), // tail
                        skip, // skip
                        cased ? AM_FIND_CASE : 0 // flags
        )
                ) {
                    Append_String(mo.series, ser, i, skip);
                }
            }

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            //
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        out_ser = Pop_Molded_String(&mo);
    }

    return out_ser;
}
Esempio n. 30
0
//
//  Xandor_Binary: C
// 
// Only valid for BINARY data.
//
REBSER *Xandor_Binary(REBCNT action, REBVAL *value, REBVAL *arg)
{
        REBSER *series;
        REBYTE *p0 = VAL_BIN_AT(value);
        REBYTE *p1 = VAL_BIN_AT(arg);
        REBYTE *p2;
        REBCNT i;
        REBCNT mt, t1, t0, t2;

        t0 = VAL_LEN_AT(value);
        t1 = VAL_LEN_AT(arg);

        mt = MIN(t0, t1); // smaller array size
        // For AND - result is size of shortest input:
//      if (action == A_AND || (action == 0 && t1 >= t0))
//          t2 = mt;
//      else
        t2 = MAX(t0, t1);

        if (IS_BITSET(value)) {
            //
            // Although bitsets and binaries share some implementation here,
            // they have distinct allocation functions...and bitsets need
            // to set the REBSER.misc.negated union field (BITS_NOT) as
            // it would be illegal to read it if it were cleared via another
            // element of the union.
            //
            assert(IS_BITSET(arg));
            series = Make_Bitset(t2 * 8);
        }
        else {
            // Ordinary binary
            //
            series = Make_Binary(t2);
            SET_SERIES_LEN(series, t2);
        }

        p2 = BIN_HEAD(series);

        switch (action) {
        case SYM_AND_T: // and~
            for (i = 0; i < mt; i++) *p2++ = *p0++ & *p1++;
            CLEAR(p2, t2 - mt);
            return series;

        case SYM_OR_T: // or~
            for (i = 0; i < mt; i++) *p2++ = *p0++ | *p1++;
            break;

        case SYM_XOR_T: // xor~
            for (i = 0; i < mt; i++) *p2++ = *p0++ ^ *p1++;
            break;

        default:
            // special bit set case EXCLUDE:
            for (i = 0; i < mt; i++) *p2++ = *p0++ & ~*p1++;
            if (t0 > t1) memcpy(p2, p0, t0 - t1); // residual from first only
            return series;
        }

        // Copy the residual:
        memcpy(p2, ((t0 > t1) ? p0 : p1), t2 - mt);
        return series;
}