Example #1
0
//
//  Temp_Byte_Chars_May_Fail: C
// 
// NOTE: This function returns a temporary result, and uses an internal
// buffer.  Do not use it recursively.  Also, it will Trap on errors.
// 
// Prequalifies a string before using it with a function that
// expects it to be 8-bits.  It would be used for instance to convert
// a string that is potentially REBUNI-wide into a form that can be used
// with a Scan_XXX routine, that is expecting ASCII or UTF-8 source.
// (Many TO-XXX conversions from STRING re-use that scanner logic.)
// 
// Returns a temporary string and sets the length field.
// 
// If `allow_utf8`, the constructed result is converted to UTF8.
// 
// Checks or converts it:
// 
//     1. it is byte string (not unicode)
//     2. if unicode, copy and return as temp byte string
//     3. it's actual content (less space, newlines) <= max len
//     4. it does not contain other values ("123 456")
//     5. it's not empty or only whitespace
//
REBYTE *Temp_Byte_Chars_May_Fail(
    const REBVAL *val,
    REBINT max_len,
    REBCNT *length,
    REBOOL allow_utf8
) {
    REBCNT tail = VAL_LEN_HEAD(val);
    REBCNT index = VAL_INDEX(val);
    REBCNT len;
    REBUNI c;
    REBYTE *bp;
    REBSER *src = VAL_SERIES(val);

    if (index > tail) fail (Error(RE_PAST_END));

    Resize_Series(BYTE_BUF, max_len+1);
    bp = BIN_HEAD(BYTE_BUF);

    // Skip leading whitespace:
    for (; index < tail; index++) {
        c = GET_ANY_CHAR(src, index);
        if (!IS_SPACE(c)) break;
    }

    // Copy chars that are valid:
    for (; index < tail; index++) {
        c = GET_ANY_CHAR(src, index);
        if (c >= 0x80) {
            if (!allow_utf8) fail (Error(RE_INVALID_CHARS));

            len = Encode_UTF8_Char(bp, c);
            max_len -= len;
            bp += len;
        }
        else if (!IS_SPACE(c)) {
            *bp++ = (REBYTE)c;
            max_len--;
        }
        else break;
        if (max_len < 0)
            fail (Error(RE_TOO_LONG));
    }

    // Rest better be just spaces:
    for (; index < tail; index++) {
        c = GET_ANY_CHAR(src, index);
        if (!IS_SPACE(c)) fail (Error(RE_INVALID_CHARS));
    }

    *bp = '\0';

    len = bp - BIN_HEAD(BYTE_BUF);
    if (len == 0) fail (Error(RE_TOO_SHORT));

    if (length) *length = len;

    return BIN_HEAD(BYTE_BUF);
}
Example #2
0
//
//  Make_Set_Operation_Series: C
// 
// Do set operations on a series.  Case-sensitive if `cased` is TRUE.
// `skip` is the record size.
//
static REBSER *Make_Set_Operation_Series(const REBVAL *val1, const REBVAL *val2, REBCNT flags, REBCNT cased, REBCNT skip)
{
    REBSER *buffer;     // buffer for building the return series
    REBCNT i;
    REBINT h = TRUE;
    REBFLG first_pass = TRUE; // are we in the first pass over the series?
    REBSER *out_ser;

    // This routine should only be called with SERIES! values
    assert(ANY_SERIES(val1));

    if (val2) {
        assert(ANY_SERIES(val2));

        if (ANY_ARRAY(val1)) {
            if (!ANY_ARRAY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));

            // As long as they're both arrays, we're willing to do:
            //
            //     >> union quote (a b c) 'b/d/e
            //     (a b c d e)
            //
            // The type of the result will match the first value.
        }
        else if (!IS_BINARY(val1)) {

            // We will similarly do any two ANY-STRING! types:
            //
            //      >> union <abc> "bde"
            //      <abcde>

            if (IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
        else {
            // Binaries only operate with other binaries

            if (!IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
    }

    // Calculate i as length of result block.
    i = VAL_LEN(val1);
    if (flags & SOP_FLAG_BOTH) i += VAL_LEN(val2);

    if (ANY_ARRAY(val1)) {
        REBSER *hser = 0;   // hash table for series
        REBSER *hret;       // hash table for return series

        buffer = BUF_EMIT;          // use preallocated shared block
        Resize_Series(buffer, i);
        hret = Make_Hash_Sequence(i);   // allocated

        // Optimization note: !!
        // This code could be optimized for small blocks by not hashing them
        // and extending Find_Key to do a FIND on the value itself w/o the hash.

        do {
            REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass!

            // Check what is in series1 but not in series2:
            if (flags & SOP_FLAG_CHECK)
                hser = Hash_Block(val2, cased);

            // Iterate over first series:
            i = VAL_INDEX(val1);
            for (; i < SERIES_TAIL(ser); i += skip) {
                REBVAL *item = BLK_SKIP(ser, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = Find_Key(VAL_SERIES(val2), hser, item, skip, cased, 1);
                    h = (h >= 0);
                    if (flags & SOP_FLAG_INVERT) h = !h;
                }
                if (h) Find_Key(buffer, hret, item, skip, cased, 2);
            }

            if (flags & SOP_FLAG_CHECK)
                Free_Series(hser);

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        if (hret)
            Free_Series(hret);

        out_ser = Copy_Array_Shallow(buffer);
        RESET_TAIL(buffer); // required - allow reuse
    }
    else {
        if (IS_BINARY(val1)) {
            // All binaries use "case-sensitive" comparison (e.g. each byte
            // is treated distinctly)
            cased = TRUE;
        }

        buffer = BUF_MOLD;
        Reset_Buffer(buffer, i);
        RESET_TAIL(buffer);

        do {
            REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass!
            REBUNI uc;

            // Iterate over first series:
            i = VAL_INDEX(val1);
            for (; i < SERIES_TAIL(ser); i += skip) {
                uc = GET_ANY_CHAR(ser, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = (NOT_FOUND != Find_Str_Char(
                        VAL_SERIES(val2),
                        0,
                        VAL_INDEX(val2),
                        VAL_TAIL(val2),
                        skip,
                        uc,
                        cased ? AM_FIND_CASE : 0
                    ));

                    if (flags & SOP_FLAG_INVERT) h = !h;
                }

                if (!h) continue;

                if (
                    NOT_FOUND == Find_Str_Char(
                        buffer,
                        0,
                        0,
                        SERIES_TAIL(buffer),
                        skip,
                        uc,
                        cased ? AM_FIND_CASE : 0
                    )
                ) {
                    Append_String(buffer, ser, i, skip);
                }
            }

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        out_ser = Copy_String(buffer, 0, -1);
    }

    return out_ser;
}
Example #3
0
*/	static REBINT Do_Set_Operation(struct Reb_Call *call_, REBCNT flags)
/*
**		Do set operations on a series.
**
***********************************************************************/
{
	REBVAL *val;
	REBVAL *val1;
	REBVAL *val2 = 0;
	REBSER *ser;
	REBSER *hser = 0;	// hash table for series
	REBSER *retser;		// return series
	REBSER *hret;		// hash table for return series
	REBCNT i;
	REBINT h = TRUE;
	REBCNT skip = 1;	// record size
	REBCNT cased = 0;	// case sensitive when TRUE

	SET_NONE(D_OUT);
	val1 = D_ARG(1);
	i = 2;

	// Check for second series argument:
	if (flags != SET_OP_UNIQUE) {
		val2 = D_ARG(i++);
		if (VAL_TYPE(val1) != VAL_TYPE(val2))
			raise Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2));
	}

	// Refinements /case and /skip N
	cased = D_REF(i++); // cased
	if (D_REF(i++)) skip = Int32s(D_ARG(i), 1);

	switch (VAL_TYPE(val1)) {

	case REB_BLOCK:
		i = VAL_LEN(val1);
		// Setup result block:
		if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);
		retser = BUF_EMIT;			// use preallocated shared block
		Resize_Series(retser, i);
		hret = Make_Hash_Sequence(i);	// allocated

		// Optimization note: !!
		// This code could be optimized for small blocks by not hashing them
		// and extending Find_Key to do a FIND on the value itself w/o the hash.

		do {
			// Check what is in series1 but not in series2:
			if (GET_FLAG(flags, SOP_CHECK))
				hser = Hash_Block(val2, cased);

			// Iterate over first series:
			ser = VAL_SERIES(val1);
			i = VAL_INDEX(val1);
			for (; val = BLK_SKIP(ser, i), i < SERIES_TAIL(ser); i += skip) {
				if (GET_FLAG(flags, SOP_CHECK)) {
					h = Find_Key(VAL_SERIES(val2), hser, val, skip, cased, 1) >= 0;
					if (GET_FLAG(flags, SOP_INVERT)) h = !h;
				}
				if (h) Find_Key(retser, hret, val, skip, cased, 2);
			}

			// Iterate over second series?
			if ((i = GET_FLAG(flags, SOP_BOTH))) {
				val = val1;
				val1 = val2;
				val2 = val;
				CLR_FLAG(flags, SOP_BOTH);
			}

			if (GET_FLAG(flags, SOP_CHECK))
				Free_Series(hser);
		} while (i);

		if (hret)
			Free_Series(hret);

		Val_Init_Block(D_OUT, Copy_Array_Shallow(retser));
		RESET_TAIL(retser); // required - allow reuse

		break;

	case REB_BINARY:
		cased = TRUE;
		SET_TYPE(D_OUT, REB_BINARY);
	case REB_STRING:
		i = VAL_LEN(val1);
		// Setup result block:
		if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);

		retser = BUF_MOLD;
		Reset_Buffer(retser, i);
		RESET_TAIL(retser);

		do {
			REBUNI uc;

			cased = cased ? AM_FIND_CASE : 0;

			// Iterate over first series:
			ser = VAL_SERIES(val1);
			i = VAL_INDEX(val1);
			for (; i < SERIES_TAIL(ser); i += skip) {
				uc = GET_ANY_CHAR(ser, i);
				if (GET_FLAG(flags, SOP_CHECK)) {
					h = Find_Str_Char(VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased) != NOT_FOUND;
					if (GET_FLAG(flags, SOP_INVERT)) h = !h;
				}
				if (h && (Find_Str_Char(retser, 0, 0, SERIES_TAIL(retser), skip, uc, cased) == NOT_FOUND)) {
					Append_String(retser, ser, i, skip);
				}
			}

			// Iterate over second series?
			if ((i = GET_FLAG(flags, SOP_BOTH))) {
				val = val1;
				val1 = val2;
				val2 = val;
				CLR_FLAG(flags, SOP_BOTH);
			}
		} while (i);

		ser = Copy_String(retser, 0, -1);
		if (IS_BINARY(D_OUT))
			Val_Init_Binary(D_OUT, ser);
		else
			Val_Init_String(D_OUT, ser);
		break;

	case REB_BITSET:
		switch (flags) {
		case SET_OP_UNIQUE:
			return R_ARG1;
		case SET_OP_UNION:
			i = A_OR;
			break;
		case SET_OP_INTERSECT:
			i = A_AND;
			break;
		case SET_OP_DIFFERENCE:
			i = A_XOR;
			break;
		case SET_OP_EXCLUDE:
			i = 0; // special case
			break;
		}
		ser = Xandor_Binary(i, val1, val2);
		Val_Init_Bitset(D_OUT, ser);
		break;

	case REB_TYPESET:
		switch (flags) {
		case SET_OP_UNIQUE:
			break;
		case SET_OP_UNION:
			VAL_TYPESET(val1) |= VAL_TYPESET(val2);
			break;
		case SET_OP_INTERSECT:
			VAL_TYPESET(val1) &= VAL_TYPESET(val2);
			break;
		case SET_OP_DIFFERENCE:
			VAL_TYPESET(val1) ^= VAL_TYPESET(val2);
			break;
		case SET_OP_EXCLUDE:
			VAL_TYPESET(val1) &= ~VAL_TYPESET(val2);
			break;
		}
		return R_ARG1;

	default:
		raise Error_Invalid_Arg(val1);
	}

	return R_OUT;
}
Example #4
0
//
//  Make_Set_Operation_Series: C
// 
// Do set operations on a series.  Case-sensitive if `cased` is TRUE.
// `skip` is the record size.
//
static REBSER *Make_Set_Operation_Series(
    const REBVAL *val1,
    const REBVAL *val2,
    REBFLGS flags,
    REBOOL cased,
    REBCNT skip
) {
    REBCNT i;
    REBINT h = 1; // used for both logic true/false and hash check
    REBOOL first_pass = TRUE; // are we in the first pass over the series?
    REBSER *out_ser;

    assert(ANY_SERIES(val1));

    if (val2) {
        assert(ANY_SERIES(val2));

        if (ANY_ARRAY(val1)) {
            if (!ANY_ARRAY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));

            // As long as they're both arrays, we're willing to do:
            //
            //     >> union quote (a b c) 'b/d/e
            //     (a b c d e)
            //
            // The type of the result will match the first value.
        }
        else if (!IS_BINARY(val1)) {

            // We will similarly do any two ANY-STRING! types:
            //
            //      >> union <abc> "bde"
            //      <abcde>

            if (IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
        else {
            // Binaries only operate with other binaries

            if (!IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
    }

    // Calculate `i` as maximum length of result block.  The temporary buffer
    // will be allocated at this size, but copied out at the exact size of
    // the actual result.
    //
    i = VAL_LEN_AT(val1);
    if (flags & SOP_FLAG_BOTH) i += VAL_LEN_AT(val2);

    if (ANY_ARRAY(val1)) {
        REBSER *hser = 0;   // hash table for series
        REBSER *hret;       // hash table for return series

        // The buffer used for building the return series.  Currently it
        // reuses BUF_EMIT, because that buffer is not likely to be in
        // use (emit doesn't call set operations, nor vice versa).  However,
        // other routines may get the same idea and start recursing so it
        // may be better to use something more similar to the mold stack
        // approach of marking off successive ranges in the array.
        //
        REBSER *buffer = ARR_SERIES(BUF_EMIT);
        Resize_Series(buffer, i);
        hret = Make_Hash_Sequence(i);   // allocated

        // Optimization note: !!
        // This code could be optimized for small blocks by not hashing them
        // and extending Find_Key to FIND on the value itself w/o the hash.

        do {
            REBARR *array1 = VAL_ARRAY(val1); // val1 and val2 swapped 2nd pass!

            // Check what is in series1 but not in series2
            //
            if (flags & SOP_FLAG_CHECK)
                hser = Hash_Block(val2, skip, cased);

            // Iterate over first series
            //
            i = VAL_INDEX(val1);
            for (; i < ARR_LEN(array1); i += skip) {
                RELVAL *item = ARR_AT(array1, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = Find_Key_Hashed(
                        VAL_ARRAY(val2),
                        hser,
                        item,
                        VAL_SPECIFIER(val1),
                        skip,
                        cased,
                        1
                    );
                    h = (h >= 0);
                    if (flags & SOP_FLAG_INVERT) h = !h;
                }
                if (h) {
                    Find_Key_Hashed(
                        AS_ARRAY(buffer),
                        hret,
                        item,
                        VAL_SPECIFIER(val1),
                        skip,
                        cased,
                        2
                    );
                }
            }

            if (i != ARR_LEN(array1)) {
                //
                // In the current philosophy, the semantics of what to do
                // with things like `intersect/skip [1 2 3] [7] 2` is too
                // shaky to deal with, so an error is reported if it does
                // not work out evenly to the skip size.
                //
                fail (Error(RE_BLOCK_SKIP_WRONG));
            }

            if (flags & SOP_FLAG_CHECK)
                Free_Series(hser);

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            //
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        if (hret)
            Free_Series(hret);

        out_ser = ARR_SERIES(Copy_Array_Shallow(AS_ARRAY(buffer), SPECIFIED));
        SET_SERIES_LEN(buffer, 0); // required - allow reuse
    }
    else {
        REB_MOLD mo;
        CLEARS(&mo);

        if (IS_BINARY(val1)) {
            //
            // All binaries use "case-sensitive" comparison (e.g. each byte
            // is treated distinctly)
            //
            cased = TRUE;
        }

        // ask mo.series to have at least `i` capacity beyond mo.start
        //
        mo.opts = MOPT_RESERVE;
        mo.reserve = i;
        Push_Mold(&mo);

        do {
            REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass!
            REBUNI uc;

            // Iterate over first series
            //
            i = VAL_INDEX(val1);
            for (; i < SER_LEN(ser); i += skip) {
                uc = GET_ANY_CHAR(ser, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = (NOT_FOUND != Find_Str_Char(
                        uc,
                        VAL_SERIES(val2),
                        0,
                        VAL_INDEX(val2),
                        VAL_LEN_HEAD(val2),
                        skip,
                        cased ? AM_FIND_CASE : 0
                    ));

                    if (flags & SOP_FLAG_INVERT) h = !h;
                }

                if (!h) continue;

                if (
                    NOT_FOUND == Find_Str_Char(
                        uc, // c2 (the character to find)
                        mo.series, // ser
                        mo.start, // head
                        mo.start, // index
                        SER_LEN(mo.series), // tail
                        skip, // skip
                        cased ? AM_FIND_CASE : 0 // flags
        )
                ) {
                    Append_String(mo.series, ser, i, skip);
                }
            }

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            //
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        out_ser = Pop_Molded_String(&mo);
    }

    return out_ser;
}
Example #5
0
*/	static REBINT Do_Cmd(REBDIA *dia)
/*
**		Returns the length of command processed or error. See below.
**
***********************************************************************/
{
	REBVAL *fargs;
	REBINT size;
	REBVAL *val;
	REBINT err;
	REBINT n;

	// Get formal arguments block for this command:
	fargs = FRM_VALUES(dia->dialect) + dia->cmd;
	if (!IS_BLOCK(fargs)) return -REB_DIALECT_BAD_SPEC;
	dia->fargs = VAL_SERIES(fargs);
	fargs = VAL_BLK_DATA(fargs);
	size = Count_Dia_Args(fargs); // approximate

	// Preallocate output block (optimize for large blocks):
	if (dia->len > size) size = dia->len;
	if (GET_FLAG(dia->flags, RDIA_ALL)) {
		Extend_Series(dia->out, size+1);
	}
	else {
		Resize_Series(dia->out, size+1); // tail = 0
		CLEAR_SERIES(dia->out); // Be sure it is entirely cleared
	}

	// Insert command word:
	if (!GET_FLAG(dia->flags, RDIA_NO_CMD)) {
		val = Append_Value(dia->out);
		Set_Word(val, FRM_WORD_SYM(dia->dialect, dia->cmd), dia->dialect, dia->cmd);
		if (GET_FLAG(dia->flags, RDIA_LIT_CMD)) VAL_SET(val, REB_LIT_WORD);
		dia->outi++;
		size++;
	}
	if (dia->cmd > 1) dia->argi++; // default cmd has no word arg

	// Foreach argument provided:
	for (n = dia->len; n > 0; n--, dia->argi++) {
		val = Eval_Arg(dia);
		if (!val)
			return -REB_DIALECT_BAD_ARG;
		if (IS_END(val)) break;
		if (!IS_NONE(val)) {
			//Print("n %d len %d argi %d", n, dia->len, dia->argi);
			err = Add_Arg(dia, val); // 1: good, 0: no-type, -N: error
			if (err == 0) return n; // remainder
			if (err < 0) return err;
		}
	}

	// If not enough args, pad with NONE values:
	if (dia->cmd > 1) {
		for (n = SERIES_TAIL(dia->out); n < size; n++) {
			Append_Value(dia->out);
		}
	}

	dia->outi = SERIES_TAIL(dia->out);

	return 0;
}
Example #6
0
*/	REBYTE *Temp_Byte_Chars_May_Fail(const REBVAL *val, REBINT max_len, REBCNT *length, REBINT opts)
/*
**	NOTE: This function returns a temporary result, and uses an internal
**	buffer.  Do not use it recursively.  Also, it will Trap on errors.
**
**	Prequalifies a string before using it with a function that
**	expects it to be 8-bits.  It would be used for instance to convert
**	a string that is potentially REBUNI-wide into a form that can be used
**	with a Scan_XXX routine, that is expecting ASCII or UTF-8 source.
**	(Many TO-XXX conversions from STRING re-use that scanner logic.)
**
**	Returns a temporary string and sets the length field.
**
**	Opts can be:
**		0 - no special options
**		1 - allow UTF8 (val is converted to UTF8 during qualification)
**		2 - allow binary
**
**	Checks or converts it:
**
**		1. it is byte string (not unicode)
**		2. if unicode, copy and return as temp byte string
**		3. it's actual content (less space, newlines) <= max len
**		4. it does not contain other values ("123 456")
**		5. it's not empty or only whitespace
**
***********************************************************************/
{
	REBCNT tail = VAL_TAIL(val);
	REBCNT index = VAL_INDEX(val);
	REBCNT len;
	REBUNI c;
	REBYTE *bp;
	REBSER *src = VAL_SERIES(val);

	if (index > tail) raise Error_0(RE_PAST_END);

	Resize_Series(BUF_FORM, max_len+1);
	bp = BIN_HEAD(BUF_FORM);

	// Skip leading whitespace:
	for (; index < tail; index++) {
		c = GET_ANY_CHAR(src, index);
		if (!IS_SPACE(c)) break;
	}

	// Copy chars that are valid:
	for (; index < tail; index++) {
		c = GET_ANY_CHAR(src, index);
		if (opts < 2 && c >= 0x80) {
			if (opts == 0) raise Error_0(RE_INVALID_CHARS);
			len = Encode_UTF8_Char(bp, c);
			max_len -= len;
			bp += len;
		}
		else if (!IS_SPACE(c)) {
			*bp++ = (REBYTE)c;
			max_len--;
		}
		else break;
		if (max_len < 0)
			raise Error_0(RE_TOO_LONG);
	}

	// Rest better be just spaces:
	for (; index < tail; index++) {
		c = GET_ANY_CHAR(src, index);
		if (!IS_SPACE(c)) raise Error_0(RE_INVALID_CHARS);
	}

	*bp= 0;

	len = bp - BIN_HEAD(BUF_FORM);
	if (len == 0) raise Error_0(RE_TOO_SHORT);

	if (length) *length = len;

	return BIN_HEAD(BUF_FORM);
}