Пример #1
0
*/	REBINT Compare_String_Vals(REBVAL *v1, REBVAL *v2, REBOOL uncase)
/*
**		Compare two string values. Either can be byte or unicode wide.
**
**		Uncase: compare is case-insensitive.
**
**		Used for: general string comparions (various places)
**
***********************************************************************/
{
	REBCNT l1  = VAL_LEN(v1);
	REBCNT l2  = VAL_LEN(v2);
	REBCNT len = MIN(l1, l2);
	REBINT n;

	if (IS_BINARY(v1) || IS_BINARY(v2)) uncase = FALSE;

	if (VAL_BYTE_SIZE(v1)) { // v1 is 8
		if (VAL_BYTE_SIZE(v2))
			n = Compare_Bytes(VAL_BIN_DATA(v1), VAL_BIN_DATA(v2), len, uncase);
		else
			n = -Compare_Uni_Byte(VAL_UNI_DATA(v2), VAL_BIN_DATA(v1), len, uncase);
	}
	else { // v1 is 16
		if (VAL_BYTE_SIZE(v2))
			n = Compare_Uni_Byte(VAL_UNI_DATA(v1), VAL_BIN_DATA(v2), len, uncase);
		else
			n = Compare_Uni_Str(VAL_UNI_DATA(v1), VAL_UNI_DATA(v2), len, uncase);
	}

	if (n != 0) return n;
	return l1 - l2;
}
Пример #2
0
static int Check_Char_Range(REBVAL *val, REBINT limit)
{
    REBCNT len;

    if (IS_CHAR(val)) {
        if (VAL_CHAR(val) > limit) return R_FALSE;
        return R_TRUE;
    }

    if (IS_INTEGER(val)) {
        if (VAL_INT64(val) > limit) return R_FALSE;
        return R_TRUE;
    }

    len = VAL_LEN_AT(val);
    if (VAL_BYTE_SIZE(val)) {
        REBYTE *bp = VAL_BIN_AT(val);
        if (limit == 0xff) return R_TRUE; // by definition
        for (; len > 0; len--, bp++)
            if (*bp > limit) return R_FALSE;
    } else {
        REBUNI *up = VAL_UNI_AT(val);
        for (; len > 0; len--, up++)
            if (*up > limit) return R_FALSE;
    }

    return R_TRUE;
}
Пример #3
0
static void reverse_string(REBVAL *value, REBCNT len)
{
	REBCNT n;
	REBCNT m;
	REBUNI c;

	if (VAL_BYTE_SIZE(value)) {
		REBYTE *bp = VAL_BIN_DATA(value);

		for (n = 0, m = len-1; n < len / 2; n++, m--) {
			c = bp[n];
			bp[n] = bp[m];
			bp[m] = (REBYTE)c;
		}
	}
	else {
		REBUNI *up = VAL_UNI_DATA(value);

		for (n = 0, m = len-1; n < len / 2; n++, m--) {
			c = up[n];
			up[n] = up[m];
			up[m] = c;
		}
	}
}
Пример #4
0
static REBCNT find_string(REBSER *series, REBCNT index, REBCNT end, REBVAL *target, REBCNT len, REBCNT flags, REBINT skip)
{
	REBCNT start = index;

	if (flags & (AM_FIND_REVERSE | AM_FIND_LAST)) {
		skip = -1;
		start = 0;
		if (flags & AM_FIND_LAST) index = end - len;
		else index--;
	}

	if (ANY_BINSTR(target)) {
		// Do the optimal search or the general search?
		if (BYTE_SIZE(series) && VAL_BYTE_SIZE(target) && !(flags & ~(AM_FIND_CASE|AM_FIND_MATCH)))
			return Find_Byte_Str(series, start, VAL_BIN_DATA(target), len, !GET_FLAG(flags, ARG_FIND_CASE-1), GET_FLAG(flags, ARG_FIND_MATCH-1));
		else
			return Find_Str_Str(series, start, index, end, skip, VAL_SERIES(target), VAL_INDEX(target), len, flags & (AM_FIND_MATCH|AM_FIND_CASE));
	}
	else if (IS_BINARY(target)) {
		return Find_Byte_Str(series, start, VAL_BIN_DATA(target), len, 0, GET_FLAG(flags, ARG_FIND_MATCH-1));
	}
	else if (IS_CHAR(target)) {
		return Find_Str_Char(series, start, index, end, skip, VAL_CHAR(target), flags);
	}
	else if (IS_INTEGER(target)) {
		return Find_Str_Char(series, start, index, end, skip, (REBUNI)VAL_INT32(target), flags);
	}
	else if (IS_BITSET(target)) {
		return Find_Str_Bitset(series, start, index, end, skip, VAL_SERIES(target), flags);
	}

	return NOT_FOUND;
}
Пример #5
0
//
//  Find_Max_Bit: C
// 
// Return integer number for the maximum bit number defined by
// the value. Used to determine how much space to allocate.
//
REBINT Find_Max_Bit(REBVAL *val)
{
    REBINT maxi = 0;
    REBINT n;

    switch (VAL_TYPE(val)) {

    case REB_CHAR:
        maxi = VAL_CHAR(val)+1;
        break;

    case REB_INTEGER:
        maxi = Int32s(val, 0);
        break;

    case REB_STRING:
    case REB_FILE:
    case REB_EMAIL:
    case REB_URL:
    case REB_TAG:
//  case REB_ISSUE:
        n = VAL_INDEX(val);
        if (VAL_BYTE_SIZE(val)) {
            REBYTE *bp = VAL_BIN(val);
            for (; n < cast(REBINT, VAL_LEN_HEAD(val)); n++)
                if (bp[n] > maxi) maxi = bp[n];
        }
        else {
            REBUNI *up = VAL_UNI(val);
            for (; n < cast(REBINT, VAL_LEN_HEAD(val)); n++)
                if (up[n] > maxi) maxi = up[n];
        }
        maxi++;
        break;

    case REB_BINARY:
        maxi = VAL_LEN_AT(val) * 8 - 1;
        if (maxi < 0) maxi = 0;
        break;

    case REB_BLOCK:
        for (val = VAL_ARRAY_AT(val); NOT_END(val); val++) {
            n = Find_Max_Bit(val);
            if (n > maxi) maxi = n;
        }
        //maxi++;
        break;

    case REB_NONE:
        maxi = 0;
        break;

    default:
        return -1;
    }

    return maxi;
}
Пример #6
0
*/	static void replace_with(REBSER *ser, REBCNT index, REBCNT tail, REBVAL *with)
/*
**		Replace whitespace chars that match WITH string.
**
**		Resulting string is always smaller than it was to start.
**
***********************************************************************/
{
	#define MAX_WITH 32
	REBCNT wlen;
	REBUNI with_chars[MAX_WITH];	// chars to be trimmed
	REBUNI *up = with_chars;
	REBYTE *bp;
	REBCNT n;
	REBUNI uc;

	// Setup WITH array from arg or the default:
	n = 0;
	if (IS_NONE(with)) {
		bp = "\n \r\t";
		wlen = n = 4;
	}
	else if (IS_CHAR(with)) {
		wlen = 1;
		*up++ = VAL_CHAR(with);
	}
	else if (IS_INTEGER(with)) {
		wlen = 1;
		*up++ = Int32s(with, 0);
	}
	else if (ANY_BINSTR(with)) {
		n = VAL_LEN(with);
		if (n >= MAX_WITH) n = MAX_WITH-1;
		wlen = n;
		if (VAL_BYTE_SIZE(with)) {
			bp = VAL_BIN_DATA(with);
		} else {
			memcpy(up, VAL_UNI_DATA(with), n * sizeof(REBUNI));
			n = 0;
		}
	}
	for (; n > 0; n--) *up++ = (REBUNI)*bp++;

	// Remove all occurances of chars found in WITH string:
	for (n = index; index < tail; index++) {
		uc = GET_ANY_CHAR(ser, index);
		if (!find_in_uni(with_chars, wlen, uc)) {
			SET_ANY_CHAR(ser, n, uc);
			n++;
		}
	}

	SET_ANY_CHAR(ser, n, 0);	
	SERIES_TAIL(ser) = n;
}
Пример #7
0
*/  void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper)
/*
**      Common code for string case handling.
**
***********************************************************************/
{
	REBCNT len;
	REBCNT n;

	*out = *val;

	if (IS_CHAR(val)) {
		REBUNI c = VAL_CHAR(val);
		if (c < UNICODE_CASES) {
			c = upper ? UP_CASE(c) : LO_CASE(c);
		}
		VAL_CHAR(out) = c;
		return;
	}

	// String series:

	if (IS_PROTECT_SERIES(VAL_SERIES(val))) raise Error_0(RE_PROTECTED);

	len = Partial(val, 0, part, 0);
	n = VAL_INDEX(val);
	len += n;

	if (VAL_BYTE_SIZE(val)) {
		REBYTE *bp = VAL_BIN(val);
		if (upper)
			for (; n < len; n++) bp[n] = (REBYTE)UP_CASE(bp[n]);
		else {
			for (; n < len; n++) bp[n] = (REBYTE)LO_CASE(bp[n]);
		}
	} else {
		REBUNI *up = VAL_UNI(val);
		if (upper) {
			for (; n < len; n++) {
				if (up[n] < UNICODE_CASES) up[n] = UP_CASE(up[n]);
			}
		}
		else {
			for (; n < len; n++) {
				if (up[n] < UNICODE_CASES) up[n] = LO_CASE(up[n]);
			}
		}
	}
}
Пример #8
0
//
//  Check_Bit_Str: C
// 
// If uncased is TRUE, try to match either upper or lower case.
//
REBOOL Check_Bit_Str(REBSER *bset, const REBVAL *val, REBOOL uncased)
{
    REBCNT n = VAL_INDEX(val);

    if (VAL_BYTE_SIZE(val)) {
        REBYTE *bp = VAL_BIN(val);
        for (; n < VAL_LEN_HEAD(val); n++)
            if (Check_Bit(bset, bp[n], uncased)) return TRUE;
    }
    else {
        REBUNI *up = VAL_UNI(val);
        for (; n < VAL_LEN_HEAD(val); n++)
            if (Check_Bit(bset, up[n], uncased)) return TRUE;
    }
    return FALSE;
}
Пример #9
0
*/	REBSER *Encode_UTF8_Value(REBVAL *arg, REBCNT len, REBFLG opts)
/*
**		Do all the details to encode a string as UTF8.
**		No_copy means do not make a copy.
**		Result can be a shared buffer!
**
***********************************************************************/
{
	REBSER * ser;
	if (VAL_BYTE_SIZE(arg)) {
		ser = Encode_UTF8_String(VAL_BIN_DATA(arg), len, FALSE, opts);
	} else {
		ser = Encode_UTF8_String(VAL_UNI_DATA(arg), len, TRUE, opts);
	}
	return ser;
}
Пример #10
0
//
//  Change_Case: C
// 
// Common code for string case handling.
//
void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper)
{
    REBCNT len;
    REBCNT n;

    *out = *val;

    if (IS_CHAR(val)) {
        REBUNI c = VAL_CHAR(val);
        if (c < UNICODE_CASES) {
            c = upper ? UP_CASE(c) : LO_CASE(c);
        }
        VAL_CHAR(out) = c;
        return;
    }

    // String series:

    FAIL_IF_LOCKED_SERIES(VAL_SERIES(val));

    len = Partial(val, 0, part);
    n = VAL_INDEX(val);
    len += n;

    if (VAL_BYTE_SIZE(val)) {
        REBYTE *bp = VAL_BIN(val);
        if (upper)
            for (; n < len; n++) bp[n] = (REBYTE)UP_CASE(bp[n]);
        else {
            for (; n < len; n++) bp[n] = (REBYTE)LO_CASE(bp[n]);
        }
    } else {
        REBUNI *up = VAL_UNI(val);
        if (upper) {
            for (; n < len; n++) {
                if (up[n] < UNICODE_CASES) up[n] = UP_CASE(up[n]);
            }
        }
        else {
            for (; n < len; n++) {
                if (up[n] < UNICODE_CASES) up[n] = LO_CASE(up[n]);
            }
        }
    }
}
Пример #11
0
*/	REBFLG Check_Bit_Str(REBSER *bset, REBVAL *val, REBFLG uncased)
/*
**		If uncased is TRUE, try to match either upper or lower case.
**
***********************************************************************/
{
	REBCNT n = VAL_INDEX(val);

	if (VAL_BYTE_SIZE(val)) {
		REBYTE *bp = VAL_BIN(val);
		for (; n < VAL_TAIL(val); n++)
			if (Check_Bit(bset, bp[n], uncased)) return TRUE;
	}
	else {
		REBUNI *up = VAL_UNI(val);
		for (; n < VAL_TAIL(val); n++)
			if (Check_Bit(bset, up[n], uncased)) return TRUE;
	}
	return FALSE;
}
Пример #12
0
*/	REBINT Find_Max_Bit(REBVAL *val)
/*
**		Return integer number for the maximum bit number defined by
**		the value. Used to determine how much space to allocate.
**
***********************************************************************/
{
	REBINT maxi = 0;
	REBINT n;

	switch (VAL_TYPE(val)) {

	case REB_CHAR:
		maxi = VAL_CHAR(val)+1;
		break;

	case REB_INTEGER:
		maxi = Int32s(val, 0);
		break;

	case REB_STRING:
	case REB_FILE:
	case REB_EMAIL:
	case REB_URL:
	case REB_TAG:
//	case REB_ISSUE:
		n = VAL_INDEX(val);
		if (VAL_BYTE_SIZE(val)) {
			REBYTE *bp = VAL_BIN(val);
			for (; n < (REBINT)VAL_TAIL(val); n++)
				if (bp[n] > maxi) maxi = bp[n];
		}
		else {
			REBUNI *up = VAL_UNI(val);
			for (; n < (REBINT)VAL_TAIL(val); n++)
				if (up[n] > maxi) maxi = up[n];
		}
		maxi++;
		break;

	case REB_BINARY:
		maxi = VAL_LEN(val) * 8 - 1;
		if (maxi < 0) maxi = 0;
		break;

	case REB_BLOCK:
		for (val = VAL_BLK_DATA(val); NOT_END(val); val++) {
			n = Find_Max_Bit(val);
			if (n > maxi) maxi = n;
		}
		//maxi++;
		break;

	case REB_NONE:
		maxi = 0;
		break;

	default:
		return -1;
	}

	return maxi;
}
Пример #13
0
//
//  Temp_Bin_Str_Managed: C
// 
// Determines if UTF8 conversion is needed for a series before it
// is used with a byte-oriented function.
// 
// If conversion is needed, a UTF8 series will be created.  Otherwise,
// the source series is returned as-is.
// 
// Note: This routine should only be used to generate a value used
// for temporary purposes, because it has a "surprising variance"
// regarding its input.  If the value's series can be reused, it is--
// and this depends on an implementation detail of internal encoding
// that the user should not be aware of (they need not know if the
// internal representation of an ASCII string uses 1, 2, or however
// many bytes).  But copying vs. non-copying means the resulting
// data might or might not have previous values available to step
// back into from the originating series!
// 
// !!! Should performance dictate it, the callsites could be
// adapted to know whether this produced a new series or not, and
// instead of managing a created result they could be responsible
// for freeing it if so.
//
REBSER *Temp_Bin_Str_Managed(const REBVAL *val, REBCNT *index, REBCNT *length)
{
    REBCNT len = (length && *length) ? *length : VAL_LEN_AT(val);
    REBSER *series;

    assert(IS_BINARY(val) || ANY_STRING(val));

    // !!! This used to check `len == 0` and reuse a zero length string.
    // However, the zero length string could have the wrong width.  We are
    // expected to be returning a BYTE_SIZE() string, and that confused
    // things.  It's not a good idea to mutate the source string (e.g.
    // reallocate under a new width) so consider having an EMPTY_BYTE_STRING
    // like EMPTY_ARRAY which is protected to hand back.
    //
    if (
        IS_BINARY(val)
        || (
            VAL_BYTE_SIZE(val)
            && All_Bytes_ASCII(VAL_BIN_AT(val), VAL_LEN_AT(val))
        )
    ){
        //
        // It's BINARY!, or an ANY-STRING! whose codepoints are all values in
        // ASCII (0x00 => 0x7F), hence not needing any UTF-8 encoding.
        //
        series = VAL_SERIES(val);
        ASSERT_SERIES_MANAGED(series);

        if (index)
            *index = VAL_INDEX(val);
        if (length)
            *length = len;
    }
    else {
        // UTF-8 conversion is required, and we manage the result.

        series = Make_UTF8_From_Any_String(val, len, OPT_ENC_CRLF_MAYBE);
        MANAGE_SERIES(series);

    #if !defined(NDEBUG)
        //
        // Also, PROTECT the result in the debug build...because since the
        // caller doesn't know if a new series was created or if the initial
        // data is being used, they should not be modifying it!  (We don't
        // want to protect the original data, because we wouldn't know when
        // we were allowed to unlock it...there's no later call in this
        // model to clean up the series.)
        {
            REBVAL protect;
            Val_Init_String(&protect, series);

            Protect_Value(&protect, FLAGIT(PROT_SET));

            // just a string...not /DEEP...shouldn't need to Unmark()
        }
    #endif

        if (index)
            *index = 0;
        if (length)
            *length = SER_LEN(series);
    }

    assert(BYTE_SIZE(series));
    return series;
}
Пример #14
0
static REBCNT find_string(
    REBSER *series,
    REBCNT index,
    REBCNT end,
    REBVAL *target,
    REBCNT target_len,
    REBCNT flags,
    REBINT skip
) {
    assert(end >= index);
    
    if (target_len > end - index) // series not long enough to have target
        return NOT_FOUND;

    REBCNT start = index;

    if (flags & (AM_FIND_REVERSE | AM_FIND_LAST)) {
        skip = -1;
        start = 0;
        if (flags & AM_FIND_LAST) index = end - target_len;
        else index--;
    }

    if (ANY_BINSTR(target)) {
        // Do the optimal search or the general search?
        if (
            BYTE_SIZE(series)
            && VAL_BYTE_SIZE(target)
            && !(flags & ~(AM_FIND_CASE|AM_FIND_MATCH))
        ) {
            return Find_Byte_Str(
                series,
                start,
                VAL_BIN_AT(target),
                target_len,
                NOT(GET_FLAG(flags, ARG_FIND_CASE - 1)),
                GET_FLAG(flags, ARG_FIND_MATCH - 1)
            );
        }
        else {
            return Find_Str_Str(
                series,
                start,
                index,
                end,
                skip,
                VAL_SERIES(target),
                VAL_INDEX(target),
                target_len,
                flags & (AM_FIND_MATCH|AM_FIND_CASE)
            );
        }
    }
    else if (IS_BINARY(target)) {
        const REBOOL uncase = FALSE;
        return Find_Byte_Str(
            series,
            start,
            VAL_BIN_AT(target),
            target_len,
            uncase, // "don't treat case insensitively"
            GET_FLAG(flags, ARG_FIND_MATCH - 1)
        );
    }
    else if (IS_CHAR(target)) {
        return Find_Str_Char(
            VAL_CHAR(target),
            series,
            start,
            index,
            end,
            skip,
            flags
        );
    }
    else if (IS_INTEGER(target)) {
        return Find_Str_Char(
            cast(REBUNI, VAL_INT32(target)),
            series,
            start,
            index,
            end,
            skip,
            flags
        );
    }
    else if (IS_BITSET(target)) {
        return Find_Str_Bitset(
            series,
            start,
            index,
            end,
            skip,
            VAL_SERIES(target),
            flags
        );
    }

    return NOT_FOUND;
}