Пример #1
0
*/	REBINT Compare_Uni_Str(REBUNI *u1, REBUNI *u2, REBCNT len, REBOOL uncase)
/*
**		Lexically compare the first `len` units of two wide (REBUNI)
**		strings.
**
**		Returns 0 when all `len` positions agree; otherwise returns
**		the difference (u1 unit minus u2 unit) at the first position
**		that differs.
**
**		Uncase: when set, both units are folded with LO_CASE before
**		subtracting, but only if both are below UNICODE_CASES. If
**		either is at or above that bound the raw values are compared.
**
***********************************************************************/
{
	REBCNT n;

	for (n = 0; n < len; n++) {
		REBUNI a = u1[n];
		REBUNI b = u2[n];
		REBINT diff;

		if (uncase && a < UNICODE_CASES && b < UNICODE_CASES)
			diff = LO_CASE(a) - LO_CASE(b);
		else
			diff = a - b;

		// First mismatch decides the ordering.
		if (diff != 0) return diff;
	}

	return 0;
}
Пример #2
0
*/	REBCNT Find_Str_Char(REBSER *ser, REBCNT head, REBCNT index, REBCNT tail, REBINT skip, REBUNI c2, REBCNT flags)
/*
**		Search a string series for a single character.
**
**		Scanning starts at `index` and advances by `skip` (which may
**		be negative for a reverse search) for as long as the position
**		stays within [head, tail).
**
**		Flags (bit positions follow ALL_FIND_REFS):
**			ARG_FIND_CASE-1:  when clear, characters below
**				UNICODE_CASES are folded with LO_CASE before
**				being compared.
**			ARG_FIND_MATCH-1: when set, only the starting
**				position is tested.
**
**		Returns the index of the matching character or NOT_FOUND.
**
***********************************************************************/
{
	REBOOL uncase = !GET_FLAG(flags, ARG_FIND_CASE-1); // TRUE = case-insensitive
	REBUNI wanted = c2;
	REBUNI found;

	// Fold the target once, outside the loop.
	if (uncase && wanted < UNICODE_CASES) wanted = LO_CASE(wanted);

	while (index >= head && index < tail) {

		found = GET_ANY_CHAR(ser, index);
		if (uncase && found < UNICODE_CASES) found = LO_CASE(found);

		if (found == wanted) return index;

		// Match mode looks at the first position only.
		if (GET_FLAG(flags, ARG_FIND_MATCH-1)) break;

		index += skip;
	}

	return NOT_FOUND;
}
Пример #3
0
*/	REBCNT Find_Byte_Str(REBSER *series, REBCNT index, REBYTE *b2, REBCNT l2, REBFLG uncase, REBFLG match)
/*
**		Locate the byte pattern `b2` (length `l2`) inside a byte
**		series, starting the search at `index`.
**
**		Returns the position of the first occurrence, measured from
**		the series head, or NOT_FOUND. An empty pattern, or one that
**		does not fit between `index` and the series tail, is reported
**		as NOT_FOUND.
**
**		Uncase: bytes are folded with LO_CASE before comparing.
**		Match: only the starting position is tested.
**
**		NOTE: Series tail must be > index.
**
***********************************************************************/
{
	REBCNT tail = SERIES_TAIL(series);
	REBYTE *scan;
	REBYTE *stop;
	REBYTE first;
	REBCNT k;

	// Nothing to find, or the pattern cannot fit in what remains:
	if (l2 == 0 || (l2 + index) > tail) return NOT_FOUND;

	scan = BIN_SKIP(series, index);

	// One past the last viable start position (just one when matching).
	stop = scan + (match ? 1 : (tail - index) - (l2 - 1));

	first = *b2;

	if (uncase) {

		first = (REBYTE)LO_CASE(first); // OK! (never > 255)

		for (; scan != stop; scan++) {
			if (LO_CASE(*scan) != first) continue;
			for (k = 1; k < l2; k++) {
				if (LO_CASE(scan[k]) != LO_CASE(b2[k])) break;
			}
			if (k == l2) return (scan - BIN_HEAD(series));
		}

	} else {

		for (; scan != stop; scan++) {
			if (*scan != first) continue;
			for (k = 1; k < l2; k++) {
				if (scan[k] != b2[k]) break;
			}
			if (k == l2) return (scan - BIN_HEAD(series));
		}

	}

	return NOT_FOUND;
}
Пример #4
0
*/  void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper)
/*
**      Shared implementation of upper/lower casing.
**
**      `out` always receives a copy of `val`. For a char value the
**      converted character is stored in `out`. For a string series the
**      characters are rewritten in place, starting at the value's index
**      and covering the length computed by Partial() from `part`.
**
**      Raises RE_PROTECTED if the series is protected.
**
**      Upper: TRUE converts with UP_CASE, FALSE with LO_CASE. Wide
**      characters at or above UNICODE_CASES are left untouched.
**
***********************************************************************/
{
	REBCNT pos;
	REBCNT end;

	*out = *val;

	if (IS_CHAR(val)) {
		REBUNI ch = VAL_CHAR(val);
		if (ch < UNICODE_CASES) {
			if (upper) ch = UP_CASE(ch);
			else ch = LO_CASE(ch);
		}
		VAL_CHAR(out) = ch;
		return;
	}

	// String series:

	if (IS_PROTECT_SERIES(VAL_SERIES(val))) raise Error_0(RE_PROTECTED);

	// Partial() runs before the index is read, as in the original ordering.
	end = Partial(val, 0, part, 0);
	pos = VAL_INDEX(val);
	end += pos;

	if (VAL_BYTE_SIZE(val)) {
		REBYTE *bytes = VAL_BIN(val);
		// NOTE(review): the result is truncated to a byte; if UP_CASE can
		// yield a value above 255 for some byte the stored char would be
		// wrong -- confirm against the case tables.
		for (; pos < end; pos++) {
			bytes[pos] = (REBYTE)(upper ? UP_CASE(bytes[pos]) : LO_CASE(bytes[pos]));
		}
	} else {
		REBUNI *wide = VAL_UNI(val);
		for (; pos < end; pos++) {
			if (wide[pos] >= UNICODE_CASES) continue;
			wide[pos] = upper ? UP_CASE(wide[pos]) : LO_CASE(wide[pos]);
		}
	}
}
Пример #5
0
*/	REBYTE *Match_Bytes(REBYTE *src, REBYTE *pat)
/*
**		Case-insensitively test whether the zero-terminated byte
**		string `pat` is a prefix of the zero-terminated `src`.
**
**		Returns a pointer into `src` just past the matched prefix.
**		Returns 0 when a byte differs (after LO_CASE folding) or when
**		`src` ends before `pat` does.
**
***********************************************************************/
{
	for (; *src && *pat; src++, pat++) {
		if (LO_CASE(*src) != LO_CASE(*pat)) return 0;
	}

	// Leftover pattern means src ran out first: no match.
	return *pat ? 0 : src;
}
Пример #6
0
//
//  Check_Bit: C
//
// Test whether bit `c` is set in the bitset series.  Bits are numbered
// most-significant-first within each byte, and any bit that lies beyond the
// series length counts as clear.  If the bitset is negated (BITS_NOT) the
// answer is inverted before being returned.
//
// When `uncased` is TRUE and `c` is below UNICODE_CASES, the lowercase form
// of `c` is tried first and, if that bit is clear, the uppercase form.
//
REBOOL Check_Bit(REBSER *bset, REBCNT c, REBOOL uncased)
{
    REBCNT tail = SER_LEN(bset);
    REBCNT code = c;
    REBOOL found = FALSE;

    if (uncased) {
        if (c >= UNICODE_CASES) uncased = FALSE; // no case variants to try
        else code = LO_CASE(c);
    }

    for (;;) {
        REBCNT slot = code >> 3;
        if (slot < tail)
            found = LOGICAL(BIN_HEAD(bset)[slot] & (1 << (7 - (code & 7))));

        // Done unless a case-insensitive probe still has the uppercase left.
        if (!uncased || found) break;

        code = UP_CASE(c);
        uncased = FALSE;
    }

    return BITS_NOT(bset) ? NOT(found) : found;
}
Пример #7
0
//
//  CT_Char: C
//
// Comparison handler for two char values.
//
// mode >= 0: equality test, returns 1 when equal and 0 otherwise.  For mode
//     0 or 1 both characters are case-folded with LO_CASE first; for mode 2
//     and above the raw values are compared.
// mode == -1: returns 1 when a >= b (raw values).
// any other negative mode: returns 1 when a > b (raw values).
//
// Folding is only applied to characters below UNICODE_CASES.  The string
// routines guard LO_CASE the same way; NOTE(review): this presumes the case
// table ends at UNICODE_CASES, so an unguarded lookup could read past it.
//
REBINT CT_Char(REBVAL *a, REBVAL *b, REBINT mode)
{
    REBUNI ca = VAL_CHAR(a);
    REBUNI cb = VAL_CHAR(b);
    REBINT num;

    if (mode >= 0) {
        if (mode < 2) {
            if (ca < UNICODE_CASES) ca = LO_CASE(ca);
            if (cb < UNICODE_CASES) cb = LO_CASE(cb);
        }
        num = ca - cb;
        return (num == 0);
    }

    num = ca - cb;
    if (mode == -1) return (num >= 0);
    return (num > 0);
}
Пример #8
0
*/	REBFLG Check_Bit(REBSER *bset, REBCNT c, REBFLG uncased)
/*
**		Test whether bit `c` is set in the bitset series. Bits are
**		numbered most-significant-first within each byte, and a bit
**		beyond the series tail counts as clear. If the bitset is
**		negated (BITS_NOT) the answer is inverted before returning.
**
**		Uncased: when TRUE and `c` is below UNICODE_CASES, the
**		lowercase form of `c` is tried first and, if that bit is
**		clear, the uppercase form.
**
***********************************************************************/
{
	REBCNT tail = SERIES_TAIL(bset);
	REBCNT code = c;
	REBFLG found = 0;

	if (uncased) {
		if (c >= UNICODE_CASES) uncased = FALSE; // no case variants to try
		else code = LO_CASE(c);
	}

	for (;;) {
		REBCNT slot = code >> 3;
		if (slot < tail)
			found = (0 != (BIN_HEAD(bset)[slot] & (1 << (7 - (code & 7)))));

		// Done unless a case-insensitive probe still has the uppercase left.
		if (!uncased || found) break;

		code = UP_CASE(c);
		uncased = FALSE;
	}

	return (BITS_NOT(bset)) ? !found : found;
}
Пример #9
0
*/	REBCNT Find_Str_Str(REBSER *ser1, REBCNT head, REBCNT index, REBCNT tail, REBINT skip, REBSER *ser2, REBCNT index2, REBCNT len, REBCNT flags)
/*
**		Search ser1 for the `len` characters of ser2 that start at
**		`index2`.
**
**		Scanning starts at `index` and advances by `skip` (negative
**		for a reverse search) while the position stays within
**		[head, tail). Works on byte or wide series via GET_ANY_CHAR.
**
**		Flags:
**			AM_FIND_CASE:  when clear, comparison folds case with
**				LO_CASE for characters below UNICODE_CASES.
**			AM_FIND_MATCH: only the starting position is tested.
**			AM_FIND_TAIL:  return the position just past the match
**				instead of its start.
**
**		Returns the found position or NOT_FOUND.
**
**		NOTE: the inner comparison reads ser1 at index+1 .. index+len-1
**		without checking those positions against `tail`.
**
***********************************************************************/
{
	REBOOL uncase = !(flags & AM_FIND_CASE); // TRUE = case-insensitive
	REBUNI first;
	REBUNI a;
	REBUNI b;
	REBCNT k;

	// First pattern char, folded once up front:
	first = GET_ANY_CHAR(ser2, index2);
	if (uncase && first < UNICODE_CASES) first = LO_CASE(first);

	while (index >= head && index < tail) {

		a = GET_ANY_CHAR(ser1, index);
		if (uncase && a < UNICODE_CASES) a = LO_CASE(a);

		if (a == first) {
			// Candidate start: verify the rest of the pattern.
			for (k = 1; k < len; k++) {
				a = GET_ANY_CHAR(ser1, index + k);
				b = GET_ANY_CHAR(ser2, index2 + k);
				if (uncase && a < UNICODE_CASES && b < UNICODE_CASES) {
					if (LO_CASE(a) != LO_CASE(b)) break;
				}
				else if (a != b) break;
			}
			if (k == len) return (flags & AM_FIND_TAIL) ? index + len : index;
		}

		if (flags & AM_FIND_MATCH) break;

		index += skip;
	}

	return NOT_FOUND;
}
Пример #10
0
//
//  Change_Case: C
//
// Shared implementation of upper/lower casing.
//
// `out` always receives a copy of `val`.  For a char value the converted
// character is stored in `out`.  For a string series the characters are
// rewritten in place, starting at the value's index and covering the length
// computed by Partial() from `part`.  FAIL_IF_LOCKED_SERIES is applied to
// the series before it is modified.
//
// `upper` selects UP_CASE (TRUE) or LO_CASE (FALSE).  Wide characters at or
// above UNICODE_CASES are left untouched.
//
void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper)
{
    REBCNT pos;
    REBCNT end;

    *out = *val;

    if (IS_CHAR(val)) {
        REBUNI ch = VAL_CHAR(val);
        if (ch < UNICODE_CASES) {
            if (upper) ch = UP_CASE(ch);
            else ch = LO_CASE(ch);
        }
        VAL_CHAR(out) = ch;
        return;
    }

    // String series:

    FAIL_IF_LOCKED_SERIES(VAL_SERIES(val));

    // Partial() runs before the index is read, as in the original ordering.
    end = Partial(val, 0, part);
    pos = VAL_INDEX(val);
    end += pos;

    if (VAL_BYTE_SIZE(val)) {
        REBYTE *bytes = VAL_BIN(val);
        // NOTE(review): the result is truncated to a byte; if UP_CASE can
        // yield a value above 255 for some byte the stored char would be
        // wrong -- confirm against the case tables.
        for (; pos < end; pos++) {
            bytes[pos] = (REBYTE)(upper ? UP_CASE(bytes[pos]) : LO_CASE(bytes[pos]));
        }
    } else {
        REBUNI *wide = VAL_UNI(val);
        for (; pos < end; pos++) {
            if (wide[pos] >= UNICODE_CASES) continue;
            wide[pos] = upper ? UP_CASE(wide[pos]) : LO_CASE(wide[pos]);
        }
    }
}
Пример #11
0
*/	REBINT Compare_UTF8(REBYTE *s1, REBYTE *s2, REBCNT l2)
/*
**		Compare two UTF-8 encoded strings. The length of s1 is taken
**		with LEN_BYTES; the length of s2 is passed as `l2`.
**
**		Bytes above 127 are decoded with Decode_UTF8_Char so that
**		characters can be compared case-insensitively.
**
**		Returns:
**			-3: no match, s2 > s1
**			-1: no match, s1 > s2
**			 0: exact match
**			 1: match ignoring case, s2 > s1
**			 3: match ignoring case, s1 > s2
**
**		So result + 2 for a no-match gives the sort order, and
**		result - 2 for a case-only match gives the sort order.
**
**		If the remaining lengths differ once the loop ends, the
**		result is -1 (s1 longer) or -3 (s2 longer).
**
**		Used for: WORD comparison.
**
***********************************************************************/
{
	REBCNT l1 = LEN_BYTES(s1);
	REBINT verdict = 0;
	REBINT ch1;
	REBINT ch2;

	while (l1 > 0 && l2 > 0) {
		ch1 = (REBYTE)*s1;
		ch2 = (REBYTE)*s2;

		// Multi-byte sequences: the decoder updates pointer and length.
		if (ch1 > 127) ch1 = Decode_UTF8_Char(&s1, &l1); //!!! can return 0 on error!
		if (ch2 > 127) ch2 = Decode_UTF8_Char(&s2, &l2);

		if (ch1 != ch2) {
			// A real mismatch unless both fold to the same lowercase.
			if (!(ch1 < UNICODE_CASES && ch2 < UNICODE_CASES)
				|| LO_CASE(ch1) != LO_CASE(ch2)) {
				return (ch1 > ch2) ? -1 : -3;
			}
			// Only the first case difference sets the ordering.
			if (verdict == 0) verdict = (ch1 > ch2) ? 3 : 1;
		}

		s1++;
		s2++;
		l1--;
		l2--;
	}

	if (l1 != l2) verdict = (l1 > l2) ? -1 : -3;

	return verdict;
}
Пример #12
0
*/	REBINT Compare_Bytes(REBYTE *b1, REBYTE *b2, REBCNT len, REBOOL uncase)
/*
**		Lexically compare the first `len` bytes of two byte strings.
**
**		Returns 0 when all `len` positions agree; otherwise returns
**		the difference (b1 byte minus b2 byte) at the first position
**		that differs.
**
**		Uncase: when set, both bytes are folded with LO_CASE before
**		subtracting.
**
***********************************************************************/
{
	REBCNT n;

	for (n = 0; n < len; n++) {
		REBINT diff;

		if (uncase)
			diff = LO_CASE(b1[n]) - LO_CASE(b2[n]);
		else
			diff = b1[n] - b2[n];

		// First mismatch decides the ordering.
		if (diff != 0) return diff;
	}

	return 0;
}
Пример #13
0
*/	REBFLG Match_Sub_Path(REBSER *s1, REBSER *s2)
/*
**		Case-insensitive path prefix test that works for either
**		character width (uses GET_ANY_CHAR).
**
**		Returns TRUE when every character of s1 matches the start of
**		s2 and the match ends on a path boundary: either s1's last
**		character is a directory separator, or the next character of
**		s2 is a separator or the zero terminator.
**
**		Example: a/b matches a/b, a/b/ and a/b/c.
**
***********************************************************************/
{
	REBCNT len = s1->tail;
	REBCNT n = 0;
	REBUNI last1 = 0;	// last s1 char examined (folded)
	REBUNI ch2;

	// s1 cannot be a prefix of something shorter.
	if (len > s2->tail) return FALSE;

	while (n < len) {
		last1 = GET_ANY_CHAR(s1, n);
		ch2 = GET_ANY_CHAR(s2, n);

		if (last1 < UNICODE_CASES) last1 = LO_CASE(last1);
		if (ch2 < UNICODE_CASES) ch2 = LO_CASE(ch2);

		if (last1 != ch2) break;
		n++;
	}

	// Stopped early: some character of s1 did not match.
	if (n < len) return FALSE;

	// Must be at end or at dir sep:
	ch2 = GET_ANY_CHAR(s2, n);
	return (
		last1 == '/' || last1 == '\\'
		|| ch2 == 0 || ch2 == '/' || ch2 == '\\'
	);
}
Пример #14
0
x*/	REBCNT Match_2_String(REBSER *series, REBCNT index, REBYTE *str, REBCNT len, REBINT uncase)
/*
**		Test whether the `len` bytes at `str` occur in the byte
**		series at position `index`.
**
**		Returns the index just past the matched text, or 0 when a
**		byte differs or the series tail is reached before `len`
**		bytes were compared. Note that 0 is therefore ambiguous
**		when index and len are both 0.
**
**		Uncase: non-zero compares case-insensitively (bytes folded
**		with LO_CASE); zero compares bytes exactly. The two branches
**		were previously swapped.
**
**		(Evaluate if there is another function to use. ???!!!)
**
**		Used for: PARSE function
**
***********************************************************************/
{
	REBYTE *ser = STR_SKIP(series, index);
	REBCNT tail = series->tail;

	for (; len > 0 && index < tail; index++, len--, ser++, str++) {
		if (uncase) {
			if (LO_CASE(*ser) != LO_CASE(*str)) return 0;
		}
		else if (*ser != *str) return 0;
	}

	// len reaches zero only when every pattern byte was matched.
	return (len == 0) ? index : 0;
}
Пример #15
0
//
//  Change_Case: C
//
// Shared implementation of upper/lower casing.  A char value yields a new
// char in `out`; a string value is modified in place and `out` receives the
// value as it was before the part length was computed.
//
void Change_Case(
    REBVAL *out,
    REBVAL *val, // !!! Not const--uses Partial(), may change index, review
    const REBVAL *part,
    bool upper
){
    if (IS_CHAR(val)) {
        REBUNI ch = VAL_CHAR(val);
        Init_Char_Unchecked(out, upper ? UP_CASE(ch) : LO_CASE(ch));
        return;
    }

    assert(ANY_STRING(val));
    FAIL_IF_READ_ONLY(val);

    // The result should be the same series at the same index.  R3-Alpha
    // code used Partial(), which may alter val's index, so the value is
    // copied to `out` before the length is computed.  (Review.)
    //
    Move_Value(out, val);

    REBCNT len = Part_Len_May_Modify_Index(val, part);

    // !!! Each converted character is written back over the original, which
    // assumes a case change never alters the encoded size.  That is not
    // always true (some multibyte accented characters have an upper or lower
    // form that is a single byte).  It may be rare enough to deserve special
    // handling only when it actually happens, e.g. via the mold buffer for
    // the rest of the string, and only if the size *expands*.  Expansion may
    // never be possible, only contraction (is that true?).  Review once
    // UTF-8 Everywhere is mature enough for this to matter.
    //
    REBCHR(*) src = VAL_STRING_AT(val);
    REBCNT remaining;
    for (remaining = len; remaining > 0; --remaining) {
        REBCHR(*) dst = src; // where the current character starts

        REBUNI c;
        src = NEXT_CHR(&c, src);
        if (c >= UNICODE_CASES)
            continue; // this codepoint is left unchanged

        dst = WRITE_CHR(dst, upper ? UP_CASE(c) : LO_CASE(c));
        assert(dst == src); // !!! not all case changes same byte size?
    }
}