*/ REBINT Compare_Uni_Str(REBUNI *u1, REBUNI *u2, REBCNT len, REBOOL uncase) /* ** Compare two unicode-wide strings. Return lexical difference. ** ** Uncase: compare is case-insensitive. ** ***********************************************************************/ { REBINT d; REBUNI c1; REBUNI c2; for (; len > 0; len--) { c1 = *u1++; c2 = *u2++; if (uncase && c1 < UNICODE_CASES && c2 < UNICODE_CASES) d = LO_CASE(c1) - LO_CASE(c2); else d = c1 - c2; if (d != 0) return d; } return 0; }
*/ REBCNT Find_Str_Char(REBSER *ser, REBCNT head, REBCNT index, REBCNT tail, REBINT skip, REBUNI c2, REBCNT flags) /* ** General purpose find a char in a string. ** ** Supports: forward/reverse with skip, cased/uncase, Unicode/byte. ** ** Skip can be set positive or negative (for reverse). ** ** Flags are set according to ALL_FIND_REFS ** ***********************************************************************/ { REBUNI c1; REBOOL uncase = !GET_FLAG(flags, ARG_FIND_CASE-1); // uncase = case insenstive if (uncase && c2 < UNICODE_CASES) c2 = LO_CASE(c2); for (; index >= head && index < tail; index += skip) { c1 = GET_ANY_CHAR(ser, index); if (uncase && c1 < UNICODE_CASES) c1 = LO_CASE(c1); if (c1 == c2) return index; if GET_FLAG(flags, ARG_FIND_MATCH-1) break; } return NOT_FOUND; }
*/ REBCNT Find_Byte_Str(REBSER *series, REBCNT index, REBYTE *b2, REBCNT l2, REBFLG uncase, REBFLG match) /* ** Find a byte string within a byte string. Optimized for speed. ** ** Returns starting position or NOT_FOUND. ** ** Uncase: compare is case-insensitive. ** Match: compare to first position only. ** ** NOTE: Series tail must be > index. ** ***********************************************************************/ { REBYTE *b1; REBYTE *e1; REBCNT l1; REBYTE c; REBCNT n; // The pattern empty or is longer than the target: if (l2 == 0 || (l2 + index) > SERIES_TAIL(series)) return NOT_FOUND; b1 = BIN_SKIP(series, index); l1 = SERIES_TAIL(series) - index; e1 = b1 + (match ? 1 : l1 - (l2 - 1)); c = *b2; // first char if (!uncase) { while (b1 != e1) { if (*b1 == c) { // matched first char for (n = 1; n < l2; n++) { if (b1[n] != b2[n]) break; } if (n == l2) return (b1 - BIN_HEAD(series)); } b1++; } } else { c = (REBYTE)LO_CASE(c); // OK! (never > 255) while (b1 != e1) { if (LO_CASE(*b1) == c) { // matched first char for (n = 1; n < l2; n++) { if (LO_CASE(b1[n]) != LO_CASE(b2[n])) break; } if (n == l2) return (b1 - BIN_HEAD(series)); } b1++; } } return NOT_FOUND; }
*/ void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper) /* ** Common code for string case handling. ** ***********************************************************************/ { REBCNT len; REBCNT n; *out = *val; if (IS_CHAR(val)) { REBUNI c = VAL_CHAR(val); if (c < UNICODE_CASES) { c = upper ? UP_CASE(c) : LO_CASE(c); } VAL_CHAR(out) = c; return; } // String series: if (IS_PROTECT_SERIES(VAL_SERIES(val))) raise Error_0(RE_PROTECTED); len = Partial(val, 0, part, 0); n = VAL_INDEX(val); len += n; if (VAL_BYTE_SIZE(val)) { REBYTE *bp = VAL_BIN(val); if (upper) for (; n < len; n++) bp[n] = (REBYTE)UP_CASE(bp[n]); else { for (; n < len; n++) bp[n] = (REBYTE)LO_CASE(bp[n]); } } else { REBUNI *up = VAL_UNI(val); if (upper) { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = UP_CASE(up[n]); } } else { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = LO_CASE(up[n]); } } } }
*/ REBYTE *Match_Bytes(REBYTE *src, REBYTE *pat) /* ** Compare two binary strings. Return where the first differed. ** Case insensitive. ** ***********************************************************************/ { while (*src && *pat) { if (LO_CASE(*src++) != LO_CASE(*pat++)) return 0; } if (*pat) return 0; // if not at end of pat, then error return src; }
// // Check_Bit: C // // Check bit indicated. Returns TRUE if set. // If uncased is TRUE, try to match either upper or lower case. // REBOOL Check_Bit(REBSER *bset, REBCNT c, REBOOL uncased) { REBCNT i, n = c; REBCNT tail = SER_LEN(bset); REBOOL flag = FALSE; if (uncased) { if (n >= UNICODE_CASES) uncased = FALSE; // no need to check else n = LO_CASE(c); } // Check lowercase char: retry: i = n >> 3; if (i < tail) flag = LOGICAL(BIN_HEAD(bset)[i] & (1 << (7 - ((n) & 7)))); // Check uppercase if needed: if (uncased && !flag) { n = UP_CASE(c); uncased = FALSE; goto retry; } return BITS_NOT(bset) ? NOT(flag) : flag; }
// // CT_Char: C // REBINT CT_Char(REBVAL *a, REBVAL *b, REBINT mode) { REBINT num; if (mode >= 0) { if (mode < 2) num = LO_CASE(VAL_CHAR(a)) - LO_CASE(VAL_CHAR(b)); else num = VAL_CHAR(a) - VAL_CHAR(b); return (num == 0); } num = VAL_CHAR(a) - VAL_CHAR(b); if (mode == -1) return (num >= 0); return (num > 0); }
*/ REBFLG Check_Bit(REBSER *bset, REBCNT c, REBFLG uncased) /* ** Check bit indicated. Returns TRUE if set. ** If uncased is TRUE, try to match either upper or lower case. ** ***********************************************************************/ { REBCNT i, n = c; REBCNT tail = SERIES_TAIL(bset); REBFLG flag = 0; if (uncased) { if (n >= UNICODE_CASES) uncased = FALSE; // no need to check else n = LO_CASE(c); } // Check lowercase char: retry: i = n >> 3; if (i < tail) flag = (0 != (BIN_HEAD(bset)[i] & (1 << (7 - ((n) & 7))))); // Check uppercase if needed: if (uncased && !flag) { n = UP_CASE(c); uncased = FALSE; goto retry; } return (BITS_NOT(bset)) ? !flag : flag; }
*/ REBCNT Find_Str_Str(REBSER *ser1, REBCNT head, REBCNT index, REBCNT tail, REBINT skip, REBSER *ser2, REBCNT index2, REBCNT len, REBCNT flags) /* ** General purpose find a substring. ** ** Supports: forward/reverse with skip, cased/uncase, Unicode/byte. ** ** Skip can be set positive or negative (for reverse). ** ** Flags are set according to ALL_FIND_REFS ** ***********************************************************************/ { REBUNI c1; REBUNI c2; REBUNI c3; REBCNT n = 0; REBOOL uncase = !(flags & AM_FIND_CASE); // uncase = case insenstive c2 = GET_ANY_CHAR(ser2, index2); // starting char if (uncase && c2 < UNICODE_CASES) c2 = LO_CASE(c2); for (; index >= head && index < tail; index += skip) { c1 = GET_ANY_CHAR(ser1, index); if (uncase && c1 < UNICODE_CASES) c1 = LO_CASE(c1); if (c1 == c2) { for (n = 1; n < len; n++) { c1 = GET_ANY_CHAR(ser1, index+n); c3 = GET_ANY_CHAR(ser2, index2+n); if (uncase && c1 < UNICODE_CASES && c3 < UNICODE_CASES) { if (LO_CASE(c1) != LO_CASE(c3)) break; } else { if (c1 != c3) break; } } if (n == len) { if (flags & AM_FIND_TAIL) return index + len; return index; } } if (flags & AM_FIND_MATCH) break; } return NOT_FOUND; }
// // Change_Case: C // // Common code for string case handling. // void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper) { REBCNT len; REBCNT n; *out = *val; if (IS_CHAR(val)) { REBUNI c = VAL_CHAR(val); if (c < UNICODE_CASES) { c = upper ? UP_CASE(c) : LO_CASE(c); } VAL_CHAR(out) = c; return; } // String series: FAIL_IF_LOCKED_SERIES(VAL_SERIES(val)); len = Partial(val, 0, part); n = VAL_INDEX(val); len += n; if (VAL_BYTE_SIZE(val)) { REBYTE *bp = VAL_BIN(val); if (upper) for (; n < len; n++) bp[n] = (REBYTE)UP_CASE(bp[n]); else { for (; n < len; n++) bp[n] = (REBYTE)LO_CASE(bp[n]); } } else { REBUNI *up = VAL_UNI(val); if (upper) { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = UP_CASE(up[n]); } } else { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = LO_CASE(up[n]); } } } }
*/ REBINT Compare_UTF8(REBYTE *s1, REBYTE *s2, REBCNT l2) /* ** Compare two UTF8 strings. ** ** It is necessary to decode the strings to check if the match ** case-insensitively. ** ** Returns: ** -3: no match, s2 > s1 ** -1: no match, s1 > s2 ** 0: exact match ** 1: non-case match, s2 > s1 ** 3: non-case match, s1 > s2 ** ** So, result + 2 for no-match gives proper sort order. ** And, result - 2 for non-case match gives sort order. ** ** Used for: WORD comparison. ** ***********************************************************************/ { REBINT c1, c2; REBCNT l1 = LEN_BYTES(s1); REBINT result = 0; for (; l1 > 0 && l2 > 0; s1++, s2++, l1--, l2--) { c1 = (REBYTE)*s1; c2 = (REBYTE)*s2; if (c1 > 127) c1 = Decode_UTF8_Char(&s1, &l1); //!!! can return 0 on error! if (c2 > 127) c2 = Decode_UTF8_Char(&s2, &l2); if (c1 != c2) { if (c1 >= UNICODE_CASES || c2 >= UNICODE_CASES || LO_CASE(c1) != LO_CASE(c2)) { return (c1 > c2) ? -1 : -3; } if (!result) result = (c1 > c2) ? 3 : 1; } } if (l1 != l2) result = (l1 > l2) ? -1 : -3; return result; }
*/ REBINT Compare_Bytes(REBYTE *b1, REBYTE *b2, REBCNT len, REBOOL uncase) /* ** Compare two byte-wide strings. Return lexical difference. ** ** Uncase: compare is case-insensitive. ** ***********************************************************************/ { REBINT d; for (; len > 0; len--, b1++, b2++) { if (uncase) d = LO_CASE(*b1) - LO_CASE(*b2); else d = *b1 - *b2; if (d != 0) return d; } return 0; }
*/ REBFLG Match_Sub_Path(REBSER *s1, REBSER *s2) /* ** Compare two file path series, regardless of char size. ** Return TRUE if s1 is a subpath of s2. ** Case insensitive. ** ***********************************************************************/ { REBCNT len = s1->tail; REBCNT n; REBUNI c1 = 0; REBUNI c2; // Debug_Series(s1); // Debug_Series(s2); // s1 len must be <= s2 len if (len > s2->tail) return FALSE; for (n = 0; n < len; n++) { // includes terminator c1 = GET_ANY_CHAR(s1, n); c2 = GET_ANY_CHAR(s2, n); if (c1 < UNICODE_CASES) c1 = LO_CASE(c1); if (c2 < UNICODE_CASES) c2 = LO_CASE(c2); if (c1 != c2) break; } // a/b matches: a/b, a/b/, a/b/c c2 = GET_ANY_CHAR(s2, n); return ( n >= len // all chars matched && // Must be at end or at dir sep: (c1 == '/' || c1 == '\\' || c2 == 0 || c2 == '/' || c2 == '\\') ); }
x*/ REBCNT Match_2_String(REBSER *series, REBCNT index, REBYTE *str, REBCNT len, REBINT uncase) /* ** (Evaluate if there is another function to use. ???!!!) ** ** Used for: PARSE function ** ***********************************************************************/ { REBYTE *ser = STR_SKIP(series, index); REBCNT tail = series->tail; if (uncase) { for (;len > 0 && index < tail; index++, len--) { if (*ser++ != *str++) return 0; } } else { for (;len > 0 && index < tail; index++, len--) { if (LO_CASE(*ser++) != LO_CASE(*str++)) return 0; } } if (len == 0) return index; return 0; }
// // Change_Case: C // // Common code for string case handling. // void Change_Case( REBVAL *out, REBVAL *val, // !!! Not const--uses Partial(), may change index, review const REBVAL *part, bool upper ){ if (IS_CHAR(val)) { REBUNI c = VAL_CHAR(val); Init_Char_Unchecked(out, upper ? UP_CASE(c) : LO_CASE(c)); return; } assert(ANY_STRING(val)); FAIL_IF_READ_ONLY(val); // This is a mutating operation, and we want to return the same series at // the same index. However, R3-Alpha code would use Partial() and may // change val's index. Capture it before potential change, review. // Move_Value(out, val); REBCNT len = Part_Len_May_Modify_Index(val, part); // !!! This assumes that all case changes will preserve the encoding size, // but that's not true (some strange multibyte accented characters have // capital or lowercase versions that are single byte). This may be // uncommon enough to have special handling (only do something weird, e.g. // use the mold buffer, if it happens...for the remaining portion of such // a string...and only if the size *expands*). Expansions also may never // be possible, only contractions (is that true?) Review when UTF-8 // Everywhere is more mature to the point this is worth worrying about. // REBCHR(*) up = VAL_STRING_AT(val); REBCHR(*) dp; if (upper) { REBCNT n; for (n = 0; n < len; n++) { dp = up; REBUNI c; up = NEXT_CHR(&c, up); if (c < UNICODE_CASES) { dp = WRITE_CHR(dp, UP_CASE(c)); assert(dp == up); // !!! not all case changes same byte size? } } } else { REBCNT n; for (n = 0; n < len; n++) { dp = up; REBUNI c; up = NEXT_CHR(&c, up); if (c < UNICODE_CASES) { dp = WRITE_CHR(dp, LO_CASE(c)); assert(dp == up); // !!! not all case changes same byte size? } } } }