// // Scan_Time: C // // Scan string and convert to time. Return zero if error. // const REBYTE *Scan_Time(const REBYTE *cp, REBCNT len, REBVAL *value) { const REBYTE *sp; REBYTE merid = FALSE; REBOOL neg = FALSE; REBINT part1, part2, part3 = -1; REBINT part4 = -1; if (*cp == '-') cp++, neg = TRUE; else if (*cp == '+') cp++; if (*cp == '-' || *cp == '+') return 0; // small hole: --1:23 // Can be: // HH:MM as part1:part2 // HH:MM:SS as part1:part2:part3 // HH:MM:SS.DD as part1:part2:part3.part4 // MM:SS.DD as part1:part2.part4 cp = Grab_Int(cp, &part1); if (part1 > MAX_HOUR) return 0; if (*cp++ != ':') return 0; sp = Grab_Int(cp, &part2); if (part2 < 0 || sp == cp) return 0; cp = sp; if (*cp == ':') { // optional seconds sp = cp + 1; cp = Grab_Int(sp, &part3); if (part3 < 0 || cp == sp) return 0; //part3 = -1; } if (*cp == '.' || *cp == ',') { sp = ++cp; cp = Grab_Int_Scale(sp, &part4, 9); if (part4 == 0) part4 = -1; } if ((UP_CASE(*cp) == 'A' || UP_CASE(*cp) == 'P') && (UP_CASE(cp[1]) == 'M')) { merid = (REBYTE)UP_CASE(*cp); cp += 2; } if (part3 >= 0 || part4 < 0) { // HH:MM mode if (merid) { if (part1 > 12) return 0; if (part1 == 12) part1 = 0; if (merid == 'P') part1 += 12; } if (part3 < 0) part3 = 0; VAL_TIME(value) = HOUR_TIME(part1) + MIN_TIME(part2) + SEC_TIME(part3); } else { // MM:SS mode if (merid) return 0; // no AM/PM for minutes VAL_TIME(value) = MIN_TIME(part1) + SEC_TIME(part2); } if (part4 > 0) VAL_TIME(value) += part4; if (neg) VAL_TIME(value) = -VAL_TIME(value); VAL_SET(value, REB_TIME); return cp; }
*/ void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper) /* ** Common code for string case handling. ** ***********************************************************************/ { REBCNT len; REBCNT n; *out = *val; if (IS_CHAR(val)) { REBUNI c = VAL_CHAR(val); if (c < UNICODE_CASES) { c = upper ? UP_CASE(c) : LO_CASE(c); } VAL_CHAR(out) = c; return; } // String series: if (IS_PROTECT_SERIES(VAL_SERIES(val))) raise Error_0(RE_PROTECTED); len = Partial(val, 0, part, 0); n = VAL_INDEX(val); len += n; if (VAL_BYTE_SIZE(val)) { REBYTE *bp = VAL_BIN(val); if (upper) for (; n < len; n++) bp[n] = (REBYTE)UP_CASE(bp[n]); else { for (; n < len; n++) bp[n] = (REBYTE)LO_CASE(bp[n]); } } else { REBUNI *up = VAL_UNI(val); if (upper) { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = UP_CASE(up[n]); } } else { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = LO_CASE(up[n]); } } } }
// // Check_Bit: C // // Check bit indicated. Returns TRUE if set. // If uncased is TRUE, try to match either upper or lower case. // REBOOL Check_Bit(REBSER *bset, REBCNT c, REBOOL uncased) { REBCNT i, n = c; REBCNT tail = SER_LEN(bset); REBOOL flag = FALSE; if (uncased) { if (n >= UNICODE_CASES) uncased = FALSE; // no need to check else n = LO_CASE(c); } // Check lowercase char: retry: i = n >> 3; if (i < tail) flag = LOGICAL(BIN_HEAD(bset)[i] & (1 << (7 - ((n) & 7)))); // Check uppercase if needed: if (uncased && !flag) { n = UP_CASE(c); uncased = FALSE; goto retry; } return BITS_NOT(bset) ? NOT(flag) : flag; }
*/ REBFLG Check_Bit(REBSER *bset, REBCNT c, REBFLG uncased) /* ** Check bit indicated. Returns TRUE if set. ** If uncased is TRUE, try to match either upper or lower case. ** ***********************************************************************/ { REBCNT i, n = c; REBCNT tail = SERIES_TAIL(bset); REBFLG flag = 0; if (uncased) { if (n >= UNICODE_CASES) uncased = FALSE; // no need to check else n = LO_CASE(c); } // Check lowercase char: retry: i = n >> 3; if (i < tail) flag = (0 != (BIN_HEAD(bset)[i] & (1 << (7 - ((n) & 7))))); // Check uppercase if needed: if (uncased && !flag) { n = UP_CASE(c); uncased = FALSE; goto retry; } return (BITS_NOT(bset)) ? !flag : flag; }
// // Change_Case: C // // Common code for string case handling. // void Change_Case(REBVAL *out, REBVAL *val, REBVAL *part, REBOOL upper) { REBCNT len; REBCNT n; *out = *val; if (IS_CHAR(val)) { REBUNI c = VAL_CHAR(val); if (c < UNICODE_CASES) { c = upper ? UP_CASE(c) : LO_CASE(c); } VAL_CHAR(out) = c; return; } // String series: FAIL_IF_LOCKED_SERIES(VAL_SERIES(val)); len = Partial(val, 0, part); n = VAL_INDEX(val); len += n; if (VAL_BYTE_SIZE(val)) { REBYTE *bp = VAL_BIN(val); if (upper) for (; n < len; n++) bp[n] = (REBYTE)UP_CASE(bp[n]); else { for (; n < len; n++) bp[n] = (REBYTE)LO_CASE(bp[n]); } } else { REBUNI *up = VAL_UNI(val); if (upper) { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = UP_CASE(up[n]); } } else { for (; n < len; n++) { if (up[n] < UNICODE_CASES) up[n] = LO_CASE(up[n]); } } } }
x*/ REBYTE *Match_Str_Part(REBYTE *str, REBYTE *pat, REBCNT len) /* ** If the string matches the pattern for the given length ** return the char string just past the match (in str). ** Else, return 0. A case insensitive compare is made. ** ***********************************************************************/ { REBYTE *pp = pat; REBYTE *cp = str; for (;len > 0 && *pp && *cp; pp++, cp++, len--) { if (UP_CASE(*pp) != UP_CASE(*cp)) return 0; } if (len == 0) return cp; return 0; }
*/ REBINT Cmp_Value(REBVAL *s, REBVAL *t, REBFLG is_case) /* ** Compare two values and return the difference. ** ** is_case TRUE for case sensitive compare ** ***********************************************************************/ { REBDEC d1, d2; if (VAL_TYPE(t) != VAL_TYPE(s) && !(IS_NUMBER(s) && IS_NUMBER(t))) return VAL_TYPE(s) - VAL_TYPE(t); switch(VAL_TYPE(s)) { case REB_INTEGER: if (IS_DECIMAL(t)) { d1 = (REBDEC)VAL_INT64(s); d2 = VAL_DECIMAL(t); goto chkDecimal; } return THE_SIGN(VAL_INT64(s) - VAL_INT64(t)); case REB_LOGIC: return VAL_LOGIC(s) - VAL_LOGIC(t); case REB_CHAR: if (is_case) return THE_SIGN(VAL_CHAR(s) - VAL_CHAR(t)); return THE_SIGN((REBINT)(UP_CASE(VAL_CHAR(s)) - UP_CASE(VAL_CHAR(t)))); case REB_DECIMAL: case REB_MONEY: d1 = VAL_DECIMAL(s); if (IS_INTEGER(t)) d2 = (REBDEC)VAL_INT64(t); else d2 = VAL_DECIMAL(t); chkDecimal: if (Eq_Decimal(d1, d2)) return 0; if (d1 < d2) return -1; return 1; case REB_PAIR: return Cmp_Pair(s, t); case REB_EVENT: return Cmp_Event(s, t); case REB_GOB: return Cmp_Gob(s, t); case REB_TUPLE: return Cmp_Tuple(s, t); case REB_TIME: return Cmp_Time(s, t); case REB_DATE: return Cmp_Date(s, t); case REB_BLOCK: case REB_PAREN: case REB_MAP: case REB_PATH: case REB_SET_PATH: case REB_GET_PATH: case REB_LIT_PATH: return Cmp_Block(s, t, is_case); case REB_STRING: case REB_FILE: case REB_EMAIL: case REB_URL: case REB_TAG: return Compare_String_Vals(s, t, (REBOOL)!is_case); case REB_BITSET: case REB_BINARY: case REB_IMAGE: return Compare_Binary_Vals(s, t); case REB_VECTOR: return Compare_Vector(s, t); case REB_DATATYPE: return VAL_DATATYPE(s) - VAL_DATATYPE(t); case REB_WORD: case REB_SET_WORD: case REB_GET_WORD: case REB_LIT_WORD: case REB_REFINEMENT: case REB_ISSUE: return Compare_Word(s,t,is_case); case REB_ERROR: return VAL_ERR_NUM(s) - VAL_ERR_NUM(s); case REB_OBJECT: case REB_MODULE: case REB_PORT: return VAL_OBJ_FRAME(s) - VAL_OBJ_FRAME(t); case REB_NATIVE: return &VAL_FUNC_CODE(s) - &VAL_FUNC_CODE(t); case REB_ACTION: case REB_COMMAND: case REB_OP: case REB_FUNCTION: return VAL_FUNC_BODY(s) - VAL_FUNC_BODY(t); case REB_NONE: case REB_UNSET: case REB_END: default: break; } return 0; }
*/ static REBINT Scan_Char(REBYTE **bp) /* ** Scan a char, handling ^A, ^/, ^(null), ^(1234) ** ** Returns the numeric value for char, or -1 for errors. ** ** Advances the cp to just past the last position. ** ** test: to-integer load to-binary mold to-char 1234 ** ***********************************************************************/ { REBINT n; REBYTE *cp; REBYTE c; REBYTE lex; c = **bp; // Handle unicoded char: if (c >= 0x80) { n = Decode_UTF8_Char(bp, 0); // zero on error (*bp)++; // skip char return n; } (*bp)++; if (c != '^') return c; // Must be ^ escaped char: c = **bp; (*bp)++; switch (c) { case 0: n = 0; break; case '/': n = LF; break; case '^': n = c; break; case '-': n = TAB; break; case '!': n = '\036'; // record separator break; case '(': // ^(tab) ^(1234) // Check for hex integers ^(1234): cp = *bp; // restart location n = 0; while ((lex = Lex_Map[*cp]) > LEX_WORD) { c = lex & LEX_VALUE; if (!c && lex < LEX_NUMBER) break; n = (n << 4) + c; cp++; } if ((cp - *bp) > 4) return -1; if (*cp == ')') { cp++; *bp = cp; return n; } // Check for identifiers: for (n = 0; n < ESC_MAX; n++) { if (NZ(cp = Match_Bytes(*bp, (REBYTE*)(Esc_Names[n])))) { if (cp && *cp == ')') { *bp = cp + 1; return Esc_Codes[n]; } } } return -1; default: n = UP_CASE(c); if (n >= '@' && n <= '_') n -= '@'; else if (n == '~') n = 0x7f; // special for DEL else n = c; // includes: ^{ ^} ^" } return n; }
// // Change_Case: C // // Common code for string case handling. // void Change_Case( REBVAL *out, REBVAL *val, // !!! Not const--uses Partial(), may change index, review const REBVAL *part, bool upper ){ if (IS_CHAR(val)) { REBUNI c = VAL_CHAR(val); Init_Char_Unchecked(out, upper ? UP_CASE(c) : LO_CASE(c)); return; } assert(ANY_STRING(val)); FAIL_IF_READ_ONLY(val); // This is a mutating operation, and we want to return the same series at // the same index. However, R3-Alpha code would use Partial() and may // change val's index. Capture it before potential change, review. // Move_Value(out, val); REBCNT len = Part_Len_May_Modify_Index(val, part); // !!! This assumes that all case changes will preserve the encoding size, // but that's not true (some strange multibyte accented characters have // capital or lowercase versions that are single byte). This may be // uncommon enough to have special handling (only do something weird, e.g. // use the mold buffer, if it happens...for the remaining portion of such // a string...and only if the size *expands*). Expansions also may never // be possible, only contractions (is that true?) Review when UTF-8 // Everywhere is more mature to the point this is worth worrying about. // REBCHR(*) up = VAL_STRING_AT(val); REBCHR(*) dp; if (upper) { REBCNT n; for (n = 0; n < len; n++) { dp = up; REBUNI c; up = NEXT_CHR(&c, up); if (c < UNICODE_CASES) { dp = WRITE_CHR(dp, UP_CASE(c)); assert(dp == up); // !!! not all case changes same byte size? } } } else { REBCNT n; for (n = 0; n < len; n++) { dp = up; REBUNI c; up = NEXT_CHR(&c, up); if (c < UNICODE_CASES) { dp = WRITE_CHR(dp, LO_CASE(c)); assert(dp == up); // !!! not all case changes same byte size? } } } }
*/ static To_Thru(REBPARSE *parse, REBCNT index, REBVAL *block, REBFLG is_thru) /* ***********************************************************************/ { REBSER *series = parse->series; REBCNT type = parse->type; REBVAL *blk; REBVAL *item; REBCNT cmd; REBCNT i; REBCNT len; for (; index <= series->tail; index++) { for (blk = VAL_BLK(block); NOT_END(blk); blk++) { item = blk; // Deal with words and commands if (IS_WORD(item)) { if (cmd = VAL_CMD(item)) { if (cmd == SYM_END) { if (index >= series->tail) { index = series->tail; goto found; } goto next; } else if (cmd == SYM_QUOTE) { item = ++blk; // next item is the quoted value if (IS_END(item)) goto bad_target; if (IS_PAREN(item)) { item = Do_Block_Value_Throw(item); // might GC } } else goto bad_target; } else { item = Get_Var(item); } } else if (IS_PATH(item)) { item = Get_Parse_Value(item); } // Try to match it: if (type >= REB_BLOCK) { if (ANY_BLOCK(item)) goto bad_target; i = Parse_Next_Block(parse, index, item, 0); if (i != NOT_FOUND) { if (!is_thru) i--; index = i; goto found; } } else if (type == REB_BINARY) { REBYTE ch1 = *BIN_SKIP(series, index); // Handle special string types: if (IS_CHAR(item)) { if (VAL_CHAR(item) > 0xff) goto bad_target; if (ch1 == VAL_CHAR(item)) goto found1; } else if (IS_BINARY(item)) { if (ch1 == *VAL_BIN_DATA(item)) { len = VAL_LEN(item); if (len == 1) goto found1; if (0 == Compare_Bytes(BIN_SKIP(series, index), VAL_BIN_DATA(item), len, 0)) { if (is_thru) index += len; goto found; } } } else if (IS_INTEGER(item)) { if (VAL_INT64(item) > 0xff) goto bad_target; if (ch1 == VAL_INT32(item)) goto found1; } else goto bad_target; } else { // String REBCNT ch1 = GET_ANY_CHAR(series, index); REBCNT ch2; if (!HAS_CASE(parse)) ch1 = UP_CASE(ch1); // Handle special string types: if (IS_CHAR(item)) { ch2 = VAL_CHAR(item); if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2); if (ch1 == ch2) goto found1; } else if (ANY_STR(item)) { ch2 = VAL_ANY_CHAR(item); if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2); if (ch1 == ch2) { len = VAL_LEN(item); if (len == 1) goto found1; i = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), len, AM_FIND_MATCH | parse->flags); if (i != NOT_FOUND) { if (is_thru) i += len; index = i; goto found; } } } else if (IS_INTEGER(item)) { ch1 = GET_ANY_CHAR(series, index); // No casing! if (ch1 == (REBCNT)VAL_INT32(item)) goto found1; } else goto bad_target; } next: // Check for | (required if not end) blk++; if (IS_PAREN(blk)) blk++; if (IS_END(blk)) break; if (!IS_OR_BAR(blk)) { item = blk; goto bad_target; } } } return NOT_FOUND; found: if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1); return index; found1: if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1); return index + (is_thru ? 1 : 0); bad_target: Trap1(RE_PARSE_RULE, item); return 0; }
*/ static REBCNT Parse_Next_String(REBPARSE *parse, REBCNT index, REBVAL *item, REBCNT depth) /* ** Match the next item in the string ruleset. ** ** If it matches, return the index just past it. ** Otherwise return NOT_FOUND. ** ***********************************************************************/ { // !!! THIS CODE NEEDS CLEANUP AND REWRITE BASED ON OTHER CHANGES REBSER *series = parse->series; REBSER *ser; REBCNT flags = parse->flags | AM_FIND_MATCH | AM_FIND_TAIL; int rewrite_needed; if (Trace_Level) { Trace_Value(7, item); Trace_String(8, STR_SKIP(series, index), series->tail - index); } if (IS_NONE(item)) return index; if (index >= series->tail) return NOT_FOUND; switch (VAL_TYPE(item)) { // Do we match a single character? case REB_CHAR: if (HAS_CASE(parse)) index = (VAL_CHAR(item) == GET_ANY_CHAR(series, index)) ? index+1 : NOT_FOUND; else index = (UP_CASE(VAL_CHAR(item)) == UP_CASE(GET_ANY_CHAR(series, index))) ? index+1 : NOT_FOUND; break; case REB_EMAIL: case REB_STRING: case REB_BINARY: index = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), VAL_LEN(item), flags); break; // Do we match to a char set? case REB_BITSET: flags = Check_Bit(VAL_SERIES(item), GET_ANY_CHAR(series, index), !HAS_CASE(parse)); index = flags ? index + 1 : NOT_FOUND; break; /* case REB_DATATYPE: // Currently: integer! if (VAL_DATATYPE(item) == REB_INTEGER) { REBCNT begin = index; while (IS_LEX_NUMBER(*str)) str++, index++; if (begin == index) index = NOT_FOUND; } break; */ case REB_TAG: case REB_FILE: // case REB_ISSUE: // !! Can be optimized (w/o COPY) ser = Copy_Form_Value(item, 0); index = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, ser, 0, ser->tail, flags); break; case REB_NONE: break; // Parse a sub-rule block: case REB_BLOCK: index = Parse_Rules_Loop(parse, index, VAL_BLK_DATA(item), depth); break; // Do an expression: case REB_PAREN: item = Do_Block_Value_Throw(item); // might GC // old: if (IS_ERROR(item)) Throw_Error(VAL_ERR_OBJECT(item)); index = MIN(index, series->tail); // may affect tail break; default: Trap1(RE_PARSE_RULE, item); } return index; }
// // Cmp_Value: C // // Compare two values and return the difference. // // is_case TRUE for case sensitive compare // REBINT Cmp_Value(const RELVAL *s, const RELVAL *t, REBOOL is_case) { REBDEC d1, d2; if (VAL_TYPE(t) != VAL_TYPE(s) && !(ANY_NUMBER(s) && ANY_NUMBER(t))) return VAL_TYPE(s) - VAL_TYPE(t); assert(NOT_END(s) && NOT_END(t)); switch(VAL_TYPE(s)) { case REB_INTEGER: if (IS_DECIMAL(t)) { d1 = (REBDEC)VAL_INT64(s); d2 = VAL_DECIMAL(t); goto chkDecimal; } return THE_SIGN(VAL_INT64(s) - VAL_INT64(t)); case REB_LOGIC: return VAL_LOGIC(s) - VAL_LOGIC(t); case REB_CHAR: if (is_case) return THE_SIGN(VAL_CHAR(s) - VAL_CHAR(t)); return THE_SIGN((REBINT)(UP_CASE(VAL_CHAR(s)) - UP_CASE(VAL_CHAR(t)))); case REB_PERCENT: case REB_DECIMAL: case REB_MONEY: if (IS_MONEY(s)) d1 = deci_to_decimal(VAL_MONEY_AMOUNT(s)); else d1 = VAL_DECIMAL(s); if (IS_INTEGER(t)) d2 = cast(REBDEC, VAL_INT64(t)); else if (IS_MONEY(t)) d2 = deci_to_decimal(VAL_MONEY_AMOUNT(t)); else d2 = VAL_DECIMAL(t); chkDecimal: if (Eq_Decimal(d1, d2)) return 0; if (d1 < d2) return -1; return 1; case REB_PAIR: return Cmp_Pair(s, t); case REB_EVENT: return Cmp_Event(s, t); case REB_GOB: return Cmp_Gob(s, t); case REB_TUPLE: return Cmp_Tuple(s, t); case REB_TIME: return Cmp_Time(s, t); case REB_DATE: return Cmp_Date(s, t); case REB_BLOCK: case REB_GROUP: case REB_MAP: case REB_PATH: case REB_SET_PATH: case REB_GET_PATH: case REB_LIT_PATH: return Cmp_Array(s, t, is_case); case REB_STRING: case REB_FILE: case REB_EMAIL: case REB_URL: case REB_TAG: return Compare_String_Vals(s, t, NOT(is_case)); case REB_BITSET: case REB_BINARY: case REB_IMAGE: return Compare_Binary_Vals(s, t); case REB_VECTOR: return Compare_Vector(s, t); case REB_DATATYPE: return VAL_TYPE_KIND(s) - VAL_TYPE_KIND(t); case REB_WORD: case REB_SET_WORD: case REB_GET_WORD: case REB_LIT_WORD: case REB_REFINEMENT: case REB_ISSUE: return Compare_Word(s,t,is_case); case REB_ERROR: return VAL_ERR_NUM(s) - VAL_ERR_NUM(t); case REB_OBJECT: case REB_MODULE: case REB_PORT: return VAL_CONTEXT(s) - VAL_CONTEXT(t); case REB_FUNCTION: return VAL_FUNC_PARAMLIST(s) - VAL_FUNC_PARAMLIST(t); case REB_LIBRARY: return VAL_LIBRARY(s) - VAL_LIBRARY(t); case REB_STRUCT: return Cmp_Struct(s, t); case REB_BLANK: case REB_MAX_VOID: default: break; } return 0; }