static REBCNT find_string(REBSER *series, REBCNT index, REBCNT end, REBVAL *target, REBCNT len, REBCNT flags, REBINT skip) { REBCNT start = index; if (flags & (AM_FIND_REVERSE | AM_FIND_LAST)) { skip = -1; start = 0; if (flags & AM_FIND_LAST) index = end - len; else index--; } if (ANY_BINSTR(target)) { // Do the optimal search or the general search? if (BYTE_SIZE(series) && VAL_BYTE_SIZE(target) && !(flags & ~(AM_FIND_CASE|AM_FIND_MATCH))) return Find_Byte_Str(series, start, VAL_BIN_DATA(target), len, !GET_FLAG(flags, ARG_FIND_CASE-1), GET_FLAG(flags, ARG_FIND_MATCH-1)); else return Find_Str_Str(series, start, index, end, skip, VAL_SERIES(target), VAL_INDEX(target), len, flags & (AM_FIND_MATCH|AM_FIND_CASE)); } else if (IS_BINARY(target)) { return Find_Byte_Str(series, start, VAL_BIN_DATA(target), len, 0, GET_FLAG(flags, ARG_FIND_MATCH-1)); } else if (IS_CHAR(target)) { return Find_Str_Char(series, start, index, end, skip, VAL_CHAR(target), flags); } else if (IS_INTEGER(target)) { return Find_Str_Char(series, start, index, end, skip, (REBUNI)VAL_INT32(target), flags); } else if (IS_BITSET(target)) { return Find_Str_Bitset(series, start, index, end, skip, VAL_SERIES(target), flags); } return NOT_FOUND; }
*/ static REBCNT Parse_To(REBPARSE *parse, REBCNT index, REBVAL *item, REBFLG is_thru) /* ** Parse TO a specific: ** 1. integer - index position ** 2. END - end of input ** 3. value - according to datatype ** 4. block of values - the first one we hit ** ***********************************************************************/ { REBSER *series = parse->series; REBCNT i; REBSER *ser; // TO a specific index position. if (IS_INTEGER(item)) { i = (REBCNT)Int32(item) - (is_thru ? 0 : 1); if (i > series->tail) i = series->tail; } // END else if (IS_WORD(item) && VAL_WORD_CANON(item) == SYM_END) { i = series->tail; } else if (IS_BLOCK(item)) { i = To_Thru(parse, index, item, is_thru); } else { if (IS_BLOCK_INPUT(parse)) { REBVAL word; /// !!!Temp, but where can we put it? if (IS_LIT_WORD(item)) { // patch to search for word, not lit. word = *item; VAL_SET(&word, REB_WORD); item = &word; } ///i = Find_Value(series, index, tail-index, item, 1, (REBOOL)(PF_CASE & flags), FALSE, 1); i = Find_Block(series, index, series->tail, item, 1, HAS_CASE(parse)?AM_FIND_CASE:0, 1); if (i != NOT_FOUND && is_thru) i++; } else { // "str" if (ANY_BINSTR(item)) { if (!IS_STRING(item) && !IS_BINARY(item)) { // !!! Can this be optimized not to use COPY? ser = Copy_Form_Value(item, 0); i = Find_Str_Str(series, 0, index, series->tail, 1, ser, 0, ser->tail, HAS_CASE(parse)); if (i != NOT_FOUND && is_thru) i += ser->tail; } else { i = Find_Str_Str(series, 0, index, series->tail, 1, VAL_SERIES(item), VAL_INDEX(item), VAL_LEN(item), HAS_CASE(parse)); if (i != NOT_FOUND && is_thru) i += VAL_LEN(item); } } // #"A" else if (IS_CHAR(item)) { i = Find_Str_Char(series, 0, index, series->tail, 1, VAL_CHAR(item), HAS_CASE(parse)); if (i != NOT_FOUND && is_thru) i++; } } } return i; }
*/ static REBCNT Parse_Next_String(REBPARSE *parse, REBCNT index, REBVAL *item, REBCNT depth) /* ** Match the next item in the string ruleset. ** ** If it matches, return the index just past it. ** Otherwise return NOT_FOUND. ** ***********************************************************************/ { // !!! THIS CODE NEEDS CLEANUP AND REWRITE BASED ON OTHER CHANGES REBSER *series = parse->series; REBSER *ser; REBCNT flags = parse->flags | AM_FIND_MATCH | AM_FIND_TAIL; int rewrite_needed; if (Trace_Level) { Trace_Value(7, item); Trace_String(8, STR_SKIP(series, index), series->tail - index); } if (IS_NONE(item)) return index; if (index >= series->tail) return NOT_FOUND; switch (VAL_TYPE(item)) { // Do we match a single character? case REB_CHAR: if (HAS_CASE(parse)) index = (VAL_CHAR(item) == GET_ANY_CHAR(series, index)) ? index+1 : NOT_FOUND; else index = (UP_CASE(VAL_CHAR(item)) == UP_CASE(GET_ANY_CHAR(series, index))) ? index+1 : NOT_FOUND; break; case REB_EMAIL: case REB_STRING: case REB_BINARY: index = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), VAL_LEN(item), flags); break; // Do we match to a char set? case REB_BITSET: flags = Check_Bit(VAL_SERIES(item), GET_ANY_CHAR(series, index), !HAS_CASE(parse)); index = flags ? index + 1 : NOT_FOUND; break; /* case REB_DATATYPE: // Currently: integer! if (VAL_DATATYPE(item) == REB_INTEGER) { REBCNT begin = index; while (IS_LEX_NUMBER(*str)) str++, index++; if (begin == index) index = NOT_FOUND; } break; */ case REB_TAG: case REB_FILE: // case REB_ISSUE: // !! Can be optimized (w/o COPY) ser = Copy_Form_Value(item, 0); index = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, ser, 0, ser->tail, flags); break; case REB_NONE: break; // Parse a sub-rule block: case REB_BLOCK: index = Parse_Rules_Loop(parse, index, VAL_BLK_DATA(item), depth); break; // Do an expression: case REB_PAREN: item = Do_Block_Value_Throw(item); // might GC // old: if (IS_ERROR(item)) Throw_Error(VAL_ERR_OBJECT(item)); index = MIN(index, series->tail); // may affect tail break; default: Trap1(RE_PARSE_RULE, item); } return index; }
*/ static To_Thru(REBPARSE *parse, REBCNT index, REBVAL *block, REBFLG is_thru) /* ***********************************************************************/ { REBSER *series = parse->series; REBCNT type = parse->type; REBVAL *blk; REBVAL *item; REBCNT cmd; REBCNT i; REBCNT len; for (; index <= series->tail; index++) { for (blk = VAL_BLK(block); NOT_END(blk); blk++) { item = blk; // Deal with words and commands if (IS_WORD(item)) { if (cmd = VAL_CMD(item)) { if (cmd == SYM_END) { if (index >= series->tail) { index = series->tail; goto found; } goto next; } else if (cmd == SYM_QUOTE) { item = ++blk; // next item is the quoted value if (IS_END(item)) goto bad_target; if (IS_PAREN(item)) { item = Do_Block_Value_Throw(item); // might GC } } else goto bad_target; } else { item = Get_Var(item); } } else if (IS_PATH(item)) { item = Get_Parse_Value(item); } // Try to match it: if (type >= REB_BLOCK) { if (ANY_BLOCK(item)) goto bad_target; i = Parse_Next_Block(parse, index, item, 0); if (i != NOT_FOUND) { if (!is_thru) i--; index = i; goto found; } } else if (type == REB_BINARY) { REBYTE ch1 = *BIN_SKIP(series, index); // Handle special string types: if (IS_CHAR(item)) { if (VAL_CHAR(item) > 0xff) goto bad_target; if (ch1 == VAL_CHAR(item)) goto found1; } else if (IS_BINARY(item)) { if (ch1 == *VAL_BIN_DATA(item)) { len = VAL_LEN(item); if (len == 1) goto found1; if (0 == Compare_Bytes(BIN_SKIP(series, index), VAL_BIN_DATA(item), len, 0)) { if (is_thru) index += len; goto found; } } } else if (IS_INTEGER(item)) { if (VAL_INT64(item) > 0xff) goto bad_target; if (ch1 == VAL_INT32(item)) goto found1; } else goto bad_target; } else { // String REBCNT ch1 = GET_ANY_CHAR(series, index); REBCNT ch2; if (!HAS_CASE(parse)) ch1 = UP_CASE(ch1); // Handle special string types: if (IS_CHAR(item)) { ch2 = VAL_CHAR(item); if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2); if (ch1 == ch2) goto found1; } else if (ANY_STR(item)) { ch2 = VAL_ANY_CHAR(item); if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2); if (ch1 == ch2) { len = VAL_LEN(item); if (len == 1) goto found1; i = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), len, AM_FIND_MATCH | parse->flags); if (i != NOT_FOUND) { if (is_thru) i += len; index = i; goto found; } } } else if (IS_INTEGER(item)) { ch1 = GET_ANY_CHAR(series, index); // No casing! if (ch1 == (REBCNT)VAL_INT32(item)) goto found1; } else goto bad_target; } next: // Check for | (required if not end) blk++; if (IS_PAREN(blk)) blk++; if (IS_END(blk)) break; if (!IS_OR_BAR(blk)) { item = blk; goto bad_target; } } } return NOT_FOUND; found: if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1); return index; found1: if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1); return index + (is_thru ? 1 : 0); bad_target: Trap1(RE_PARSE_RULE, item); return 0; }
static REBCNT find_string( REBSER *series, REBCNT index, REBCNT end, REBVAL *target, REBCNT target_len, REBCNT flags, REBINT skip ) { assert(end >= index); if (target_len > end - index) // series not long enough to have target return NOT_FOUND; REBCNT start = index; if (flags & (AM_FIND_REVERSE | AM_FIND_LAST)) { skip = -1; start = 0; if (flags & AM_FIND_LAST) index = end - target_len; else index--; } if (ANY_BINSTR(target)) { // Do the optimal search or the general search? if ( BYTE_SIZE(series) && VAL_BYTE_SIZE(target) && !(flags & ~(AM_FIND_CASE|AM_FIND_MATCH)) ) { return Find_Byte_Str( series, start, VAL_BIN_AT(target), target_len, NOT(GET_FLAG(flags, ARG_FIND_CASE - 1)), GET_FLAG(flags, ARG_FIND_MATCH - 1) ); } else { return Find_Str_Str( series, start, index, end, skip, VAL_SERIES(target), VAL_INDEX(target), target_len, flags & (AM_FIND_MATCH|AM_FIND_CASE) ); } } else if (IS_BINARY(target)) { const REBOOL uncase = FALSE; return Find_Byte_Str( series, start, VAL_BIN_AT(target), target_len, uncase, // "don't treat case insensitively" GET_FLAG(flags, ARG_FIND_MATCH - 1) ); } else if (IS_CHAR(target)) { return Find_Str_Char( VAL_CHAR(target), series, start, index, end, skip, flags ); } else if (IS_INTEGER(target)) { return Find_Str_Char( cast(REBUNI, VAL_INT32(target)), series, start, index, end, skip, flags ); } else if (IS_BITSET(target)) { return Find_Str_Bitset( series, start, index, end, skip, VAL_SERIES(target), flags ); } return NOT_FOUND; }