*/ REBSER *Form_Reduce(REBSER *block, REBCNT index) /* ** Reduce a block and then form each value into a string. Return the ** string or NULL if an unwind triggered while reducing. ** ***********************************************************************/ { REBINT start = DSP + 1; REBINT n; REB_MOLD mo = {0}; while (index < BLK_LEN(block)) { index = Do_Next(block, index, 0); if (THROWN(DS_TOP)) { *DS_VALUE(start) = *DS_TOP; DSP = start; return NULL; } } Reset_Mold(&mo); for (n = start; n <= DSP; n++) Mold_Value(&mo, &DS_Base[n], 0); DSP = start; return Copy_String(mo.series, 0, -1); }
*/ REBSER *Parse_Lines(REBSER *src) /* ** Convert a string buffer to a block of strings. ** Note that the string must already be converted ** to REBOL LF format (no CRs). ** ***********************************************************************/ { REBSER *blk; REBUNI c; REBCNT i; REBCNT s; REBVAL *val; REBOOL uni = !BYTE_SIZE(src); REBYTE *bp = BIN_HEAD(src); REBUNI *up = UNI_HEAD(src); blk = BUF_EMIT; RESET_SERIES(blk); // Scan string, looking for LF and CR terminators: for (i = s = 0; i < SERIES_TAIL(src); i++) { c = uni ? up[i] : bp[i]; if (c == LF || c == CR) { val = Append_Value(blk); Set_String(val, Copy_String(src, s, i - s)); VAL_SET_LINE(val); // Skip CRLF if found: if (c == CR && LF == uni ? up[i] : bp[i]) i++; s = i; } } // Partial line (no linefeed): if (s + 1 != i) { val = Append_Value(blk); Set_String(val, Copy_String(src, s, i - s)); VAL_SET_LINE(val); } return Copy_Block(blk, 0); }
*/ REBSER *Split_Lines(REBVAL *val) /* ** Given a string series, split lines on CR-LF. ** Series can be bytes or Unicode. ** ***********************************************************************/ { REBSER *ser = BUF_EMIT; // GC protected (because it is emit buffer) REBSER *str = VAL_SERIES(val); REBCNT len = VAL_LEN(val); REBCNT idx = VAL_INDEX(val); REBCNT start = idx; REBSER *out; REBUNI c; BLK_RESET(ser); while (idx < len) { c = GET_ANY_CHAR(str, idx); if (c == LF || c == CR) { out = Copy_String(str, start, idx - start); val = Alloc_Tail_Array(ser); Val_Init_String(val, out); VAL_SET_OPT(val, OPT_VALUE_LINE); idx++; if (c == CR && GET_ANY_CHAR(str, idx) == LF) idx++; start = idx; } else idx++; } // Possible remainder (no terminator) if (idx > start) { out = Copy_String(str, start, idx - start); val = Alloc_Tail_Array(ser); Val_Init_String(val, out); VAL_SET_OPT(val, OPT_VALUE_LINE); } return Copy_Array_Shallow(ser); }
*/ REBSER *Form_Tight_Block(REBVAL *blk) /* ***********************************************************************/ { REB_MOLD mo = {0}; REBVAL *val; Reset_Mold(&mo); for (val = VAL_BLK_DATA(blk); NOT_END(val); val++) Mold_Value(&mo, val, 0); return Copy_String(mo.series, 0, -1); }
*/ REBSER *Copy_Mold_Value(REBVAL *value, REBCNT opts) /* ** Form a value based on the mold opts provided. ** ***********************************************************************/ { REB_MOLD mo = {0}; mo.opts = opts; Reset_Mold(&mo); Mold_Value(&mo, value, TRUE); return Copy_String(mo.series, 0, -1); }
static REBSER *make_string(REBVAL *arg, REBOOL make) { REBSER *ser = 0; // MAKE <type> 123 if (make && (IS_INTEGER(arg) || IS_DECIMAL(arg))) { ser = Make_Binary(Int32s(arg, 0)); } // MAKE/TO <type> <binary!> else if (IS_BINARY(arg)) { REBYTE *bp = VAL_BIN_DATA(arg); REBCNT len = VAL_LEN(arg); switch (What_UTF(bp, len)) { case 0: break; case 8: // UTF-8 encoded bp += 3; len -= 3; break; default: Trap0(RE_BAD_DECODE); } ser = Decode_UTF_String(bp, len, 8); // UTF-8 } // MAKE/TO <type> <any-string> else if (ANY_BINSTR(arg)) { ser = Copy_String(VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN(arg)); } // MAKE/TO <type> <any-word> else if (ANY_WORD(arg)) { ser = Copy_Mold_Value(arg, TRUE); //ser = Append_UTF8(0, Get_Word_Name(arg), -1); } // MAKE/TO <type> #"A" else if (IS_CHAR(arg)) { ser = (VAL_CHAR(arg) > 0xff) ? Make_Unicode(2) : Make_Binary(2); Append_Byte(ser, VAL_CHAR(arg)); } // MAKE/TO <type> <any-value> // else if (IS_NONE(arg)) { // ser = Make_Binary(0); // } else ser = Copy_Form_Value(arg, 1<<MOPT_TIGHT); return ser; }
*/ static REBSER *File_List_To_Block(REBCHR *str) /* ** Convert file directory and file name list to block. ** ***********************************************************************/ { REBCNT n; REBCNT len = 0; REBCHR *start = str; REBSER *blk; REBSER *dir; while (n = LEN_STR(str)) { len++; str += n + 1; // next } blk = Make_Block(len); SAVE_SERIES(blk); // First is a dir path or full file path: str = start; n = LEN_STR(str); if (len == 1) { // First is full file path dir = To_REBOL_Path(str, n, -1, 0); Set_Series(REB_FILE, Append_Value(blk), dir); } else { // First is dir path for the rest of the files dir = To_REBOL_Path(str, n, -1, TRUE); str += n + 1; // next len = dir->tail; while (n = LEN_STR(str)) { dir->tail = len; Append_Uni_Uni(dir, str, n); Set_Series(REB_FILE, Append_Value(blk), Copy_String(dir, 0, -1)); str += n + 1; // next } } UNSAVE_SERIES(blk); return blk; }
*/ REBSER *Form_Reduce(REBSER *block, REBCNT index) /* ***********************************************************************/ { REBINT start = DSP + 1; REBINT n; REB_MOLD mo = {0}; while (index < BLK_LEN(block)) { index = Do_Next(block, index, 0); } Reset_Mold(&mo); for (n = start; n <= DSP; n++) Mold_Value(&mo, &DS_Base[n], 0); DSP = start; return Copy_String(mo.series, 0, -1); }
// // Make_Set_Operation_Series: C // // Do set operations on a series. Case-sensitive if `cased` is TRUE. // `skip` is the record size. // static REBSER *Make_Set_Operation_Series(const REBVAL *val1, const REBVAL *val2, REBCNT flags, REBCNT cased, REBCNT skip) { REBSER *buffer; // buffer for building the return series REBCNT i; REBINT h = TRUE; REBFLG first_pass = TRUE; // are we in the first pass over the series? REBSER *out_ser; // This routine should only be called with SERIES! values assert(ANY_SERIES(val1)); if (val2) { assert(ANY_SERIES(val2)); if (ANY_ARRAY(val1)) { if (!ANY_ARRAY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); // As long as they're both arrays, we're willing to do: // // >> union quote (a b c) 'b/d/e // (a b c d e) // // The type of the result will match the first value. } else if (!IS_BINARY(val1)) { // We will similarly do any two ANY-STRING! types: // // >> union <abc> "bde" // <abcde> if (IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } else { // Binaries only operate with other binaries if (!IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } } // Calculate i as length of result block. i = VAL_LEN(val1); if (flags & SOP_FLAG_BOTH) i += VAL_LEN(val2); if (ANY_ARRAY(val1)) { REBSER *hser = 0; // hash table for series REBSER *hret; // hash table for return series buffer = BUF_EMIT; // use preallocated shared block Resize_Series(buffer, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! // Check what is in series1 but not in series2: if (flags & SOP_FLAG_CHECK) hser = Hash_Block(val2, cased); // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { REBVAL *item = BLK_SKIP(ser, i); if (flags & SOP_FLAG_CHECK) { h = Find_Key(VAL_SERIES(val2), hser, item, skip, cased, 1); h = (h >= 0); if (flags & SOP_FLAG_INVERT) h = !h; } if (h) Find_Key(buffer, hret, item, skip, cased, 2); } if (flags & SOP_FLAG_CHECK) Free_Series(hser); if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); if (hret) Free_Series(hret); out_ser = Copy_Array_Shallow(buffer); RESET_TAIL(buffer); // required - allow reuse } else { if (IS_BINARY(val1)) { // All binaries use "case-sensitive" comparison (e.g. each byte // is treated distinctly) cased = TRUE; } buffer = BUF_MOLD; Reset_Buffer(buffer, i); RESET_TAIL(buffer); do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! REBUNI uc; // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (flags & SOP_FLAG_CHECK) { h = (NOT_FOUND != Find_Str_Char( VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased ? AM_FIND_CASE : 0 )); if (flags & SOP_FLAG_INVERT) h = !h; } if (!h) continue; if ( NOT_FOUND == Find_Str_Char( buffer, 0, 0, SERIES_TAIL(buffer), skip, uc, cased ? AM_FIND_CASE : 0 ) ) { Append_String(buffer, ser, i, skip); } } if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); out_ser = Copy_String(buffer, 0, -1); } return out_ser; }
*/ static REBINT Do_Set_Operation(struct Reb_Call *call_, REBCNT flags) /* ** Do set operations on a series. ** ***********************************************************************/ { REBVAL *val; REBVAL *val1; REBVAL *val2 = 0; REBSER *ser; REBSER *hser = 0; // hash table for series REBSER *retser; // return series REBSER *hret; // hash table for return series REBCNT i; REBINT h = TRUE; REBCNT skip = 1; // record size REBCNT cased = 0; // case sensitive when TRUE SET_NONE(D_OUT); val1 = D_ARG(1); i = 2; // Check for second series argument: if (flags != SET_OP_UNIQUE) { val2 = D_ARG(i++); if (VAL_TYPE(val1) != VAL_TYPE(val2)) raise Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)); } // Refinements /case and /skip N cased = D_REF(i++); // cased if (D_REF(i++)) skip = Int32s(D_ARG(i), 1); switch (VAL_TYPE(val1)) { case REB_BLOCK: i = VAL_LEN(val1); // Setup result block: if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2); retser = BUF_EMIT; // use preallocated shared block Resize_Series(retser, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { // Check what is in series1 but not in series2: if (GET_FLAG(flags, SOP_CHECK)) hser = Hash_Block(val2, cased); // Iterate over first series: ser = VAL_SERIES(val1); i = VAL_INDEX(val1); for (; val = BLK_SKIP(ser, i), i < SERIES_TAIL(ser); i += skip) { if (GET_FLAG(flags, SOP_CHECK)) { h = Find_Key(VAL_SERIES(val2), hser, val, skip, cased, 1) >= 0; if (GET_FLAG(flags, SOP_INVERT)) h = !h; } if (h) Find_Key(retser, hret, val, skip, cased, 2); } // Iterate over second series? if ((i = GET_FLAG(flags, SOP_BOTH))) { val = val1; val1 = val2; val2 = val; CLR_FLAG(flags, SOP_BOTH); } if (GET_FLAG(flags, SOP_CHECK)) Free_Series(hser); } while (i); if (hret) Free_Series(hret); Val_Init_Block(D_OUT, Copy_Array_Shallow(retser)); RESET_TAIL(retser); // required - allow reuse break; case REB_BINARY: cased = TRUE; SET_TYPE(D_OUT, REB_BINARY); case REB_STRING: i = VAL_LEN(val1); // Setup result block: if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2); retser = BUF_MOLD; Reset_Buffer(retser, i); RESET_TAIL(retser); do { REBUNI uc; cased = cased ? AM_FIND_CASE : 0; // Iterate over first series: ser = VAL_SERIES(val1); i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (GET_FLAG(flags, SOP_CHECK)) { h = Find_Str_Char(VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased) != NOT_FOUND; if (GET_FLAG(flags, SOP_INVERT)) h = !h; } if (h && (Find_Str_Char(retser, 0, 0, SERIES_TAIL(retser), skip, uc, cased) == NOT_FOUND)) { Append_String(retser, ser, i, skip); } } // Iterate over second series? if ((i = GET_FLAG(flags, SOP_BOTH))) { val = val1; val1 = val2; val2 = val; CLR_FLAG(flags, SOP_BOTH); } } while (i); ser = Copy_String(retser, 0, -1); if (IS_BINARY(D_OUT)) Val_Init_Binary(D_OUT, ser); else Val_Init_String(D_OUT, ser); break; case REB_BITSET: switch (flags) { case SET_OP_UNIQUE: return R_ARG1; case SET_OP_UNION: i = A_OR; break; case SET_OP_INTERSECT: i = A_AND; break; case SET_OP_DIFFERENCE: i = A_XOR; break; case SET_OP_EXCLUDE: i = 0; // special case break; } ser = Xandor_Binary(i, val1, val2); Val_Init_Bitset(D_OUT, ser); break; case REB_TYPESET: switch (flags) { case SET_OP_UNIQUE: break; case SET_OP_UNION: VAL_TYPESET(val1) |= VAL_TYPESET(val2); break; case SET_OP_INTERSECT: VAL_TYPESET(val1) &= VAL_TYPESET(val2); break; case SET_OP_DIFFERENCE: VAL_TYPESET(val1) ^= VAL_TYPESET(val2); break; case SET_OP_EXCLUDE: VAL_TYPESET(val1) &= ~VAL_TYPESET(val2); break; } return R_ARG1; default: raise Error_Invalid_Arg(val1); } return R_OUT; }
*/ static REBCNT Parse_Rules_Loop(REBPARSE *parse, REBCNT index, REBVAL *rules, REBCNT depth) /* ***********************************************************************/ { REBSER *series = parse->series; REBVAL *item; // current rule item REBVAL *word; // active word to be set REBCNT start; // recovery restart point REBCNT i; // temp index point REBCNT begin; // point at beginning of match REBINT count; // iterated pattern counter REBINT mincount; // min pattern count REBINT maxcount; // max pattern count REBVAL *item_hold; REBVAL *val; // spare REBCNT rulen; REBSER *ser; REBFLG flags; REBCNT cmd; REBVAL *rule_head = rules; CHECK_STACK(&flags); //if (depth > MAX_PARSE_DEPTH) Trap_Word(RE_LIMIT_HIT, SYM_PARSE, 0); flags = 0; word = 0; mincount = maxcount = 1; start = begin = index; // For each rule in the rule block: while (NOT_END(rules)) { //Print_Parse_Index(parse->type, rules, series, index); if (--Eval_Count <= 0 || Eval_Signals) Do_Signals(); //-------------------------------------------------------------------- // Pre-Rule Processing Section // // For non-iterated rules, including setup for iterated rules. // The input index is not advanced here, but may be changed by // a GET-WORD variable. //-------------------------------------------------------------------- item = rules++; // If word, set-word, or get-word, process it: if (VAL_TYPE(item) >= REB_WORD && VAL_TYPE(item) <= REB_GET_WORD) { // Is it a command word? if (cmd = VAL_CMD(item)) { if (!IS_WORD(item)) Trap1(RE_PARSE_COMMAND, item); // SET or GET not allowed if (cmd <= SYM_BREAK) { // optimization switch (cmd) { case SYM_OR_BAR: return index; // reached it successfully // Note: mincount = maxcount = 1 on entry case SYM_WHILE: SET_FLAG(flags, PF_WHILE); case SYM_ANY: mincount = 0; case SYM_SOME: maxcount = MAX_I32; continue; case SYM_OPT: mincount = 0; continue; case SYM_COPY: SET_FLAG(flags, PF_COPY); case SYM_SET: SET_FLAG(flags, PF_SET); item = rules++; if (!IS_WORD(item)) Trap1(RE_PARSE_VARIABLE, item); if (VAL_CMD(item)) Trap1(RE_PARSE_COMMAND, item); word = item; continue; case SYM_NOT: SET_FLAG(flags, PF_NOT); flags ^= (1<<PF_NOT2); continue; case SYM_AND: SET_FLAG(flags, PF_AND); continue; case SYM_THEN: SET_FLAG(flags, PF_THEN); continue; case SYM_REMOVE: SET_FLAG(flags, PF_REMOVE); continue; case SYM_INSERT: SET_FLAG(flags, PF_INSERT); goto post; case SYM_CHANGE: SET_FLAG(flags, PF_CHANGE); continue; case SYM_RETURN: if (IS_PAREN(rules)) { item = Do_Block_Value_Throw(rules); // might GC Throw_Return_Value(item); } SET_FLAG(flags, PF_RETURN); continue; case SYM_ACCEPT: case SYM_BREAK: parse->result = 1; return index; case SYM_REJECT: parse->result = -1; return index; case SYM_FAIL: index = NOT_FOUND; goto post; case SYM_IF: item = rules++; if (IS_END(item)) goto bad_end; if (!IS_PAREN(item)) Trap1(RE_PARSE_RULE, item); item = Do_Block_Value_Throw(item); // might GC if (IS_TRUE(item)) continue; else { index = NOT_FOUND; goto post; } case SYM_LIMIT: Trap0(RE_NOT_DONE); //val = Get_Parse_Value(rules++); // if (IS_INTEGER(val)) limit = index + Int32(val); // else if (ANY_SERIES(val)) limit = VAL_INDEX(val); // else goto //goto bad_rule; // goto post; case SYM_QQ: Print_Parse_Index(parse->type, rules, series, index); continue; } } // Any other cmd must be a match command, so proceed... } else { // It's not a PARSE command, get or set it: // word: - set a variable to the series at current index if (IS_SET_WORD(item)) { Set_Var_Series(item, parse->type, series, index); continue; } // :word - change the index for the series to a new position if (IS_GET_WORD(item)) { item = Get_Var(item); // CureCode #1263 change //if (parse->type != VAL_TYPE(item) || VAL_SERIES(item) != series) // Trap1(RE_PARSE_SERIES, rules-1); if (!ANY_SERIES(item)) Trap1(RE_PARSE_SERIES, rules-1); index = Set_Parse_Series(parse, item); series = parse->series; continue; } // word - some other variable if (IS_WORD(item)) { item = Get_Var(item); } // item can still be 'word or /word } } else if (ANY_PATH(item)) { item = Do_Parse_Path(item, parse, &index); // index can be modified if (index > series->tail) index = series->tail; if (item == 0) continue; // for SET and GET cases } if (IS_PAREN(item)) { Do_Block_Value_Throw(item); // might GC if (index > series->tail) index = series->tail; continue; } // Counter? 123 if (IS_INTEGER(item)) { // Specify count or range count SET_FLAG(flags, PF_WHILE); mincount = maxcount = Int32s(item, 0); item = Get_Parse_Value(rules++); if (IS_END(item)) Trap1(RE_PARSE_END, rules-2); if (IS_INTEGER(item)) { maxcount = Int32s(item, 0); item = Get_Parse_Value(rules++); if (IS_END(item)) Trap1(RE_PARSE_END, rules-2); } } // else fall through on other values and words //-------------------------------------------------------------------- // Iterated Rule Matching Section: // // Repeats the same rule N times or until the rule fails. // The index is advanced and stored in a temp variable i until // the entire rule has been satisfied. //-------------------------------------------------------------------- item_hold = item; // a command or literal match value if (VAL_TYPE(item) <= REB_UNSET || VAL_TYPE(item) >= REB_NATIVE) goto bad_rule; begin = index; // input at beginning of match section rulen = 0; // rules consumed (do not use rule++ below) i = index; //note: rules var already advanced for (count = 0; count < maxcount;) { item = item_hold; if (IS_WORD(item)) { switch (cmd = VAL_WORD_CANON(item)) { case SYM_SKIP: i = (index < series->tail) ? index+1 : NOT_FOUND; break; case SYM_END: i = (index < series->tail) ? NOT_FOUND : series->tail; break; case SYM_TO: case SYM_THRU: if (IS_END(rules)) goto bad_end; item = Get_Parse_Value(rules); rulen = 1; i = Parse_To(parse, index, item, cmd == SYM_THRU); break; case SYM_QUOTE: if (IS_END(rules)) goto bad_end; rulen = 1; if (IS_PAREN(rules)) { item = Do_Block_Value_Throw(rules); // might GC } else item = rules; i = (0 == Cmp_Value(BLK_SKIP(series, index), item, parse->flags & AM_FIND_CASE)) ? index+1 : NOT_FOUND; break; case SYM_INTO: if (IS_END(rules)) goto bad_end; rulen = 1; item = Get_Parse_Value(rules); // sub-rules if (!IS_BLOCK(item)) goto bad_rule; val = BLK_SKIP(series, index); i = ( (ANY_BINSTR(val) || ANY_BLOCK(val)) && (Parse_Series(val, VAL_BLK_DATA(item), parse->flags, depth+1) == VAL_TAIL(val)) ) ? index+1 : NOT_FOUND; break; case SYM_DO: if (!IS_BLOCK_INPUT(parse)) goto bad_rule; i = Do_Eval_Rule(parse, index, &rules); rulen = 1; break; default: goto bad_rule; } } else if (IS_BLOCK(item)) { item = VAL_BLK_DATA(item); //if (IS_END(rules) && item == rule_head) { // rules = item; // goto top; //} i = Parse_Rules_Loop(parse, index, item, depth+1); if (parse->result) { index = (parse->result > 0) ? i : NOT_FOUND; parse->result = 0; break; } } // Parse according to datatype: else { if (IS_BLOCK_INPUT(parse)) i = Parse_Next_Block(parse, index, item, depth+1); else i = Parse_Next_String(parse, index, item, depth+1); } // Necessary for special cases like: some [to end] // i: indicates new index or failure of the match, but // that does not mean failure of the rule, because optional // matches can still succeed, if if the last match failed. if (i != NOT_FOUND) { count++; // may overflow to negative if (count < 0) count = MAX_I32; // the forever case // If input did not advance: if (i == index && !GET_FLAG(flags, PF_WHILE)) { if (count < mincount) index = NOT_FOUND; // was not enough break; } } //if (i >= series->tail) { // OLD check: no more input else { if (count < mincount) index = NOT_FOUND; // was not enough else if (i != NOT_FOUND) index = i; // else keep index as is. break; } index = i; // A BREAK word stopped us: //if (parse->result) {parse->result = 0; break;} } rules += rulen; //if (index > series->tail && index != NOT_FOUND) index = series->tail; if (index > series->tail) index = NOT_FOUND; //-------------------------------------------------------------------- // Post Match Processing: //-------------------------------------------------------------------- post: // Process special flags: if (flags) { // NOT before all others: if (GET_FLAG(flags, PF_NOT)) { if (GET_FLAG(flags, PF_NOT2) && index != NOT_FOUND) index = NOT_FOUND; else index = begin; } if (index == NOT_FOUND) { // Failure actions: // not decided: if (word) Set_Var_Basic(word, REB_NONE); if (GET_FLAG(flags, PF_THEN)) { SKIP_TO_BAR(rules); if (!IS_END(rules)) rules++; } } else { // Success actions: count = (begin > index) ? 0 : index - begin; // how much we advanced the input if (GET_FLAG(flags, PF_COPY)) { ser = (IS_BLOCK_INPUT(parse)) ? Copy_Block_Len(series, begin, count) : Copy_String(series, begin, count); // condenses Set_Var_Series(word, parse->type, ser, 0); } else if (GET_FLAG(flags, PF_SET)) { if (IS_BLOCK_INPUT(parse)) { item = Get_Var_Safe(word); if (count == 0) SET_NONE(item); else *item = *BLK_SKIP(series, begin); } else { item = Get_Var_Safe(word); if (count == 0) SET_NONE(item); else { i = GET_ANY_CHAR(series, begin); if (parse->type == REB_BINARY) { SET_INTEGER(item, i); } else { SET_CHAR(item, i); } } } } if (GET_FLAG(flags, PF_RETURN)) { ser = (IS_BLOCK_INPUT(parse)) ? Copy_Block_Len(series, begin, count) : Copy_String(series, begin, count); // condenses Throw_Return_Series(parse->type, ser); } if (GET_FLAG(flags, PF_REMOVE)) { if (count) Remove_Series(series, begin, count); index = begin; } if (flags & (1<<PF_INSERT | 1<<PF_CHANGE)) { count = GET_FLAG(flags, PF_INSERT) ? 0 : count; cmd = GET_FLAG(flags, PF_INSERT) ? 0 : (1<<AN_PART); item = rules++; if (IS_END(item)) goto bad_end; // Check for ONLY flag: if (IS_WORD(item) && NZ(cmd = VAL_CMD(item))) { if (cmd != SYM_ONLY) goto bad_rule; cmd |= (1<<AN_ONLY); item = rules++; } // CHECK FOR QUOTE!! item = Get_Parse_Value(item); // new value if (IS_UNSET(item)) Trap1(RE_NO_VALUE, rules-1); if (IS_END(item)) goto bad_end; if (IS_BLOCK_INPUT(parse)) { index = Modify_Block(GET_FLAG(flags, PF_CHANGE) ? A_CHANGE : A_INSERT, series, begin, item, cmd, count, 1); if (IS_LIT_WORD(item)) SET_TYPE(BLK_SKIP(series, index-1), REB_WORD); } else { if (parse->type == REB_BINARY) cmd |= (1<<AN_SERIES); // special flag index = Modify_String(GET_FLAG(flags, PF_CHANGE) ? A_CHANGE : A_INSERT, series, begin, item, cmd, count, 1); } } if (GET_FLAG(flags, PF_AND)) index = begin; } flags = 0; word = 0; } // Goto alternate rule and reset input: if (index == NOT_FOUND) { SKIP_TO_BAR(rules); if (IS_END(rules)) break; rules++; index = begin = start; } begin = index; mincount = maxcount = 1; } return index; bad_rule: Trap1(RE_PARSE_RULE, rules-1); bad_end: Trap1(RE_PARSE_END, rules-1); return 0; }
*/ REBSER *Parse_String(REBSER *series, REBCNT index, REBVAL *rules, REBCNT flags) /* ***********************************************************************/ { REBCNT tail = series->tail; REBSER *blk; REBSER *set; REBCNT begin; REBCNT end; REBOOL skip_spaces = !(flags & PF_ALL); REBUNI uc; blk = BUF_EMIT; // shared series RESET_SERIES(blk); // String of delimiters or single character: if (IS_STRING(rules) || IS_CHAR(rules)) { begin = Find_Max_Bit(rules); if (begin <= ' ') begin = ' ' + 1; set = Make_Bitset(begin); Set_Bits(set, rules, TRUE); } // None, so use defaults ",;": else { set = Make_Bitset(1+MAX(',',';')); Set_Bit(set, ',', TRUE); Set_Bit(set, ';', TRUE); } SAVE_SERIES(set); // If required, make space delimiters too: if (skip_spaces) { for (uc = 1; uc <= ' '; uc++) Set_Bit(set, uc, TRUE); } while (index < tail) { if (--Eval_Count <= 0 || Eval_Signals) Do_Signals(); // Skip whitespace if not /all refinement: if (skip_spaces) { uc = 0; for (; index < tail; index++) { uc = GET_ANY_CHAR(series, index); if (!IS_WHITE(uc)) break; } } else uc = GET_ANY_CHAR(series, index); // prefetch if (index < tail) { // Handle quoted strings (in a simple way): if (uc == '"') { begin = ++index; // eat quote for (; index < tail; index++) { uc = GET_ANY_CHAR(series, index); if (uc == '"') break; } end = index; if (index < tail) index++; } // All other tokens: else { begin = index; for (; index < tail; index++) { if (Check_Bit(set, GET_ANY_CHAR(series, index), !(flags & PF_CASE))) break; } end = index; } // Skip trailing spaces: if (skip_spaces) for (; index < tail; index++) { uc = GET_ANY_CHAR(series, index); if (!IS_WHITE(uc)) break; } // Check for and remove separator: if (Check_Bit(set, GET_ANY_CHAR(series, index), !(flags & PF_CASE))) index++; // Append new string: Set_String(Append_Value(blk), Copy_String(series, begin, end - begin)); } } UNSAVE_SERIES(set); return Copy_Block(blk, 0); }