// // Do_Path_Throws: C // // Evaluate an ANY_PATH! REBVAL, starting from the index position of that // path value and continuing to the end. // // The evaluator may throw because GROUP! is evaluated, e.g. `foo/(throw 1020)` // // If label_sym is passed in as being non-null, then the caller is implying // readiness to process a path which may be a function with refinements. // These refinements will be left in order on the data stack in the case // that `out` comes back as IS_FUNCTION(). // // If `opt_setval` is given, the path operation will be done as a "SET-PATH!" // if the path evaluation did not throw or error. HOWEVER the set value // is NOT put into `out`. This provides more flexibility on performance in // the evaluator, which may already have the `val` where it wants it, and // so the extra assignment would just be overhead. // // !!! Path evaluation is one of the parts of R3-Alpha that has not been // vetted very heavily by Ren-C, and needs a review and overhaul. // REBOOL Do_Path_Throws( REBVAL *out, REBSYM *label_sym, const REBVAL *path, REBVAL *opt_setval ) { REBPVS pvs; REBDSP dsp_orig = DSP; assert(ANY_PATH(path)); // !!! There is a bug in the dispatch such that if you are running a // set path, it does not always assign the output, because it "thinks you // aren't going to look at it". This presumably originated from before // parens were allowed in paths, and neglects cases like: // // foo/(throw 1020): value // // We always have to check to see if a throw occurred. Until this is // streamlined, we have to at minimum set it to something that is *not* // thrown so that we aren't testing uninitialized memory. A safe trash // will do, which is unset in release builds. // if (opt_setval) SET_TRASH_SAFE(out); // None of the values passed in can live on the data stack, because // they might be relocated during the path evaluation process. // assert(!IN_DATA_STACK(out)); assert(!IN_DATA_STACK(path)); assert(!opt_setval || !IN_DATA_STACK(opt_setval)); // Not currently robust for reusing passed in path or value as the output assert(out != path && out != opt_setval); assert(!opt_setval || !THROWN(opt_setval)); // Initialize REBPVS -- see notes in %sys-do.h // pvs.opt_setval = opt_setval; pvs.store = out; pvs.orig = path; pvs.item = VAL_ARRAY_AT(pvs.orig); // may not be starting at head of PATH! // Seed the path evaluation process by looking up the first item (to // get a datatype to dispatch on for the later path items) // if (IS_WORD(pvs.item)) { pvs.value = GET_MUTABLE_VAR_MAY_FAIL(pvs.item); if (IS_UNSET(pvs.value)) fail (Error(RE_NO_VALUE, pvs.item)); } else { // !!! Ideally there would be some way to protect pvs.value during // successive path dispatches to make sure it does not get written. // This is semi-dangerously giving pvs.value a reference into the // input path, which should not be modified! pvs.value = VAL_ARRAY_AT(pvs.orig); } // Start evaluation of path: if (IS_END(pvs.item + 1)) { // If it was a single element path, return the value rather than // try to dispatch it (would cause a crash at time of writing) // // !!! Is this the desired behavior, or should it be an error? } else if (Path_Dispatch[VAL_TYPE_0(pvs.value)]) { REBOOL threw = Next_Path_Throws(&pvs); // !!! See comments about why the initialization of out is necessary. // Without it this assertion can change on some things: // // t: now // t/time: 10:20:03 // // (It thinks pvs.value has its THROWN bit set when it completed // successfully. It was a PE_USE_STORE case where pvs.value was reset to // pvs.store, and pvs.store has its thrown bit set. Valgrind does not // catch any uninitialized variables.) // // There are other cases that do trip valgrind when omitting the // initialization, though not as clearly reproducible. // assert(threw == THROWN(pvs.value)); if (threw) return TRUE; // Check for errors: if (NOT_END(pvs.item + 1) && !IS_FUNCTION(pvs.value)) { // Only function refinements should get by this line: fail (Error(RE_INVALID_PATH, pvs.orig, pvs.item)); } } else if (!IS_FUNCTION(pvs.value)) fail (Error(RE_BAD_PATH_TYPE, pvs.orig, Type_Of(pvs.value))); if (opt_setval) { // If SET then we don't return anything assert(IS_END(pvs.item) + 1); return FALSE; } // If storage was not used, then copy final value back to it: if (pvs.value != pvs.store) *pvs.store = *pvs.value; assert(!THROWN(out)); // Return 0 if not function or is :path/word... if (!IS_FUNCTION(pvs.value)) { assert(IS_END(pvs.item) + 1); return FALSE; } if (label_sym) { REBVAL refinement; VAL_INIT_WRITABLE_DEBUG(&refinement); // When a function is hit, path processing stops as soon as the // processed sub-path resolves to a function. The path is still sitting // on the position of the last component of that sub-path. Usually, // this last component in the sub-path is a word naming the function. // if (IS_WORD(pvs.item)) { *label_sym = VAL_WORD_SYM(pvs.item); } else { // In rarer cases, the final component (completing the sub-path to // the function to call) is not a word. Such as when you use a path // to pick by index out of a block of functions: // // functions: reduce [:add :subtract] // functions/1 10 20 // // Or when you have an immediate function value in a path with a // refinement. Tricky to make, but possible: // // do reduce [ // to-path reduce [:append 'only] [a] [b] // ] // // !!! When a function was not invoked through looking up a word // (or a word in a path) to use as a label, there were once three // different alternate labels used. One was SYM__APPLY_, another // was ROOT_NONAME, and another was to be the type of the function // being executed. None are fantastic, we do the type for now. *label_sym = SYM_FROM_KIND(VAL_TYPE(pvs.value)); } // Move on to the refinements (if any) ++pvs.item; // !!! Currently, the mainline path evaluation "punts" on refinements. // When it finds a function, it stops the path evaluation and leaves // the position pvs.path before the list of refinements. // // A more elegant solution would be able to process and notice (for // instance) that `:APPEND/ONLY` should yield a function value that // has been specialized with a refinement. Path chaining should thus // be able to effectively do this and give the refined function object // back to the evaluator or other client. // // If a label_sym is passed in, we recognize that a function dispatch // is going to be happening. We do not want to pay to generate the // new series that would be needed to make a temporary function that // will be invoked and immediately GC'd So we gather the refinements // on the data stack. // // This code simulates that path-processing-to-data-stack, but it // should really be something in dispatch iself. In any case, we put // refinements on the data stack...and caller knows refinements are // from dsp_orig to DSP (thanks to accounting, all other operations // should balance!) for (; NOT_END(pvs.item); ++pvs.item) { // "the refinements" if (IS_NONE(pvs.item)) continue; if (IS_GROUP(pvs.item)) { // Note it is not legal to use the data stack directly as the // output location for a DO (might be resized) if (DO_VAL_ARRAY_AT_THROWS(&refinement, pvs.item)) { *out = refinement; DS_DROP_TO(dsp_orig); return TRUE; } if (IS_NONE(&refinement)) continue; DS_PUSH(&refinement); } else if (IS_GET_WORD(pvs.item)) { DS_PUSH_TRASH; *DS_TOP = *GET_OPT_VAR_MAY_FAIL(pvs.item); if (IS_NONE(DS_TOP)) { DS_DROP; continue; } } else DS_PUSH(pvs.item); // Whatever we were trying to use as a refinement should now be // on the top of the data stack, and only words are legal ATM // if (!IS_WORD(DS_TOP)) fail (Error(RE_BAD_REFINE, DS_TOP)); // Go ahead and canonize the word symbol so we don't have to // do it each time in order to get a case-insenstive compare // INIT_WORD_SYM(DS_TOP, SYMBOL_TO_CANON(VAL_WORD_SYM(DS_TOP))); } // To make things easier for processing, reverse the refinements on // the data stack (we needed to evaluate them in forward order). // This way we can just pop them as we go, and know if they weren't // all consumed if it doesn't get back to `dsp_orig` by the end. if (dsp_orig != DSP) { REBVAL *bottom = DS_AT(dsp_orig + 1); REBVAL *top = DS_TOP; while (top > bottom) { refinement = *bottom; *bottom = *top; *top = refinement; top--; bottom++; } } } else { // !!! Historically this just ignores a result indicating this is a // function with refinements, e.g. ':append/only'. However that // ignoring seems unwise. It should presumably create a modified // function in that case which acts as if it has the refinement. // // If the caller did not pass in a label pointer we assume they are // likely not ready to process any refinements. // if (NOT_END(pvs.item + 1)) fail (Error(RE_TOO_LONG)); // !!! Better error or add feature } return FALSE; }
*/ static REBCNT Parse_Rules_Loop(REBPARSE *parse, REBCNT index, REBVAL *rules, REBCNT depth) /* ***********************************************************************/ { REBSER *series = parse->series; REBVAL *item; // current rule item REBVAL *word; // active word to be set REBCNT start; // recovery restart point REBCNT i; // temp index point REBCNT begin; // point at beginning of match REBINT count; // iterated pattern counter REBINT mincount; // min pattern count REBINT maxcount; // max pattern count REBVAL *item_hold; REBVAL *val; // spare REBCNT rulen; REBSER *ser; REBFLG flags; REBCNT cmd; REBVAL *rule_head = rules; CHECK_STACK(&flags); //if (depth > MAX_PARSE_DEPTH) Trap_Word(RE_LIMIT_HIT, SYM_PARSE, 0); flags = 0; word = 0; mincount = maxcount = 1; start = begin = index; // For each rule in the rule block: while (NOT_END(rules)) { //Print_Parse_Index(parse->type, rules, series, index); if (--Eval_Count <= 0 || Eval_Signals) Do_Signals(); //-------------------------------------------------------------------- // Pre-Rule Processing Section // // For non-iterated rules, including setup for iterated rules. // The input index is not advanced here, but may be changed by // a GET-WORD variable. //-------------------------------------------------------------------- item = rules++; // If word, set-word, or get-word, process it: if (VAL_TYPE(item) >= REB_WORD && VAL_TYPE(item) <= REB_GET_WORD) { // Is it a command word? if (cmd = VAL_CMD(item)) { if (!IS_WORD(item)) Trap1(RE_PARSE_COMMAND, item); // SET or GET not allowed if (cmd <= SYM_BREAK) { // optimization switch (cmd) { case SYM_OR_BAR: return index; // reached it successfully // Note: mincount = maxcount = 1 on entry case SYM_WHILE: SET_FLAG(flags, PF_WHILE); case SYM_ANY: mincount = 0; case SYM_SOME: maxcount = MAX_I32; continue; case SYM_OPT: mincount = 0; continue; case SYM_COPY: SET_FLAG(flags, PF_COPY); case SYM_SET: SET_FLAG(flags, PF_SET); item = rules++; if (!IS_WORD(item)) Trap1(RE_PARSE_VARIABLE, item); if (VAL_CMD(item)) Trap1(RE_PARSE_COMMAND, item); word = item; continue; case SYM_NOT: SET_FLAG(flags, PF_NOT); flags ^= (1<<PF_NOT2); continue; case SYM_AND: SET_FLAG(flags, PF_AND); continue; case SYM_THEN: SET_FLAG(flags, PF_THEN); continue; case SYM_REMOVE: SET_FLAG(flags, PF_REMOVE); continue; case SYM_INSERT: SET_FLAG(flags, PF_INSERT); goto post; case SYM_CHANGE: SET_FLAG(flags, PF_CHANGE); continue; case SYM_RETURN: if (IS_PAREN(rules)) { item = Do_Block_Value_Throw(rules); // might GC Throw_Return_Value(item); } SET_FLAG(flags, PF_RETURN); continue; case SYM_ACCEPT: case SYM_BREAK: parse->result = 1; return index; case SYM_REJECT: parse->result = -1; return index; case SYM_FAIL: index = NOT_FOUND; goto post; case SYM_IF: item = rules++; if (IS_END(item)) goto bad_end; if (!IS_PAREN(item)) Trap1(RE_PARSE_RULE, item); item = Do_Block_Value_Throw(item); // might GC if (IS_TRUE(item)) continue; else { index = NOT_FOUND; goto post; } case SYM_LIMIT: Trap0(RE_NOT_DONE); //val = Get_Parse_Value(rules++); // if (IS_INTEGER(val)) limit = index + Int32(val); // else if (ANY_SERIES(val)) limit = VAL_INDEX(val); // else goto //goto bad_rule; // goto post; case SYM_QQ: Print_Parse_Index(parse->type, rules, series, index); continue; } } // Any other cmd must be a match command, so proceed... } else { // It's not a PARSE command, get or set it: // word: - set a variable to the series at current index if (IS_SET_WORD(item)) { Set_Var_Series(item, parse->type, series, index); continue; } // :word - change the index for the series to a new position if (IS_GET_WORD(item)) { item = Get_Var(item); // CureCode #1263 change //if (parse->type != VAL_TYPE(item) || VAL_SERIES(item) != series) // Trap1(RE_PARSE_SERIES, rules-1); if (!ANY_SERIES(item)) Trap1(RE_PARSE_SERIES, rules-1); index = Set_Parse_Series(parse, item); series = parse->series; continue; } // word - some other variable if (IS_WORD(item)) { item = Get_Var(item); } // item can still be 'word or /word } } else if (ANY_PATH(item)) { item = Do_Parse_Path(item, parse, &index); // index can be modified if (index > series->tail) index = series->tail; if (item == 0) continue; // for SET and GET cases } if (IS_PAREN(item)) { Do_Block_Value_Throw(item); // might GC if (index > series->tail) index = series->tail; continue; } // Counter? 123 if (IS_INTEGER(item)) { // Specify count or range count SET_FLAG(flags, PF_WHILE); mincount = maxcount = Int32s(item, 0); item = Get_Parse_Value(rules++); if (IS_END(item)) Trap1(RE_PARSE_END, rules-2); if (IS_INTEGER(item)) { maxcount = Int32s(item, 0); item = Get_Parse_Value(rules++); if (IS_END(item)) Trap1(RE_PARSE_END, rules-2); } } // else fall through on other values and words //-------------------------------------------------------------------- // Iterated Rule Matching Section: // // Repeats the same rule N times or until the rule fails. // The index is advanced and stored in a temp variable i until // the entire rule has been satisfied. //-------------------------------------------------------------------- item_hold = item; // a command or literal match value if (VAL_TYPE(item) <= REB_UNSET || VAL_TYPE(item) >= REB_NATIVE) goto bad_rule; begin = index; // input at beginning of match section rulen = 0; // rules consumed (do not use rule++ below) i = index; //note: rules var already advanced for (count = 0; count < maxcount;) { item = item_hold; if (IS_WORD(item)) { switch (cmd = VAL_WORD_CANON(item)) { case SYM_SKIP: i = (index < series->tail) ? index+1 : NOT_FOUND; break; case SYM_END: i = (index < series->tail) ? NOT_FOUND : series->tail; break; case SYM_TO: case SYM_THRU: if (IS_END(rules)) goto bad_end; item = Get_Parse_Value(rules); rulen = 1; i = Parse_To(parse, index, item, cmd == SYM_THRU); break; case SYM_QUOTE: if (IS_END(rules)) goto bad_end; rulen = 1; if (IS_PAREN(rules)) { item = Do_Block_Value_Throw(rules); // might GC } else item = rules; i = (0 == Cmp_Value(BLK_SKIP(series, index), item, parse->flags & AM_FIND_CASE)) ? index+1 : NOT_FOUND; break; case SYM_INTO: if (IS_END(rules)) goto bad_end; rulen = 1; item = Get_Parse_Value(rules); // sub-rules if (!IS_BLOCK(item)) goto bad_rule; val = BLK_SKIP(series, index); i = ( (ANY_BINSTR(val) || ANY_BLOCK(val)) && (Parse_Series(val, VAL_BLK_DATA(item), parse->flags, depth+1) == VAL_TAIL(val)) ) ? index+1 : NOT_FOUND; break; case SYM_DO: if (!IS_BLOCK_INPUT(parse)) goto bad_rule; i = Do_Eval_Rule(parse, index, &rules); rulen = 1; break; default: goto bad_rule; } } else if (IS_BLOCK(item)) { item = VAL_BLK_DATA(item); //if (IS_END(rules) && item == rule_head) { // rules = item; // goto top; //} i = Parse_Rules_Loop(parse, index, item, depth+1); if (parse->result) { index = (parse->result > 0) ? i : NOT_FOUND; parse->result = 0; break; } } // Parse according to datatype: else { if (IS_BLOCK_INPUT(parse)) i = Parse_Next_Block(parse, index, item, depth+1); else i = Parse_Next_String(parse, index, item, depth+1); } // Necessary for special cases like: some [to end] // i: indicates new index or failure of the match, but // that does not mean failure of the rule, because optional // matches can still succeed, if if the last match failed. if (i != NOT_FOUND) { count++; // may overflow to negative if (count < 0) count = MAX_I32; // the forever case // If input did not advance: if (i == index && !GET_FLAG(flags, PF_WHILE)) { if (count < mincount) index = NOT_FOUND; // was not enough break; } } //if (i >= series->tail) { // OLD check: no more input else { if (count < mincount) index = NOT_FOUND; // was not enough else if (i != NOT_FOUND) index = i; // else keep index as is. break; } index = i; // A BREAK word stopped us: //if (parse->result) {parse->result = 0; break;} } rules += rulen; //if (index > series->tail && index != NOT_FOUND) index = series->tail; if (index > series->tail) index = NOT_FOUND; //-------------------------------------------------------------------- // Post Match Processing: //-------------------------------------------------------------------- post: // Process special flags: if (flags) { // NOT before all others: if (GET_FLAG(flags, PF_NOT)) { if (GET_FLAG(flags, PF_NOT2) && index != NOT_FOUND) index = NOT_FOUND; else index = begin; } if (index == NOT_FOUND) { // Failure actions: // not decided: if (word) Set_Var_Basic(word, REB_NONE); if (GET_FLAG(flags, PF_THEN)) { SKIP_TO_BAR(rules); if (!IS_END(rules)) rules++; } } else { // Success actions: count = (begin > index) ? 0 : index - begin; // how much we advanced the input if (GET_FLAG(flags, PF_COPY)) { ser = (IS_BLOCK_INPUT(parse)) ? Copy_Block_Len(series, begin, count) : Copy_String(series, begin, count); // condenses Set_Var_Series(word, parse->type, ser, 0); } else if (GET_FLAG(flags, PF_SET)) { if (IS_BLOCK_INPUT(parse)) { item = Get_Var_Safe(word); if (count == 0) SET_NONE(item); else *item = *BLK_SKIP(series, begin); } else { item = Get_Var_Safe(word); if (count == 0) SET_NONE(item); else { i = GET_ANY_CHAR(series, begin); if (parse->type == REB_BINARY) { SET_INTEGER(item, i); } else { SET_CHAR(item, i); } } } } if (GET_FLAG(flags, PF_RETURN)) { ser = (IS_BLOCK_INPUT(parse)) ? Copy_Block_Len(series, begin, count) : Copy_String(series, begin, count); // condenses Throw_Return_Series(parse->type, ser); } if (GET_FLAG(flags, PF_REMOVE)) { if (count) Remove_Series(series, begin, count); index = begin; } if (flags & (1<<PF_INSERT | 1<<PF_CHANGE)) { count = GET_FLAG(flags, PF_INSERT) ? 0 : count; cmd = GET_FLAG(flags, PF_INSERT) ? 0 : (1<<AN_PART); item = rules++; if (IS_END(item)) goto bad_end; // Check for ONLY flag: if (IS_WORD(item) && NZ(cmd = VAL_CMD(item))) { if (cmd != SYM_ONLY) goto bad_rule; cmd |= (1<<AN_ONLY); item = rules++; } // CHECK FOR QUOTE!! item = Get_Parse_Value(item); // new value if (IS_UNSET(item)) Trap1(RE_NO_VALUE, rules-1); if (IS_END(item)) goto bad_end; if (IS_BLOCK_INPUT(parse)) { index = Modify_Block(GET_FLAG(flags, PF_CHANGE) ? A_CHANGE : A_INSERT, series, begin, item, cmd, count, 1); if (IS_LIT_WORD(item)) SET_TYPE(BLK_SKIP(series, index-1), REB_WORD); } else { if (parse->type == REB_BINARY) cmd |= (1<<AN_SERIES); // special flag index = Modify_String(GET_FLAG(flags, PF_CHANGE) ? A_CHANGE : A_INSERT, series, begin, item, cmd, count, 1); } } if (GET_FLAG(flags, PF_AND)) index = begin; } flags = 0; word = 0; } // Goto alternate rule and reset input: if (index == NOT_FOUND) { SKIP_TO_BAR(rules); if (IS_END(rules)) break; rules++; index = begin = start; } begin = index; mincount = maxcount = 1; } return index; bad_rule: Trap1(RE_PARSE_RULE, rules-1); bad_end: Trap1(RE_PARSE_END, rules-1); return 0; }