Example #1
0
//
//  Do_Path_Throws: C
//
// Evaluate an ANY_PATH! REBVAL, starting from the index position of that
// path value and continuing to the end.
//
// The evaluator may throw because GROUP! is evaluated, e.g. `foo/(throw 1020)`
//
// If label_sym is passed in as being non-null, then the caller is implying
// readiness to process a path which may be a function with refinements.
// These refinements will be left in order on the data stack in the case
// that `out` comes back as IS_FUNCTION().
//
// If `opt_setval` is given, the path operation will be done as a "SET-PATH!"
// if the path evaluation did not throw or error.  HOWEVER the set value
// is NOT put into `out`.  This provides more flexibility on performance in
// the evaluator, which may already have the `val` where it wants it, and
// so the extra assignment would just be overhead.
//
// !!! Path evaluation is one of the parts of R3-Alpha that has not been
// vetted very heavily by Ren-C, and needs a review and overhaul.
//
REBOOL Do_Path_Throws(
    REBVAL *out,
    REBSYM *label_sym,
    const REBVAL *path,
    REBVAL *opt_setval
) {
    REBPVS pvs;
    REBDSP dsp_orig = DSP;

    assert(ANY_PATH(path));

    // !!! There is a bug in the dispatch such that if you are running a
    // set path, it does not always assign the output, because it "thinks you
    // aren't going to look at it".  This presumably originated from before
    // parens were allowed in paths, and neglects cases like:
    //
    //     foo/(throw 1020): value
    //
    // We always have to check to see if a throw occurred.  Until this is
    // streamlined, we have to at minimum set it to something that is *not*
    // thrown so that we aren't testing uninitialized memory.  A safe trash
    // will do, which is unset in release builds.
    //
    if (opt_setval)
        SET_TRASH_SAFE(out);

    // None of the values passed in can live on the data stack, because
    // they might be relocated during the path evaluation process.
    //
    assert(!IN_DATA_STACK(out));
    assert(!IN_DATA_STACK(path));
    assert(!opt_setval || !IN_DATA_STACK(opt_setval));

    // Not currently robust for reusing passed in path or value as the output
    assert(out != path && out != opt_setval);

    assert(!opt_setval || !THROWN(opt_setval));

    // Initialize REBPVS -- see notes in %sys-do.h
    //
    pvs.opt_setval = opt_setval;
    pvs.store = out;
    pvs.orig = path;
    pvs.item = VAL_ARRAY_AT(pvs.orig); // may not be starting at head of PATH!

    // Seed the path evaluation process by looking up the first item (to
    // get a datatype to dispatch on for the later path items)
    //
    if (IS_WORD(pvs.item)) {
        pvs.value = GET_MUTABLE_VAR_MAY_FAIL(pvs.item);
        if (IS_UNSET(pvs.value))
            fail (Error(RE_NO_VALUE, pvs.item));
    }
    else {
        // !!! Ideally there would be some way to protect pvs.value during
        // successive path dispatches to make sure it does not get written.
        // This is semi-dangerously giving pvs.value a reference into the
        // input path, which should not be modified!

        pvs.value = VAL_ARRAY_AT(pvs.orig);
    }

    // Start evaluation of path:
    if (IS_END(pvs.item + 1)) {
        // If it was a single element path, return the value rather than
        // try to dispatch it (would cause a crash at time of writing)
        //
        // !!! Is this the desired behavior, or should it be an error?
    }
    else if (Path_Dispatch[VAL_TYPE_0(pvs.value)]) {
        REBOOL threw = Next_Path_Throws(&pvs);

        // !!! See comments about why the initialization of out is necessary.
        // Without it this assertion can change on some things:
        //
        //     t: now
        //     t/time: 10:20:03
        //
        // (It thinks pvs.value has its THROWN bit set when it completed
        // successfully.  It was a PE_USE_STORE case where pvs.value was reset to
        // pvs.store, and pvs.store has its thrown bit set.  Valgrind does not
        // catch any uninitialized variables.)
        //
        // There are other cases that do trip valgrind when omitting the
        // initialization, though not as clearly reproducible.
        //
        assert(threw == THROWN(pvs.value));

        if (threw) return TRUE;

        // Check for errors:
        if (NOT_END(pvs.item + 1) && !IS_FUNCTION(pvs.value)) {
            // Only function refinements should get by this line:
            fail (Error(RE_INVALID_PATH, pvs.orig, pvs.item));
        }
    }
    else if (!IS_FUNCTION(pvs.value))
        fail (Error(RE_BAD_PATH_TYPE, pvs.orig, Type_Of(pvs.value)));

    if (opt_setval) {
        // If SET then we don't return anything
        assert(IS_END(pvs.item) + 1);
        return FALSE;
    }

    // If storage was not used, then copy final value back to it:
    if (pvs.value != pvs.store) *pvs.store = *pvs.value;

    assert(!THROWN(out));

    // Return 0 if not function or is :path/word...
    if (!IS_FUNCTION(pvs.value)) {
        assert(IS_END(pvs.item) + 1);
        return FALSE;
    }

    if (label_sym) {
        REBVAL refinement;
        VAL_INIT_WRITABLE_DEBUG(&refinement);

        // When a function is hit, path processing stops as soon as the
        // processed sub-path resolves to a function. The path is still sitting
        // on the position of the last component of that sub-path. Usually,
        // this last component in the sub-path is a word naming the function.
        //
        if (IS_WORD(pvs.item)) {
            *label_sym = VAL_WORD_SYM(pvs.item);
        }
        else {
            // In rarer cases, the final component (completing the sub-path to
            // the function to call) is not a word. Such as when you use a path
            // to pick by index out of a block of functions:
            //
            //      functions: reduce [:add :subtract]
            //      functions/1 10 20
            //
            // Or when you have an immediate function value in a path with a
            // refinement. Tricky to make, but possible:
            //
            //      do reduce [
            //          to-path reduce [:append 'only] [a] [b]
            //      ]
            //

            // !!! When a function was not invoked through looking up a word
            // (or a word in a path) to use as a label, there were once three
            // different alternate labels used.  One was SYM__APPLY_, another
            // was ROOT_NONAME, and another was to be the type of the function
            // being executed.  None are fantastic, we do the type for now.

            *label_sym = SYM_FROM_KIND(VAL_TYPE(pvs.value));
        }

        // Move on to the refinements (if any)
        ++pvs.item;

        // !!! Currently, the mainline path evaluation "punts" on refinements.
        // When it finds a function, it stops the path evaluation and leaves
        // the position pvs.path before the list of refinements.
        //
        // A more elegant solution would be able to process and notice (for
        // instance) that `:APPEND/ONLY` should yield a function value that
        // has been specialized with a refinement.  Path chaining should thus
        // be able to effectively do this and give the refined function object
        // back to the evaluator or other client.
        //
        // If a label_sym is passed in, we recognize that a function dispatch
        // is going to be happening.  We do not want to pay to generate the
        // new series that would be needed to make a temporary function that
        // will be invoked and immediately GC'd  So we gather the refinements
        // on the data stack.
        //
        // This code simulates that path-processing-to-data-stack, but it
        // should really be something in dispatch iself.  In any case, we put
        // refinements on the data stack...and caller knows refinements are
        // from dsp_orig to DSP (thanks to accounting, all other operations
        // should balance!)

        for (; NOT_END(pvs.item); ++pvs.item) { // "the refinements"
            if (IS_NONE(pvs.item)) continue;

            if (IS_GROUP(pvs.item)) {
                // Note it is not legal to use the data stack directly as the
                // output location for a DO (might be resized)

                if (DO_VAL_ARRAY_AT_THROWS(&refinement, pvs.item)) {
                    *out = refinement;
                    DS_DROP_TO(dsp_orig);
                    return TRUE;
                }
                if (IS_NONE(&refinement)) continue;
                DS_PUSH(&refinement);
            }
            else if (IS_GET_WORD(pvs.item)) {
                DS_PUSH_TRASH;
                *DS_TOP = *GET_OPT_VAR_MAY_FAIL(pvs.item);
                if (IS_NONE(DS_TOP)) {
                    DS_DROP;
                    continue;
                }
            }
            else DS_PUSH(pvs.item);

            // Whatever we were trying to use as a refinement should now be
            // on the top of the data stack, and only words are legal ATM
            //
            if (!IS_WORD(DS_TOP))
                fail (Error(RE_BAD_REFINE, DS_TOP));

            // Go ahead and canonize the word symbol so we don't have to
            // do it each time in order to get a case-insenstive compare
            //
            INIT_WORD_SYM(DS_TOP, SYMBOL_TO_CANON(VAL_WORD_SYM(DS_TOP)));
        }

        // To make things easier for processing, reverse the refinements on
        // the data stack (we needed to evaluate them in forward order).
        // This way we can just pop them as we go, and know if they weren't
        // all consumed if it doesn't get back to `dsp_orig` by the end.

        if (dsp_orig != DSP) {
            REBVAL *bottom = DS_AT(dsp_orig + 1);
            REBVAL *top = DS_TOP;
            while (top > bottom) {
                refinement = *bottom;
                *bottom = *top;
                *top = refinement;

                top--;
                bottom++;
            }
        }
    }
    else {
        // !!! Historically this just ignores a result indicating this is a
        // function with refinements, e.g. ':append/only'.  However that
        // ignoring seems unwise.  It should presumably create a modified
        // function in that case which acts as if it has the refinement.
        //
        // If the caller did not pass in a label pointer we assume they are
        // likely not ready to process any refinements.
        //
        if (NOT_END(pvs.item + 1))
            fail (Error(RE_TOO_LONG)); // !!! Better error or add feature
    }

    return FALSE;
}
Example #2
0
*/	static REBCNT Parse_Rules_Loop(REBPARSE *parse, REBCNT index, REBVAL *rules, REBCNT depth)
/*
***********************************************************************/
{
	REBSER *series = parse->series;
	REBVAL *item;		// current rule item
	REBVAL *word;		// active word to be set
	REBCNT start;		// recovery restart point
	REBCNT i;			// temp index point
	REBCNT begin;		// point at beginning of match
	REBINT count;		// iterated pattern counter
	REBINT mincount;	// min pattern count
	REBINT maxcount;	// max pattern count
	REBVAL *item_hold;
	REBVAL *val;		// spare
	REBCNT rulen;
	REBSER *ser;
	REBFLG flags;
	REBCNT cmd;
	REBVAL *rule_head = rules;

	CHECK_STACK(&flags);
	//if (depth > MAX_PARSE_DEPTH) Trap_Word(RE_LIMIT_HIT, SYM_PARSE, 0);
	flags = 0;
	word = 0;
	mincount = maxcount = 1;
	start = begin = index;

	// For each rule in the rule block:
	while (NOT_END(rules)) {

		//Print_Parse_Index(parse->type, rules, series, index);

		if (--Eval_Count <= 0 || Eval_Signals) Do_Signals();

		//--------------------------------------------------------------------
		// Pre-Rule Processing Section
		//
		// For non-iterated rules, including setup for iterated rules.
		// The input index is not advanced here, but may be changed by
		// a GET-WORD variable.
		//--------------------------------------------------------------------

		item = rules++;

		// If word, set-word, or get-word, process it:
		if (VAL_TYPE(item) >= REB_WORD && VAL_TYPE(item) <= REB_GET_WORD) {

			// Is it a command word?
			if (cmd = VAL_CMD(item)) {

				if (!IS_WORD(item)) Trap1(RE_PARSE_COMMAND, item); // SET or GET not allowed

				if (cmd <= SYM_BREAK) { // optimization

					switch (cmd) {

					case SYM_OR_BAR:
						return index;	// reached it successfully

					// Note: mincount = maxcount = 1 on entry
					case SYM_WHILE:
						SET_FLAG(flags, PF_WHILE);
					case SYM_ANY:
						mincount = 0;
					case SYM_SOME:
						maxcount = MAX_I32;
						continue;

					case SYM_OPT:
						mincount = 0;
						continue;

					case SYM_COPY:
						SET_FLAG(flags, PF_COPY);
					case SYM_SET:
						SET_FLAG(flags, PF_SET);
						item = rules++;
						if (!IS_WORD(item)) Trap1(RE_PARSE_VARIABLE, item);
						if (VAL_CMD(item)) Trap1(RE_PARSE_COMMAND, item);
						word = item;
						continue;

					case SYM_NOT:
						SET_FLAG(flags, PF_NOT);
						flags ^= (1<<PF_NOT2);
						continue;
	
					case SYM_AND:
						SET_FLAG(flags, PF_AND);
						continue;

					case SYM_THEN:
						SET_FLAG(flags, PF_THEN);
						continue;

					case SYM_REMOVE:
						SET_FLAG(flags, PF_REMOVE);
						continue;
					
					case SYM_INSERT:
						SET_FLAG(flags, PF_INSERT);
						goto post;
					
					case SYM_CHANGE:
						SET_FLAG(flags, PF_CHANGE);
						continue;

					case SYM_RETURN:
						if (IS_PAREN(rules)) {
							item = Do_Block_Value_Throw(rules); // might GC
							Throw_Return_Value(item);
						}
						SET_FLAG(flags, PF_RETURN);
						continue;

					case SYM_ACCEPT:
					case SYM_BREAK:
						parse->result = 1;
						return index;

					case SYM_REJECT:
						parse->result = -1;
						return index;

					case SYM_FAIL:
						index = NOT_FOUND;
						goto post;

					case SYM_IF:
						item = rules++;
						if (IS_END(item)) goto bad_end;
						if (!IS_PAREN(item)) Trap1(RE_PARSE_RULE, item);
						item = Do_Block_Value_Throw(item); // might GC
						if (IS_TRUE(item)) continue;
						else {
							index = NOT_FOUND;
							goto post;
						}

					case SYM_LIMIT:
						Trap0(RE_NOT_DONE);
						//val = Get_Parse_Value(rules++);
					//	if (IS_INTEGER(val)) limit = index + Int32(val);
					//	else if (ANY_SERIES(val)) limit = VAL_INDEX(val);
					//	else goto
						//goto bad_rule;
					//	goto post;

					case SYM_QQ:
						Print_Parse_Index(parse->type, rules, series, index);
						continue;
					}
				}
				// Any other cmd must be a match command, so proceed...

			} else { // It's not a PARSE command, get or set it:

				// word: - set a variable to the series at current index
				if (IS_SET_WORD(item)) {
					Set_Var_Series(item, parse->type, series, index);
					continue;
				}

				// :word - change the index for the series to a new position
				if (IS_GET_WORD(item)) {
					item = Get_Var(item);
					// CureCode #1263 change
					//if (parse->type != VAL_TYPE(item) || VAL_SERIES(item) != series)
					//	Trap1(RE_PARSE_SERIES, rules-1);
					if (!ANY_SERIES(item)) Trap1(RE_PARSE_SERIES, rules-1);
					index = Set_Parse_Series(parse, item);
					series = parse->series;
					continue;
				}

				// word - some other variable
				if (IS_WORD(item)) {
					item = Get_Var(item);
				}

				// item can still be 'word or /word
			}
		}
		else if (ANY_PATH(item)) {
			item = Do_Parse_Path(item, parse, &index); // index can be modified
			if (index > series->tail) index = series->tail;
			if (item == 0) continue; // for SET and GET cases
		}

		if (IS_PAREN(item)) {
			Do_Block_Value_Throw(item); // might GC
			if (index > series->tail) index = series->tail;
			continue;
		}

		// Counter? 123
		if (IS_INTEGER(item)) {	// Specify count or range count
			SET_FLAG(flags, PF_WHILE);
			mincount = maxcount = Int32s(item, 0);
			item = Get_Parse_Value(rules++);
			if (IS_END(item)) Trap1(RE_PARSE_END, rules-2);
			if (IS_INTEGER(item)) {
				maxcount = Int32s(item, 0);
				item = Get_Parse_Value(rules++);
				if (IS_END(item)) Trap1(RE_PARSE_END, rules-2);
			}
		}
		// else fall through on other values and words

		//--------------------------------------------------------------------
		// Iterated Rule Matching Section:
		//
		// Repeats the same rule N times or until the rule fails.
		// The index is advanced and stored in a temp variable i until
		// the entire rule has been satisfied.
		//--------------------------------------------------------------------

		item_hold = item;	// a command or literal match value
		if (VAL_TYPE(item) <= REB_UNSET || VAL_TYPE(item) >= REB_NATIVE) goto bad_rule;
		begin = index;		// input at beginning of match section
		rulen = 0;			// rules consumed (do not use rule++ below)
		i = index;

		//note: rules var already advanced

		for (count = 0; count < maxcount;) {

			item = item_hold;

			if (IS_WORD(item)) {

				switch (cmd = VAL_WORD_CANON(item)) {

				case SYM_SKIP:
					i = (index < series->tail) ? index+1 : NOT_FOUND;
					break;

				case SYM_END:
					i = (index < series->tail) ? NOT_FOUND : series->tail;
					break;

				case SYM_TO:
				case SYM_THRU:
					if (IS_END(rules)) goto bad_end;
					item = Get_Parse_Value(rules);
					rulen = 1;
					i = Parse_To(parse, index, item, cmd == SYM_THRU);
					break;
					
				case SYM_QUOTE:
					if (IS_END(rules)) goto bad_end;
					rulen = 1;
					if (IS_PAREN(rules)) {
						item = Do_Block_Value_Throw(rules); // might GC
					}
					else item = rules;
					i = (0 == Cmp_Value(BLK_SKIP(series, index), item, parse->flags & AM_FIND_CASE)) ? index+1 : NOT_FOUND;
					break;

				case SYM_INTO:
					if (IS_END(rules)) goto bad_end;
					rulen = 1;
					item = Get_Parse_Value(rules); // sub-rules
					if (!IS_BLOCK(item)) goto bad_rule;
					val = BLK_SKIP(series, index);
					i = (
						(ANY_BINSTR(val) || ANY_BLOCK(val))
						&& (Parse_Series(val, VAL_BLK_DATA(item), parse->flags, depth+1) == VAL_TAIL(val))
					) ? index+1 : NOT_FOUND;
					break;

				case SYM_DO:
					if (!IS_BLOCK_INPUT(parse)) goto bad_rule;
					i = Do_Eval_Rule(parse, index, &rules);
					rulen = 1;
					break;

				default:
					goto bad_rule;
				}
			}
			else if (IS_BLOCK(item)) {
				item = VAL_BLK_DATA(item);
				//if (IS_END(rules) && item == rule_head) {
				//	rules = item;
				//	goto top;
				//}
				i = Parse_Rules_Loop(parse, index, item, depth+1);
				if (parse->result) {
					index = (parse->result > 0) ? i : NOT_FOUND;
					parse->result = 0;
					break;
				}
			}
			// Parse according to datatype:
			else {
				if (IS_BLOCK_INPUT(parse))
					i = Parse_Next_Block(parse, index, item, depth+1);
				else
					i = Parse_Next_String(parse, index, item, depth+1);
			}

			// Necessary for special cases like: some [to end]
			// i: indicates new index or failure of the match, but
			// that does not mean failure of the rule, because optional
			// matches can still succeed, if if the last match failed.
			if (i != NOT_FOUND) {
				count++; // may overflow to negative
				if (count < 0) count = MAX_I32; // the forever case
				// If input did not advance:
				if (i == index && !GET_FLAG(flags, PF_WHILE)) {
					if (count < mincount) index = NOT_FOUND; // was not enough
					break;
				}
			}
			//if (i >= series->tail) {     // OLD check: no more input
			else {
				if (count < mincount) index = NOT_FOUND; // was not enough
				else if (i != NOT_FOUND) index = i;
				// else keep index as is.
				break;
			}
			index = i;

			// A BREAK word stopped us:
			//if (parse->result) {parse->result = 0; break;}
		}

		rules += rulen;

		//if (index > series->tail && index != NOT_FOUND) index = series->tail;
		if (index > series->tail) index = NOT_FOUND;

		//--------------------------------------------------------------------
		// Post Match Processing:
		//--------------------------------------------------------------------
post:
		// Process special flags:
		if (flags) {
			// NOT before all others:
			if (GET_FLAG(flags, PF_NOT)) {
				if (GET_FLAG(flags, PF_NOT2) && index != NOT_FOUND) index = NOT_FOUND;
				else index = begin;
			}
			if (index == NOT_FOUND) { // Failure actions:
				// not decided: if (word) Set_Var_Basic(word, REB_NONE);
				if (GET_FLAG(flags, PF_THEN)) {
					SKIP_TO_BAR(rules);
					if (!IS_END(rules)) rules++;
				}
			}
			else {  // Success actions:
				count = (begin > index) ? 0 : index - begin; // how much we advanced the input
				if (GET_FLAG(flags, PF_COPY)) {
					ser = (IS_BLOCK_INPUT(parse))
						? Copy_Block_Len(series, begin, count)
						: Copy_String(series, begin, count); // condenses
					Set_Var_Series(word, parse->type, ser, 0);
				}
				else if (GET_FLAG(flags, PF_SET)) {
					if (IS_BLOCK_INPUT(parse)) {
						item = Get_Var_Safe(word);
						if (count == 0) SET_NONE(item);
						else *item = *BLK_SKIP(series, begin);
					}
					else {
						item = Get_Var_Safe(word);
						if (count == 0) SET_NONE(item);
						else {
							i = GET_ANY_CHAR(series, begin);
							if (parse->type == REB_BINARY) {
								SET_INTEGER(item, i);
							} else {
								SET_CHAR(item, i);
							}
						}
					}
				}
				if (GET_FLAG(flags, PF_RETURN)) {
					ser = (IS_BLOCK_INPUT(parse))
						? Copy_Block_Len(series, begin, count)
						: Copy_String(series, begin, count); // condenses
					Throw_Return_Series(parse->type, ser);
				}
				if (GET_FLAG(flags, PF_REMOVE)) {
					if (count) Remove_Series(series, begin, count);
					index = begin;
				}
				if (flags & (1<<PF_INSERT | 1<<PF_CHANGE)) {
					count = GET_FLAG(flags, PF_INSERT) ? 0 : count;
					cmd = GET_FLAG(flags, PF_INSERT) ? 0 : (1<<AN_PART);
					item = rules++;
					if (IS_END(item)) goto bad_end;
					// Check for ONLY flag:
					if (IS_WORD(item) && NZ(cmd = VAL_CMD(item))) {
						if (cmd != SYM_ONLY) goto bad_rule;
						cmd |= (1<<AN_ONLY);
						item = rules++;
					}
					// CHECK FOR QUOTE!!
					item = Get_Parse_Value(item); // new value
					if (IS_UNSET(item)) Trap1(RE_NO_VALUE, rules-1);
					if (IS_END(item)) goto bad_end;
					if (IS_BLOCK_INPUT(parse)) {
						index = Modify_Block(GET_FLAG(flags, PF_CHANGE) ? A_CHANGE : A_INSERT,
								series, begin, item, cmd, count, 1);
						if (IS_LIT_WORD(item)) SET_TYPE(BLK_SKIP(series, index-1), REB_WORD);
					}
					else {
						if (parse->type == REB_BINARY) cmd |= (1<<AN_SERIES); // special flag
						index = Modify_String(GET_FLAG(flags, PF_CHANGE) ? A_CHANGE : A_INSERT,
								series, begin, item, cmd, count, 1);
					}
				}
				if (GET_FLAG(flags, PF_AND)) index = begin;
			}

			flags = 0;
			word = 0;
		}

		// Goto alternate rule and reset input:
		if (index == NOT_FOUND) {
			SKIP_TO_BAR(rules);
			if (IS_END(rules)) break;
			rules++;
			index = begin = start;
		}

		begin = index;
		mincount = maxcount = 1;

	}
	return index;

bad_rule:
	Trap1(RE_PARSE_RULE, rules-1);
bad_end:
	Trap1(RE_PARSE_END, rules-1);
	return 0;
}