Beispiel #1
0
/*
 - caltdissect - dissect match for alternation node
 ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
caltdissect(
    struct vars *v,
    struct subre *t,
    chr *begin,		/* beginning of relevant substring */
    chr *end)		/* end of same */
{
    struct dfa *d;
    int er;

    /* We loop, rather than tail-recurse, to handle a chain of alternatives */
    while (t != NULL) {
	assert(t->op == '|');
	assert(t->left != NULL && t->left->cnfa.nstates > 0);

	MDEBUG(("calt n%d\n", t->id));

	d = getsubdfa(v, t->left);
	NOERR();
	if (longest(v, d, begin, end, (int *) NULL) == end) {
	    MDEBUG(("calt matched\n"));
	    er = cdissect(v, t->left, begin, end);
	    if (er != REG_NOMATCH) {
		return er;
	    }
	}

	t = t->right;
    }

    return REG_NOMATCH;
}
Beispiel #2
0
/*
 - citerdissect - dissect match for iteration node
 ^ static int citerdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
citerdissect(struct vars * v,
	     struct subre * t,
	     chr *begin,	/* beginning of relevant substring */
	     chr *end)		/* end of same */
{
    struct dfa *d;
    chr	  **endpts;
    chr	   *limit;
    int		min_matches;
    size_t	max_matches;
    int		nverified;
    int		k;
    int		i;
    int		er;

    assert(t->op == '*');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(!(t->left->flags & SHORTER));
    assert(begin <= end);

    /*
     * If zero matches are allowed, and target string is empty, just declare
     * victory.  OTOH, if target string isn't empty, zero matches can't work
     * so we pretend the min is 1.
     */
    min_matches = t->min;
    if (min_matches <= 0) {
	if (begin == end)
	    return REG_OKAY;
	min_matches = 1;
    }

    /*
     * We need workspace to track the endpoints of each sub-match.  Normally
     * we consider only nonzero-length sub-matches, so there can be at most
     * end-begin of them.  However, if min is larger than that, we will also
     * consider zero-length sub-matches in order to find enough matches.
     *
     * For convenience, endpts[0] contains the "begin" pointer and we store
     * sub-match endpoints in endpts[1..max_matches].
     */
    max_matches = end - begin;
    if (max_matches > (size_t)t->max && t->max != DUPINF)
	max_matches = t->max;
    if (max_matches < (size_t)min_matches)
	max_matches = min_matches;
    endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
    if (endpts == NULL)
	return REG_ESPACE;
    endpts[0] = begin;

    d = getsubdfa(v, t->left);
    if (ISERR()) {
	FREE(endpts);
	return v->err;
    }
    MDEBUG(("citer %d\n", t->id));

    /*
     * Our strategy is to first find a set of sub-match endpoints that are
     * valid according to the child node's DFA, and then recursively dissect
     * each sub-match to confirm validity.  If any validity check fails,
     * backtrack the last sub-match and try again.  And, when we next try for
     * a validity check, we need not recheck any successfully verified
     * sub-matches that we didn't move the endpoints of.  nverified remembers
     * how many sub-matches are currently known okay.
     */

    /* initialize to consider first sub-match */
    nverified = 0;
    k = 1;
    limit = end;

    /* iterate until satisfaction or failure */
    while (k > 0) {
	/* try to find an endpoint for the k'th sub-match */
	endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
	if (endpts[k] == NULL) {
	    /* no match possible, so see if we can shorten previous one */
	    k--;
	    goto backtrack;
	}
	MDEBUG(("%d: working endpoint %d: %ld\n",
		t->id, k, LOFF(endpts[k])));

	/* k'th sub-match can no longer be considered verified */
	if (nverified >= k)
	    nverified = k - 1;

	if (endpts[k] != end) {
	    /* haven't reached end yet, try another iteration if allowed */
	    if ((size_t)k >= max_matches) {
		/* must try to shorten some previous match */
		k--;
		goto backtrack;
	    }

	    /* reject zero-length match unless necessary to achieve min */
	    if (endpts[k] == endpts[k - 1] &&
		(k >= min_matches || min_matches - k < end - endpts[k]))
		goto backtrack;

	    k++;
	    limit = end;
	    continue;
	}

	/*
	 * We've identified a way to divide the string into k sub-matches
	 * that works so far as the child DFA can tell.  If k is an allowed
	 * number of matches, start the slow part: recurse to verify each
	 * sub-match.  We always have k <= max_matches, needn't check that.
	 */
	if (k < min_matches)
	    goto backtrack;

	MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));

	for (i = nverified + 1; i <= k; i++) {
	    zaptreesubs(v, t->left);
	    er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
	    if (er == REG_OKAY) {
		nverified = i;
		continue;
	    }
	    if (er == REG_NOMATCH)
		break;
	    /* oops, something failed */
	    FREE(endpts);
	    return er;
	}

	if (i > k) {
	    /* satisfaction */
	    MDEBUG(("%d successful\n", t->id));
	    FREE(endpts);
	    return REG_OKAY;
	}

	/* match failed to verify, so backtrack */

    backtrack:
	/*
	 * Must consider shorter versions of the current sub-match.  However,
	 * we'll only ask for a zero-length match if necessary.
	 */
	while (k > 0) {
	    chr	   *prev_end = endpts[k - 1];

	    if (endpts[k] > prev_end) {
		limit = endpts[k] - 1;
		if (limit > prev_end ||
		    (k < min_matches && min_matches - k >= end - prev_end)) {
		    /* break out of backtrack loop, continue the outer one */
		    break;
		}
	    }
	    /* can't shorten k'th sub-match any more, consider previous one */
	    k--;
	}
    }

    /* all possibilities exhausted */
    MDEBUG(("%d failed\n", t->id));
    FREE(endpts);
    return REG_NOMATCH;
}
Beispiel #3
0
/*
 - crevcondissect - dissect match for concatenation node, shortest-first
 ^ static int crevcondissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
crevcondissect(
    struct vars *v,
    struct subre *t,
    chr *begin,		/* beginning of relevant substring */
    chr *end)		/* end of same */
{
    struct dfa *d, *d2;
    chr *mid;

    assert(t->op == '.');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->right != NULL && t->right->cnfa.nstates > 0);
    assert(t->left->flags&SHORTER);

    d = getsubdfa(v, t->left);
    NOERR();
    d2 = getsubdfa(v, t->right);
    NOERR();

    MDEBUG(("crevcon %d\n", t->id));

    /*
     * Pick a tentative midpoint.
     */

    mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
    if (mid == NULL) {
	return REG_NOMATCH;
    }
    MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));

    /*
     * Iterate until satisfaction or failure.
     */

    for (;;) {
	/*
	 * Try this midpoint on for size.
	 */

	if (longest(v, d2, mid, end, NULL) == end) {
	    int er = cdissect(v, t->left, begin, mid);

	    if (er == REG_OKAY) {
		er = cdissect(v, t->right, mid, end);
		if (er == REG_OKAY) {
		    /*
		     * Satisfaction.
		     */

		    MDEBUG(("successful\n"));
		    return REG_OKAY;
		}
	    }
	    if (er != REG_NOMATCH) {
		return er;
	    }
	}

	/*
	 * That midpoint didn't work, find a new one.
	 */

	if (mid == end) {
	    /*
	     * All possibilities exhausted.
	     */

	    MDEBUG(("%d no midpoint\n", t->id));
	    return REG_NOMATCH;
	}
	mid = shortest(v, d, begin, mid+1, end, NULL, NULL);
	if (mid == NULL) {
	    /*
	     * Failed to find a new one.
	     */

	    MDEBUG(("%d failed midpoint\n", t->id));
	    return REG_NOMATCH;
	}
	MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
	zaptreesubs(v, t->left);
	zaptreesubs(v, t->right);
    }
}
Beispiel #4
0
/*
 * ccondissect - dissect match for concatenation node
 */
static int						/* regexec return code */
ccondissect(struct vars * v,
			struct subre * t,
			chr *begin,			/* beginning of relevant substring */
			chr *end)			/* end of same */
{
	struct dfa *d;
	struct dfa *d2;
	chr		   *mid;
	int			er;

	assert(t->op == '.');
	assert(t->left != NULL && t->left->cnfa.nstates > 0);
	assert(t->right != NULL && t->right->cnfa.nstates > 0);
	assert(!(t->left->flags & SHORTER));

	d = getsubdfa(v, t->left);
	NOERR();
	d2 = getsubdfa(v, t->right);
	NOERR();
	MDEBUG(("cconcat %d\n", t->id));

	/* pick a tentative midpoint */
	mid = longest(v, d, begin, end, (int *) NULL);
	if (mid == NULL)
		return REG_NOMATCH;
	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));

	/* iterate until satisfaction or failure */
	for (;;)
	{
		/* try this midpoint on for size */
		if (longest(v, d2, mid, end, (int *) NULL) == end)
		{
			er = cdissect(v, t->left, begin, mid);
			if (er == REG_OKAY)
			{
				er = cdissect(v, t->right, mid, end);
				if (er == REG_OKAY)
				{
					/* satisfaction */
					MDEBUG(("successful\n"));
					return REG_OKAY;
				}
			}
			if (er != REG_NOMATCH)
				return er;
		}

		/* that midpoint didn't work, find a new one */
		if (mid == begin)
		{
			/* all possibilities exhausted */
			MDEBUG(("%d no midpoint\n", t->id));
			return REG_NOMATCH;
		}
		mid = longest(v, d, begin, mid - 1, (int *) NULL);
		if (mid == NULL)
		{
			/* failed to find a new one */
			MDEBUG(("%d failed midpoint\n", t->id));
			return REG_NOMATCH;
		}
		MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
		zaptreesubs(v, t->left);
		zaptreesubs(v, t->right);
	}

	/* can't get here */
	return REG_ASSERT;
}