Beispiel #1
0
/*
 - caltdissect - determine alternative subexpression matches (w. complications)
 * Walks the chain of '|' nodes linked through ->right, starting at t.  Each
 * node's slot in v->mem (indexed by its retry field) records how far that
 * alternative has been explored, so a re-entry resumes where the previous
 * attempt left off.  The first alternative whose left subtree dissects with
 * a result other than REG_NOMATCH decides the return value; running off the
 * end of the chain yields REG_NOMATCH.
 ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
caltdissect(
    struct vars *v,
    struct subre *t,
    chr *begin,			/* beginning of relevant substring */
    chr *end)			/* end of same */
{
    struct subre *alt;          /* alternation node currently examined */
    struct dfa *d;
    int spansAll;               /* did longest() stop exactly at end? */
    int er;
#define	UNTRIED	0		/* not yet tried at all */
#define	TRYING	1		/* top matched, trying submatches */
#define	TRIED	2		/* top didn't match or submatches exhausted */

    for (alt = t; alt != NULL; alt = alt->right) {
        assert(alt->op == '|');

        /* Already given up on this branch: move on to the next one. */
        if (v->mem[alt->retry] == TRIED) {
            continue;
        }

        MDEBUG(("calt n%d\n", alt->retry));
        assert(alt->left != NULL);

        if (v->mem[alt->retry] == UNTRIED) {
            /*
             * First visit: run the left child's DFA over the substring and
             * require that longest() ends exactly at "end" before bothering
             * with the submatches.
             */
            d = newdfa(v, &alt->left->cnfa, &v->g->cmap, DOMALLOC);
            if (ISERR()) {
                return v->err;
            }
            spansAll = (longest(v, d, begin, end, NULL) == end);
            freedfa(d);
            if (!spansAll) {
                v->mem[alt->retry] = TRIED;
                continue;
            }
            MDEBUG(("calt matched\n"));
            v->mem[alt->retry] = TRYING;
        }

        /* Top level matched (now or on an earlier visit); try submatches. */
        er = cdissect(v, alt->left, begin, end);
        if (er != REG_NOMATCH) {
            return er;
        }

        /* Submatches exhausted for this branch. */
        v->mem[alt->retry] = TRIED;
    }

    /* No alternative left to try. */
    return REG_NOMATCH;
}
Beispiel #2
0
/*
 - cdissect - determine subexpression matches (with complications)
 * Dispatches on the node's op code to the dissector for that node kind and
 * hands back its regexec result code.  An op code not handled here yields
 * REG_ASSERT.
 * The retry memory stores the offset of the trial midpoint from begin, plus 1
 * so that 0 uniquely means "clean slate".
 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
cdissect(
    struct vars *v,
    struct subre *t,
    chr *begin,			/* beginning of relevant substring */
    chr *end)			/* end of same */
{
    int status;

    assert(t != NULL);
    MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));

    switch (t->op) {
    case '=':                   /* terminal node */
        assert(t->left == NULL && t->right == NULL);
        status = REG_OKAY;      /* nothing to do here; parent did the work */
        break;

    case '|':                   /* alternation */
        assert(t->left != NULL);
        status = caltdissect(v, t, begin, end);
        break;

    case 'b':                   /* back reference: delegated to cbrdissect */
        assert(t->left == NULL && t->right == NULL);
        status = cbrdissect(v, t, begin, end);
        break;

    case '.':                   /* concatenation */
        assert(t->left != NULL && t->right != NULL);
        status = ccondissect(v, t, begin, end);
        break;

    case '(':                   /* capturing */
        assert(t->left != NULL && t->right == NULL);
        assert(t->subno > 0);
        status = cdissect(v, t->left, begin, end);
        if (status == REG_OKAY) {
            /* Record the capture only once the inner tree has succeeded. */
            subset(v, t, begin, end);
        }
        break;

    default:                    /* unknown node type */
        status = REG_ASSERT;
        break;
    }

    return status;
}
Beispiel #3
0
/*
 - cdissect - check backrefs and determine subexpression matches
 * Recursively walks a subre tree, verifying back references and/or locating
 * the boundaries of capture-node submatches within a proposed match.  The
 * proposed match is the half-open range from "begin" up to, but excluding,
 * "end"; the job here is to "dissect" it into its submatches.
 *
 * Caller obligations before invoking any level of cdissect:
 *   - The node's DFA must already have been run and must have accepted the
 *     proposed substring.  (This is left to the caller because, for
 *     concatenation and iteration nodes, testing all candidate substrings
 *     against the child DFAs before recursing is much faster.)
 *   - Subexpression match data must have been cleared with zaptreesubs (or
 *     zapallsubs at the top level).
 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
cdissect(
    struct vars *v,
    struct subre *t,
    chr *begin,		/* beginning of relevant substring */
    chr *end)		/* end of same */
{
    int result;

    assert(t != NULL);
    MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));

    switch (t->op) {
    case '=':                   /* terminal node */
        assert(t->left == NULL && t->right == NULL);
        result = REG_OKAY;      /* nothing to do here; parent did the work */
        break;

    case 'b':                   /* back reference */
        assert(t->left == NULL && t->right == NULL);
        result = cbrdissect(v, t, begin, end);
        break;

    case '.':                   /* concatenation */
        assert(t->left != NULL && t->right != NULL);
        /* A SHORTER-flagged left child selects the reverse-scan variant. */
        result = (t->left->flags & SHORTER)
                ? crevcondissect(v, t, begin, end)
                : ccondissect(v, t, begin, end);
        break;

    case '|':                   /* alternation */
        assert(t->left != NULL);
        result = caltdissect(v, t, begin, end);
        break;

    case '*':                   /* iteration */
        assert(t->left != NULL);
        /* A SHORTER-flagged left child selects the reverse-scan variant. */
        result = (t->left->flags & SHORTER)
                ? creviterdissect(v, t, begin, end)
                : citerdissect(v, t, begin, end);
        break;

    case '(':                   /* capturing */
        assert(t->left != NULL && t->right == NULL);
        assert(t->subno > 0);
        result = cdissect(v, t->left, begin, end);
        if (result == REG_OKAY) {
            /* Record the capture only once the inner tree has succeeded. */
            subset(v, t, begin, end);
        }
        break;

    default:                    /* unknown node type */
        result = REG_ASSERT;
        break;
    }

    /*
     * A match failure is only legitimate when back references lurk somewhere
     * below this node.  Otherwise either the caller skipped the DFA check or
     * the DFA disagrees with the node's innards.
     */
    assert(result != REG_NOMATCH || (t->flags & BACKR));

    return result;
}