/*
 - zaptreesubs - initialize subexpressions within subtree to "no match"
 ^ static void zaptreesubs(struct vars *, struct subre *);
 *
 * Walks the subtree rooted at t in pre-order (node, then left subtree, then
 * right subtree).  For each node whose op is '(' and whose subno is below
 * v->nmatch, both offsets of v->pmatch[subno] are set to -1.
 */
static void
zaptreesubs(
    struct vars *const v,
    struct subre *const t)
{
    struct subre *node = t;

    /*
     * Left children are handled by recursion; the chain of right children
     * is followed iteratively.  The resulting visiting order is the same as
     * a plain pre-order recursion.
     */
    do {
	if (node->op == '(') {
	    const int subno = node->subno;

	    assert(subno > 0);

	    /* Only reset slots that actually exist in v->pmatch. */
	    if ((size_t) subno < v->nmatch) {
		v->pmatch[subno].rm_so = -1;
		v->pmatch[subno].rm_eo = -1;
	    }
	}

	if (node->left != NULL) {
	    zaptreesubs(v, node->left);
	}

	node = node->right;
    } while (node != NULL);
}
/*
 - citerdissect - dissect match for iteration node
 ^ static int citerdissect(struct vars *, struct subre *, chr *, chr *);
 *
 * Divides the substring [begin, end) into consecutive sub-matches of the
 * child node t->left, using the child's DFA to propose endpoints and
 * cdissect() to confirm each proposed sub-match.
 *
 * Preconditions (asserted): t->op is '*', t->left is non-NULL and has a
 * non-empty cnfa, t->left is not flagged SHORTER, and begin <= end.
 *
 * Returns:
 *	REG_OKAY	if t->min <= 0 and the substring is empty, or if a
 *			division into sub-matches was found and every
 *			sub-match was confirmed by cdissect();
 *	REG_NOMATCH	if every candidate division has been tried and rejected;
 *	REG_ESPACE	if the endpoint workspace could not be allocated;
 *	v->err		if ISERR() reports an error after getsubdfa();
 *	otherwise	any cdissect() result other than REG_OKAY/REG_NOMATCH
 *			is passed straight through.
 *
 * The endpts workspace is released with FREE() on every return path that
 * follows its allocation.
 */
static int /* regexec return code */
citerdissect(struct vars * v,
			 struct subre * t,
			 chr *begin, /* beginning of relevant substring */
			 chr *end) /* end of same */
{
	struct dfa *d;			/* DFA for the child node t->left */
	chr **endpts;			/* endpts[0] = begin; endpts[j] = end of j'th sub-match */
	chr *limit;				/* upper bound handed to longest() for the k'th sub-match */
	int min_matches;		/* t->min, raised to 1 when t->min <= 0 on nonempty input */
	size_t max_matches;		/* largest number of sub-matches we will ever track */
	int nverified;			/* sub-matches 1..nverified have passed cdissect() */
	int k;					/* index of the sub-match currently being placed */
	int i;
	int er;

	assert(t->op == '*');
	assert(t->left != NULL && t->left->cnfa.nstates > 0);
	assert(!(t->left->flags & SHORTER));
	assert(begin <= end);

	/*
	 * If zero matches are allowed, and target string is empty, just declare
	 * victory.  OTOH, if target string isn't empty, zero matches can't work
	 * so we pretend the min is 1.
	 */
	min_matches = t->min;
	if (min_matches <= 0) {
		if (begin == end)
			return REG_OKAY;
		min_matches = 1;
	}

	/*
	 * We need workspace to track the endpoints of each sub-match.  Normally
	 * we consider only nonzero-length sub-matches, so there can be at most
	 * end-begin of them.  However, if min is larger than that, we will also
	 * consider zero-length sub-matches in order to find enough matches.
	 *
	 * For convenience, endpts[0] contains the "begin" pointer and we store
	 * sub-match endpoints in endpts[1..max_matches].
	 */
	max_matches = end - begin;
	/* t->max only caps the count when it is not the DUPINF sentinel */
	if (max_matches > (size_t)t->max && t->max != DUPINF)
		max_matches = t->max;
	if (max_matches < (size_t)min_matches)
		max_matches = min_matches;
	/* one extra slot because endpts[0] holds "begin" */
	endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
	if (endpts == NULL)
		return REG_ESPACE;
	endpts[0] = begin;

	d = getsubdfa(v, t->left);
	if (ISERR()) {
		FREE(endpts);
		return v->err;
	}
	MDEBUG(("citer %d\n", t->id));

	/*
	 * Our strategy is to first find a set of sub-match endpoints that are
	 * valid according to the child node's DFA, and then recursively dissect
	 * each sub-match to confirm validity.  If any validity check fails,
	 * backtrack the last sub-match and try again.  And, when we next try for
	 * a validity check, we need not recheck any successfully verified
	 * sub-matches that we didn't move the endpoints of.  nverified remembers
	 * how many sub-matches are currently known okay.
	 */

	/* initialize to consider first sub-match */
	nverified = 0;
	k = 1;
	limit = end;

	/* iterate until satisfaction or failure */
	while (k > 0) {
		/*
		 * try to find an endpoint for the k'th sub-match
		 *
		 * NOTE(review): longest() is defined elsewhere; from its use here it
		 * is presumed to return the furthest acceptable endpoint not beyond
		 * "limit", or NULL when there is none.  Unlike the getsubdfa() call
		 * above, this call is not followed by an ISERR() check -- confirm
		 * that longest() cannot report an error through v->err.
		 */
		endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
		if (endpts[k] == NULL) {
			/* no match possible, so see if we can shorten previous one */
			k--;
			goto backtrack;
		}
		MDEBUG(("%d: working endpoint %d: %ld\n", t->id, k, LOFF(endpts[k])));

		/* k'th sub-match can no longer be considered verified */
		if (nverified >= k)
			nverified = k - 1;

		if (endpts[k] != end) {
			/* haven't reached end yet, try another iteration if allowed */
			if ((size_t)k >= max_matches) {
				/* must try to shorten some previous match */
				k--;
				goto backtrack;
			}

			/*
			 * reject zero-length match unless necessary to achieve min
			 *
			 * A zero-length match is kept only while k < min_matches and the
			 * number of matches still owed (min_matches - k) is at least the
			 * number of characters remaining (end - endpts[k]).
			 */
			if (endpts[k] == endpts[k - 1] && (k >= min_matches || min_matches - k < end - endpts[k]))
				goto backtrack;

			/* advance to the next sub-match; it may extend all the way to end */
			k++;
			limit = end;
			continue;
		}

		/*
		 * We've identified a way to divide the string into k sub-matches
		 * that works so far as the child DFA can tell.  If k is an allowed
		 * number of matches, start the slow part: recurse to verify each
		 * sub-match.  We always have k <= max_matches, needn't check that.
		 */
		if (k < min_matches)
			goto backtrack;

		MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));

		/* only sub-matches after the last verified one need (re)checking */
		for (i = nverified + 1; i <= k; i++) {
			/* reset the child's capture slots before each attempt */
			zaptreesubs(v, t->left);
			er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
			if (er == REG_OKAY) {
				nverified = i;
				continue;
			}
			if (er == REG_NOMATCH)
				break;
			/* oops, something failed */
			FREE(endpts);
			return er;
		}

		/* the loop ran to completion only if every sub-match verified */
		if (i > k) {
			/* satisfaction */
			MDEBUG(("%d successful\n", t->id));
			FREE(endpts);
			return REG_OKAY;
		}

		/* match failed to verify, so backtrack */

backtrack:
		/*
		 * Must consider shorter versions of the current sub-match.  However,
		 * we'll only ask for a zero-length match if necessary.
		 *
		 * On leaving this loop with k > 0, "limit" has been set one position
		 * short of the current endpts[k], so the next longest() call in the
		 * outer loop must pick a strictly shorter k'th sub-match.  Leaving it
		 * with k == 0 ends the outer loop as well.
		 */
		while (k > 0) {
			chr *prev_end = endpts[k - 1];

			if (endpts[k] > prev_end) {
				limit = endpts[k] - 1;
				if (limit > prev_end || (k < min_matches && min_matches - k >= end - prev_end)) {
					/* break out of backtrack loop, continue the outer one */
					break;
				}
			}
			/* can't shorten k'th sub-match any more, consider previous one */
			k--;
		}
	}

	/* all possibilities exhausted */
	MDEBUG(("%d failed\n", t->id));
	FREE(endpts);
	return REG_NOMATCH;
}
/*
 - crevcondissect - dissect match for concatenation node, shortest-first
 ^ static int crevcondissect(struct vars *, struct subre *, chr *, chr *);
 *
 * Looks for a split point inside [begin, end) such that the left child
 * accounts for [begin, split) and the right child for [split, end).
 * Candidate split points come from shortest() on the left child's DFA and
 * are tried in increasing order; each candidate is first screened with
 * longest() on the right child's DFA and then confirmed with cdissect() on
 * both children.
 *
 * Returns REG_OKAY when both halves dissect successfully, REG_NOMATCH when
 * no candidate works, or any other code reported by cdissect().  NOERR()
 * follows each getsubdfa() call.
 */
static int			/* regexec return code */
crevcondissect(
    struct vars *v,
    struct subre *t,
    chr *begin,			/* beginning of relevant substring */
    chr *end)			/* end of same */
{
    struct dfa *leftdfa, *rightdfa;
    chr *split;
    int rc;

    assert(t->op == '.');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->right != NULL && t->right->cnfa.nstates > 0);
    assert(t->left->flags&SHORTER);

    leftdfa = getsubdfa(v, t->left);
    NOERR();
    rightdfa = getsubdfa(v, t->right);
    NOERR();
    MDEBUG(("crevcon %d\n", t->id));

    /*
     * First candidate: ask shortest() for a split anywhere from begin up.
     */

    split = shortest(v, leftdfa, begin, begin, end, (chr **) NULL,
	    (int *) NULL);
    if (split == NULL) {
	return REG_NOMATCH;
    }
    MDEBUG(("tentative midpoint %ld\n", LOFF(split)));

    /*
     * Keep trying candidates until one is confirmed or none remain.
     */

    for (;;) {
	/*
	 * Only bother with the recursive dissection if the right child's
	 * DFA can consume everything from the split to the end.
	 */

	if (longest(v, rightdfa, split, end, NULL) == end) {
	    rc = cdissect(v, t->left, begin, split);
	    if (rc == REG_OKAY) {
		rc = cdissect(v, t->right, split, end);
	    }
	    if (rc == REG_OKAY) {
		/*
		 * Both halves confirmed.
		 */

		MDEBUG(("successful\n"));
		return REG_OKAY;
	    }
	    if (rc != REG_NOMATCH) {
		/*
		 * Anything other than a plain mismatch is passed up.
		 */

		return rc;
	    }
	}

	/*
	 * This candidate was rejected; move on to a later one.
	 */

	if (split == end) {
	    /*
	     * There is nothing beyond the end to try.
	     */

	    MDEBUG(("%d no midpoint\n", t->id));
	    return REG_NOMATCH;
	}

	split = shortest(v, leftdfa, begin, split+1, end, NULL, NULL);
	if (split == NULL) {
	    /*
	     * The left child's DFA offers no further candidate.
	     */

	    MDEBUG(("%d failed midpoint\n", t->id));
	    return REG_NOMATCH;
	}
	MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(split)));

	/*
	 * Reset the capture slots of both children before the next attempt.
	 */

	zaptreesubs(v, t->left);
	zaptreesubs(v, t->right);
    }
}
/*
 * ccondissect - dissect match for concatenation node
 *
 * Looks for a split point inside [begin, end) such that the left child
 * accounts for [begin, split) and the right child for [split, end).
 * Candidate split points come from longest() on the left child's DFA and
 * are tried in decreasing order; each candidate is first screened with
 * longest() on the right child's DFA and then confirmed with cdissect() on
 * both children.
 *
 * Returns REG_OKAY when both halves dissect successfully, REG_NOMATCH when
 * no candidate works, or any other code reported by cdissect().  NOERR()
 * follows each getsubdfa() call.
 */
static int /* regexec return code */
ccondissect(struct vars * v,
			struct subre * t,
			chr *begin, /* beginning of relevant substring */
			chr *end) /* end of same */
{
	struct dfa *leftdfa;
	struct dfa *rightdfa;
	chr *split;
	int rc;

	assert(t->op == '.');
	assert(t->left != NULL && t->left->cnfa.nstates > 0);
	assert(t->right != NULL && t->right->cnfa.nstates > 0);
	assert(!(t->left->flags & SHORTER));

	leftdfa = getsubdfa(v, t->left);
	NOERR();
	rightdfa = getsubdfa(v, t->right);
	NOERR();
	MDEBUG(("cconcat %d\n", t->id));

	/* first candidate: the longest left-hand match within the whole range */
	split = longest(v, leftdfa, begin, end, (int *) NULL);
	if (split == NULL)
	{
		return REG_NOMATCH;
	}
	MDEBUG(("tentative midpoint %ld\n", LOFF(split)));

	/* keep trying candidates until one is confirmed or none remain */
	for (;;)
	{
		/*
		 * Only bother with the recursive dissection if the right child's
		 * DFA can consume everything from the split to the end.
		 */
		if (longest(v, rightdfa, split, end, (int *) NULL) == end)
		{
			rc = cdissect(v, t->left, begin, split);
			if (rc == REG_OKAY)
			{
				rc = cdissect(v, t->right, split, end);
			}
			if (rc == REG_OKAY)
			{
				/* both halves confirmed */
				MDEBUG(("successful\n"));
				return REG_OKAY;
			}
			if (rc != REG_NOMATCH)
			{
				/* anything other than a plain mismatch is passed up */
				return rc;
			}
		}

		/* this candidate was rejected; move on to an earlier one */
		if (split == begin)
		{
			/* there is nothing before the beginning to try */
			MDEBUG(("%d no midpoint\n", t->id));
			return REG_NOMATCH;
		}

		split = longest(v, leftdfa, begin, split - 1, (int *) NULL);
		if (split == NULL)
		{
			/* the left child's DFA offers no further candidate */
			MDEBUG(("%d failed midpoint\n", t->id));
			return REG_NOMATCH;
		}
		MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(split)));

		/* reset the capture slots of both children before the next attempt */
		zaptreesubs(v, t->left);
		zaptreesubs(v, t->right);
	}

	/* can't get here */
	return REG_ASSERT;
}