/*
 - caltdissect - determine alternative subexpression matches (w. complications)
 * Walks the chain of '|' nodes linked through ->right.  For each branch the
 * per-node retry slot v->mem[t->retry] records how far we have got:
 * UNTRIED (never examined), TRYING (the branch's DFA accepted the whole
 * substring and its submatches are being explored) or TRIED (the branch
 * cannot match, or its submatches are exhausted).  The first branch whose
 * dissection yields anything other than REG_NOMATCH decides the result;
 * running off the end of the chain yields REG_NOMATCH.
 ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
caltdissect(
    struct vars *v,
    struct subre *t,
    chr *begin,			/* beginning of relevant substring */
    chr *end)			/* end of same */
{
#define	UNTRIED	0		/* not yet tried at all */
#define	TRYING	1		/* top matched, trying submatches */
#define	TRIED	2		/* top didn't match or submatches exhausted */
    struct subre *alt;

    /* Iterate over the alternatives instead of recursing on t->right. */
    for (alt = t; alt != NULL; alt = alt->right) {
	int status;

	assert(alt->op == '|');

	/* Branch already ruled out: move straight on to the next one. */
	if (v->mem[alt->retry] == TRIED) {
	    continue;
	}

	MDEBUG(("calt n%d\n", alt->retry));
	assert(alt->left != NULL);

	if (v->mem[alt->retry] == UNTRIED) {
	    struct dfa *d = newdfa(v, &alt->left->cnfa, &v->g->cmap, DOMALLOC);
	    int spans;

	    if (ISERR()) {
		return v->err;
	    }

	    /* The branch qualifies only if it can cover exactly begin..end. */
	    spans = (longest(v, d, begin, end, NULL) == end);
	    freedfa(d);
	    if (!spans) {
		v->mem[alt->retry] = TRIED;
		continue;
	    }
	    MDEBUG(("calt matched\n"));
	    v->mem[alt->retry] = TRYING;
	}

	/* State is TRYING here: (re)attempt the branch's submatches. */
	status = cdissect(v, alt->left, begin, end);
	if (status != REG_NOMATCH) {
	    return status;
	}
	v->mem[alt->retry] = TRIED;
    }

    /* No alternative left to try. */
    return REG_NOMATCH;
}
/*
 - cdissect - determine subexpression matches (with complications)
 * Dispatches on the node's op code to the matching dissector and hands back
 * that dissector's regexec return code; an unrecognised op code produces
 * REG_ASSERT.
 * The retry memory stores the offset of the trial midpoint from begin, plus 1
 * so that 0 uniquely means "clean slate".
 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
cdissect(
    struct vars *v,
    struct subre *t,
    chr *begin,			/* beginning of relevant substring */
    chr *end)			/* end of same */
{
    int status;

    assert(t != NULL);
    MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));

    /* terminal node: no action, parent did the work */
    if (t->op == '=') {
	assert(t->left == NULL && t->right == NULL);
	return REG_OKAY;
    }

    /* alternation */
    if (t->op == '|') {
	assert(t->left != NULL);
	return caltdissect(v, t, begin, end);
    }

    /* back ref -- shouldn't be calling us! */
    if (t->op == 'b') {
	assert(t->left == NULL && t->right == NULL);
	return cbrdissect(v, t, begin, end);
    }

    /* concatenation */
    if (t->op == '.') {
	assert(t->left != NULL && t->right != NULL);
	return ccondissect(v, t, begin, end);
    }

    /* capturing: record the span only once the inner match succeeded */
    if (t->op == '(') {
	assert(t->left != NULL && t->right == NULL);
	assert(t->subno > 0);
	status = cdissect(v, t->left, begin, end);
	if (status == REG_OKAY) {
	    subset(v, t, begin, end);
	}
	return status;
    }

    /* unknown node type */
    return REG_ASSERT;
}
/*
 - cdissect - check backrefs and determine subexpression matches
 * Recursively walks a subre tree, verifying back references and/or locating
 * the boundaries of capture nodes.  The candidate match is the half-open
 * range from "begin" up to (but excluding) "end"; we take it apart to find
 * where each submatch lies.
 * Preconditions on the caller, at every level of the recursion:
 *   - the node's DFA has already been run and accepts the proposed
 *     substring (concatenation and iteration nodes check all candidate
 *     substrings against the child DFAs before recursing, which is much
 *     faster than doing it here);
 *   - subexpression match data has been cleared via zaptreesubs (or
 *     zapallsubs at the top level).
 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
 */
static int			/* regexec return code */
cdissect(
    struct vars *v,
    struct subre *t,
    chr *begin,			/* beginning of relevant substring */
    chr *end)			/* end of same */
{
    int rc;

    assert(t != NULL);
    MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));

    if (t->op == '=') {
	/* terminal node: no action, parent did the work */
	assert(t->left == NULL && t->right == NULL);
	rc = REG_OKAY;
    } else if (t->op == 'b') {
	/* back reference */
	assert(t->left == NULL && t->right == NULL);
	rc = cbrdissect(v, t, begin, end);
    } else if (t->op == '.') {
	/* concatenation; a SHORTER left child calls for the reverse scan */
	assert(t->left != NULL && t->right != NULL);
	rc = (t->left->flags & SHORTER)
		? crevcondissect(v, t, begin, end)
		: ccondissect(v, t, begin, end);
    } else if (t->op == '|') {
	/* alternation */
	assert(t->left != NULL);
	rc = caltdissect(v, t, begin, end);
    } else if (t->op == '*') {
	/* iteration; a SHORTER child calls for the reverse scan */
	assert(t->left != NULL);
	rc = (t->left->flags & SHORTER)
		? creviterdissect(v, t, begin, end)
		: citerdissect(v, t, begin, end);
    } else if (t->op == '(') {
	/* capturing: record the span only once the inner match succeeded */
	assert(t->left != NULL && t->right == NULL);
	assert(t->subno > 0);
	rc = cdissect(v, t->left, begin, end);
	if (rc == REG_OKAY) {
	    subset(v, t, begin, end);
	}
    } else {
	/* unknown node type */
	rc = REG_ASSERT;
    }

    /*
     * A match failure is only legitimate when backrefs lurk below this
     * node.  Otherwise the caller skipped the DFA check, or the DFA and the
     * node's innards disagree.
     */
    assert(rc != REG_NOMATCH || (t->flags & BACKR));

    return rc;
}