/* - complicatedFind - find a match for the main NFA (with complications) ^ static int complicatedFind(struct vars *, struct cnfa *, struct colormap *); */ static int complicatedFind( struct vars *const v, struct cnfa *const cnfa, struct colormap *const cm) { struct dfa *s, *d; chr *cold = NULL; /* silence gcc 4 warning */ int ret; s = newDFA(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newDFA(v, cnfa, cm, &v->dfa2); if (ISERR()) { assert(d == NULL); freeDFA(s); return v->err; } ret = complicatedFindLoop(v, cnfa, cm, d, s, &cold); freeDFA(d); freeDFA(s); NOERR(); if (v->g->cflags®_EXPECT) { assert(v->details != NULL); if (cold != NULL) { v->details->rm_extend.rm_so = OFF(cold); } else { v->details->rm_extend.rm_so = OFF(v->stop); } v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; }
/*
 - alternationDissect - determine alternative subexpression matches (uncomplicated)
 ^ static int alternationDissect(struct vars *, struct subre *, chr *, chr *);
 *
 * Walks the chain of '|' nodes through their right links.  For each node a
 * DFA is built from the left child's cnfa; the first child whose longest
 * match starting at begin reaches exactly end is handed to dissect().
 * Returns REG_ASSERT if the chain is exhausted without such a child.
 */
static int                      /* regexec return code */
alternationDissect(
    struct vars *const v,
    struct subre *t,
    chr *const begin,           /* beginning of relevant substring */
    chr *const end)             /* end of same */
{
    int branch = 0;             /* ordinal of the alternative, for debugging */

    assert(t != NULL);
    assert(t->op == '|');

    while (t != NULL) {
        struct dfa *probe;

        MDEBUG(("trying %dth\n", branch));
        assert(t->left != NULL && t->left->cnfa.nstates > 0);

        probe = newDFA(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
        if (ISERR()) {
            return v->err;
        }

        if (longest(v, probe, begin, end, NULL) != end) {
            /* This alternative does not span the substring; try the next. */
            freeDFA(probe);
            t = t->right;
            branch++;
            continue;
        }

        MDEBUG(("success\n"));
        freeDFA(probe);
        return dissect(v, t->left, begin, end);
    }

    return REG_ASSERT;          /* none of them matched?!? */
}
/*
 - complicatedAlternationDissect - determine alternative subexpression matches (w.
 - complications)
 ^ static int complicatedAlternationDissect(struct vars *, struct subre *, chr *, chr *);
 *
 * Walks the chain of '|' nodes through their right links, using
 * v->mem[t->retry] to remember how far each alternative has been explored:
 * an UNTRIED alternative is first screened with a DFA, a TRYING one goes
 * straight to complicatedDissect(), and a TRIED one is skipped.  Any result
 * other than REG_NOMATCH from complicatedDissect() is returned at once;
 * REG_NOMATCH is returned when the chain runs out.
 *
 * The walk is written as a loop, so it does not rely on the compiler
 * optimizing a tail call.
 */
static int                      /* regexec return code */
complicatedAlternationDissect(
    struct vars *const v,
    struct subre *t,
    chr *const begin,           /* beginning of relevant substring */
    chr *const end)             /* end of same */
{
    int er;
#define UNTRIED 0               /* not yet tried at all */
#define TRYING  1               /* top matched, trying submatches */
#define TRIED   2               /* top didn't match or submatches exhausted */

    for (; t != NULL; t = t->right) {
        assert(t->op == '|');

        if (v->mem[t->retry] == TRIED) {
            continue;           /* nothing more to get from this one */
        }

        MDEBUG(("cAlt n%d\n", t->retry));
        assert(t->left != NULL);

        if (v->mem[t->retry] == UNTRIED) {
            struct dfa *d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);

            if (ISERR()) {
                return v->err;
            }
            if (longest(v, d, begin, end, NULL) != end) {
                /* Top level cannot span the substring; rule it out. */
                freeDFA(d);
                v->mem[t->retry] = TRIED;
                continue;
            }
            freeDFA(d);
            MDEBUG(("cAlt matched\n"));
            v->mem[t->retry] = TRYING;
        }

        er = complicatedDissect(v, t->left, begin, end);
        if (er != REG_NOMATCH) {
            return er;
        }
        v->mem[t->retry] = TRIED;
    }

    return REG_NOMATCH;
}
/*
 - getsubdfa - create or re-fetch the DFA for a subre node
 *
 * The DFA for node t is cached in v->subdfas[t->id], so it is built at most
 * once per overall regex execution.  The DFA will be freed by the cleanup
 * step in exec().  Returns NULL if an error is pending after construction.
 */
static struct dfa *
getsubdfa(struct vars * v, struct subre * t)
{
    struct dfa **slot = &v->subdfas[t->id];

    /* Fast path: already built on an earlier call. */
    if (*slot != NULL)
        return *slot;

    *slot = newDFA(v, &t->cnfa, &v->g->cmap, DOMALLOC);
    if (ISERR())
        return NULL;
    return *slot;
}
/* - simpleFind - find a match for the main NFA (no-complications case) ^ static int simpleFind(struct vars *, struct cnfa *, struct colormap *); */ static int simpleFind( struct vars *const v, struct cnfa *const cnfa, struct colormap *const cm) { struct dfa *s, *d; chr *begin, *end = NULL; chr *cold; chr *open, *close; /* Open and close of range of possible * starts */ int hitend; int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; /* * First, a shot with the search RE. */ s = newDFA(v, &v->g->search, cm, &v->dfa1); assert(!(ISERR() && s != NULL)); NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; close = shortest(v, s, v->start, v->start, v->stop, &cold, NULL); freeDFA(s); NOERR(); if (v->g->cflags®_EXPECT) { assert(v->details != NULL); if (cold != NULL) { v->details->rm_extend.rm_so = OFF(cold); } else { v->details->rm_extend.rm_so = OFF(v->stop); } v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (close == NULL) { /* not found */ return REG_NOMATCH; } if (v->nmatch == 0) { /* found, don't need exact location */ return REG_OKAY; } /* * Find starting point and match. */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); d = newDFA(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); for (begin = open; begin <= close; begin++) { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) { end = shortest(v, d, begin, begin, v->stop, NULL, &hitend); } else { end = longest(v, d, begin, v->stop, &hitend); } if (ISERR()) { freeDFA(d); return v->err; } if (hitend && cold == NULL) { cold = begin; } if (end != NULL) { break; /* NOTE BREAK OUT */ } } assert(end != NULL); /* search RE succeeded so loop should */ freeDFA(d); /* * And pin down details. 
*/ assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); if (v->g->cflags®_EXPECT) { if (cold != NULL) { v->details->rm_extend.rm_so = OFF(cold); } else { v->details->rm_extend.rm_so = OFF(v->stop); } v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (v->nmatch == 1) { /* no need for submatches */ return REG_OKAY; } /* * Find submatches. */ zapallsubs(v->pmatch, v->nmatch); return cdissect(v, v->g->tree, begin, end); }
/*
 - complicatedReversedDissect - determine backref shortest-first subexpression
 - matches
 * The retry memory stores the offset of the trial midpoint from begin, plus 1
 * so that 0 uniquely means "clean slate".
 ^ static int complicatedReversedDissect(struct vars *, struct subre *, chr *, chr *);
 *
 * Splits [begin, end) between the left and right children of a '.' node,
 * trying midpoints in increasing order as produced by shortest() on the left
 * child.  Returns REG_OKAY once both children dissect successfully,
 * REG_NOMATCH when no midpoint works, or any other code returned by
 * complicatedDissect().  Both DFAs are released on every path that gets past
 * their creation.
 */
static int                      /* regexec return code */
complicatedReversedDissect(
    struct vars *const v,
    struct subre *const t,
    chr *const begin,           /* beginning of relevant substring */
    chr *const end)             /* end of same */
{
    struct dfa *leftDfa, *rightDfa;
    chr *mid;
    int result;

    assert(t->op == '.');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->right != NULL && t->right->cnfa.nstates > 0);
    assert(t->left->flags&SHORTER);

    /*
     * Concatenation -- need to split the substring between parts.
     */

    leftDfa = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
    if (ISERR()) {
        return v->err;
    }
    rightDfa = newDFA(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
    if (ISERR()) {
        freeDFA(leftDfa);
        return v->err;
    }
    MDEBUG(("cRev %d\n", t->retry));

    /*
     * Pick a tentative midpoint: resume from the remembered one if there is
     * one, otherwise ask the left DFA for its shortest match.
     */

    if (v->mem[t->retry] != 0) {
        mid = begin + (v->mem[t->retry] - 1);
        MDEBUG(("working midpoint %ld\n", LOFF(mid)));
    } else {
        mid = shortest(v, leftDfa, begin, begin, end, NULL, NULL);
        if (mid == NULL) {
            result = REG_NOMATCH;
            goto cleanup;
        }
        MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
        v->mem[t->retry] = (mid - begin) + 1;
    }

    /*
     * Iterate until satisfaction or failure.
     */

    for (;;) {
        /*
         * Try this midpoint on for size.
         */

        if (longest(v, rightDfa, mid, end, NULL) == end) {
            result = complicatedDissect(v, t->left, begin, mid);
            if (result == REG_OKAY) {
                result = complicatedDissect(v, t->right, mid, end);
                if (result == REG_OKAY) {
                    /*
                     * Satisfaction.
                     */

                    MDEBUG(("successful\n"));
                    goto cleanup;
                }
            }
            if (result != REG_OKAY && result != REG_NOMATCH) {
                goto cleanup;   /* hard error from a child */
            }
        }

        /*
         * That midpoint didn't work, find a new one.  Every exit below
         * reports REG_NOMATCH.
         */

        result = REG_NOMATCH;
        if (mid == end) {
            /*
             * All possibilities exhausted.
             */

            MDEBUG(("%d no midpoint\n", t->retry));
            goto cleanup;
        }
        mid = shortest(v, leftDfa, begin, mid+1, end, NULL, NULL);
        if (mid == NULL) {
            /*
             * Failed to find a new one.
             */

            MDEBUG(("%d failed midpoint\n", t->retry));
            goto cleanup;
        }
        MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
        v->mem[t->retry] = (mid - begin) + 1;
        zapSubtree(v, t->left);
        zapSubtree(v, t->right);
    }

cleanup:
    freeDFA(leftDfa);
    freeDFA(rightDfa);
    return result;
}
/*
 - concatenationDissect - determine concatenation subexpression matches
 - (uncomplicated)
 ^ static int concatenationDissect(struct vars *, struct subre *, chr *, chr *);
 *
 * Splits [begin, end) between the left and right children of a '.' node.
 * The left child proposes midpoints (shortest-first if its SHORTER flag is
 * set, longest-first otherwise) until the right child's longest match from
 * the midpoint reaches exactly end; then both halves are passed to
 * dissect().
 *
 * Returns REG_ASSERT if no workable midpoint exists, v->err if the second
 * DFA cannot be created, otherwise the first non-REG_OKAY result from
 * dissect() on the left half or the result of dissect() on the right half.
 */
static int                      /* regexec return code */
concatenationDissect(
    struct vars *const v,
    struct subre *const t,
    chr *const begin,           /* beginning of relevant substring */
    chr *const end)             /* end of same */
{
    struct dfa *d, *d2;
    chr *mid;
    int i;
    int shorter;
    chr *stop;                  /* last midpoint worth trying */

    assert(t->op == '.');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->right != NULL && t->right->cnfa.nstates > 0);

    /*
     * Only look at t->left once the assertions above have checked it, so a
     * malformed tree trips the assert rather than a NULL dereference.
     */

    shorter = (t->left->flags&SHORTER) ? 1 : 0;
    stop = (shorter) ? end : begin;

    d = newDFA(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
    NOERR();
    d2 = newDFA(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
    if (ISERR()) {
        assert(d2 == NULL);
        freeDFA(d);
        return v->err;
    }

    /*
     * Pick a tentative midpoint.
     */

    if (shorter) {
        mid = shortest(v, d, begin, begin, end, NULL, NULL);
    } else {
        mid = longest(v, d, begin, end, NULL);
    }
    if (mid == NULL) {
        freeDFA(d);
        freeDFA(d2);
        return REG_ASSERT;
    }
    MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));

    /*
     * Iterate until satisfaction or failure.
     */

    while (longest(v, d2, mid, end, NULL) != end) {
        /*
         * That midpoint didn't work, find a new one.
         */

        if (mid == stop) {
            /*
             * All possibilities exhausted!
             */

            MDEBUG(("no midpoint!\n"));
            freeDFA(d);
            freeDFA(d2);
            return REG_ASSERT;
        }
        if (shorter) {
            mid = shortest(v, d, begin, mid+1, end, NULL, NULL);
        } else {
            mid = longest(v, d, begin, mid-1, NULL);
        }
        if (mid == NULL) {
            /*
             * Failed to find a new one!
             */

            MDEBUG(("failed midpoint!\n"));
            freeDFA(d);
            freeDFA(d2);
            return REG_ASSERT;
        }
        MDEBUG(("new midpoint %ld\n", LOFF(mid)));
    }

    /*
     * Satisfaction.
     */

    MDEBUG(("successful\n"));
    freeDFA(d);
    freeDFA(d2);
    i = dissect(v, t->left, begin, mid);
    if (i != REG_OKAY) {
        return i;
    }
    return dissect(v, t->right, mid, end);
}