/* * lexstart - set up lexical stuff, scan leading options */ static void lexstart(struct vars * v) { prefixes(v); /* may turn on new type bits etc. */ NOERR(); if (v->cflags & REG_QUOTE) { assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))); INTOCON(L_Q); } else if (v->cflags & REG_EXTENDED) { assert(!(v->cflags & REG_QUOTE)); INTOCON(L_ERE); } else { assert(!(v->cflags & (REG_QUOTE | REG_ADVF))); INTOCON(L_BRE); } v->nexttype = EMPTY; /* remember we were at the start */ next(v); /* set up the first token */ }
/* - complicatedFind - find a match for the main NFA (with complications) ^ static int complicatedFind(struct vars *, struct cnfa *, struct colormap *); */ static int complicatedFind( struct vars *const v, struct cnfa *const cnfa, struct colormap *const cm) { struct dfa *s, *d; chr *cold = NULL; /* silence gcc 4 warning */ int ret; s = newDFA(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newDFA(v, cnfa, cm, &v->dfa2); if (ISERR()) { assert(d == NULL); freeDFA(s); return v->err; } ret = complicatedFindLoop(v, cnfa, cm, d, s, &cold); freeDFA(d); freeDFA(s); NOERR(); if (v->g->cflags®_EXPECT) { assert(v->details != NULL); if (cold != NULL) { v->details->rm_extend.rm_so = OFF(cold); } else { v->details->rm_extend.rm_so = OFF(v->stop); } v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; }
/* - caltdissect - dissect match for alternation node ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ caltdissect( struct vars *v, struct subre *t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; int er; /* We loop, rather than tail-recurse, to handle a chain of alternatives */ while (t != NULL) { assert(t->op == '|'); assert(t->left != NULL && t->left->cnfa.nstates > 0); MDEBUG(("calt n%d\n", t->id)); d = getsubdfa(v, t->left); NOERR(); if (longest(v, d, begin, end, (int *) NULL) == end) { MDEBUG(("calt matched\n")); er = cdissect(v, t->left, begin, end); if (er != REG_NOMATCH) { return er; } } t = t->right; } return REG_NOMATCH; }
/* * cfind - find a match for the main NFA (with complications) */ static int cfind(struct vars * v, struct cnfa * cnfa, struct colormap * cm) { struct dfa *s; struct dfa *d; chr *cold; int ret; s = newdfa(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newdfa(v, cnfa, cm, &v->dfa2); if (ISERR()) { assert(d == NULL); freedfa(s); return v->err; } ret = cfindloop(v, cnfa, cm, d, s, &cold); freedfa(d); freedfa(s); NOERR(); if (v->g->cflags & REG_EXPECT) { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; }
/* * condissect - determine concatenation subexpression matches (uncomplicated) */ static int /* regexec return code */ condissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; chr *mid; int i; int shorter = (t->left->flags & SHORTER) ? 1 : 0; chr *stop = (shorter) ? end : begin; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); NOERR(); d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); if (ISERR()) { assert(d2 == NULL); freedfa(d); return v->err; } /* pick a tentative midpoint */ if (shorter) mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); else mid = longest(v, d, begin, end, (int *) NULL); if (mid == NULL) { freedfa(d); freedfa(d2); return REG_ASSERT; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ while (longest(v, d2, mid, end, (int *) NULL) != end) { /* that midpoint didn't work, find a new one */ if (mid == stop) { /* all possibilities exhausted! */ MDEBUG(("no midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; } if (shorter) mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); else mid = longest(v, d, begin, mid - 1, (int *) NULL); if (mid == NULL) { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; } MDEBUG(("new midpoint %ld\n", LOFF(mid))); } /* satisfaction */ MDEBUG(("successful\n")); freedfa(d); freedfa(d2); i = dissect(v, t->left, begin, mid); if (i != REG_OKAY) return i; return dissect(v, t->right, mid, end); }
/* * find - find a match for the main NFA (no-complications case) */ static int find(struct vars * v, struct cnfa * cnfa, struct colormap * cm) { struct dfa *s; struct dfa *d; chr *begin; chr *end = NULL; chr *cold; chr *open; /* open and close of range of possible starts */ chr *close; int hitend; int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; /* first, a shot with the search RE */ s = newdfa(v, &v->g->search, cm, &v->dfa1); assert(!(ISERR() && s != NULL)); NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; close = shortest(v, s, v->search_start, v->search_start, v->stop, &cold, (int *) NULL); freedfa(s); NOERR(); if (v->g->cflags & REG_EXPECT) { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (close == NULL) /* not found */ return REG_NOMATCH; if (v->nmatch == 0) /* found, don't need exact location */ return REG_OKAY; /* find starting point and match */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); d = newdfa(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); for (begin = open; begin <= close; begin++) { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) end = shortest(v, d, begin, begin, v->stop, (chr **) NULL, &hitend); else end = longest(v, d, begin, v->stop, &hitend); NOERR(); if (hitend && cold == NULL) cold = begin; if (end != NULL) break; /* NOTE BREAK OUT */ } assert(end != NULL); /* search RE succeeded so loop should */ freedfa(d); /* and pin down details */ assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); if (v->g->cflags & REG_EXPECT) { if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (v->nmatch == 1) /* no need for submatches */ return REG_OKAY; /* submatches */ zapsubs(v->pmatch, v->nmatch); return dissect(v, v->g->tree, begin, end); }
/* - crevcondissect - dissect match for concatenation node, shortest-first ^ static int crevcondissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ crevcondissect( struct vars *v, struct subre *t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d, *d2; chr *mid; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); assert(t->left->flags&SHORTER); d = getsubdfa(v, t->left); NOERR(); d2 = getsubdfa(v, t->right); NOERR(); MDEBUG(("crevcon %d\n", t->id)); /* * Pick a tentative midpoint. */ mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); if (mid == NULL) { return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* * Iterate until satisfaction or failure. */ for (;;) { /* * Try this midpoint on for size. */ if (longest(v, d2, mid, end, NULL) == end) { int er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY) { er = cdissect(v, t->right, mid, end); if (er == REG_OKAY) { /* * Satisfaction. */ MDEBUG(("successful\n")); return REG_OKAY; } } if (er != REG_NOMATCH) { return er; } } /* * That midpoint didn't work, find a new one. */ if (mid == end) { /* * All possibilities exhausted. */ MDEBUG(("%d no midpoint\n", t->id)); return REG_NOMATCH; } mid = shortest(v, d, begin, mid+1, end, NULL, NULL); if (mid == NULL) { /* * Failed to find a new one. */ MDEBUG(("%d failed midpoint\n", t->id)); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); zaptreesubs(v, t->left); zaptreesubs(v, t->right); } }
/* - simpleFind - find a match for the main NFA (no-complications case) ^ static int simpleFind(struct vars *, struct cnfa *, struct colormap *); */ static int simpleFind( struct vars *const v, struct cnfa *const cnfa, struct colormap *const cm) { struct dfa *s, *d; chr *begin, *end = NULL; chr *cold; chr *open, *close; /* Open and close of range of possible * starts */ int hitend; int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; /* * First, a shot with the search RE. */ s = newDFA(v, &v->g->search, cm, &v->dfa1); assert(!(ISERR() && s != NULL)); NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; close = shortest(v, s, v->start, v->start, v->stop, &cold, NULL); freeDFA(s); NOERR(); if (v->g->cflags®_EXPECT) { assert(v->details != NULL); if (cold != NULL) { v->details->rm_extend.rm_so = OFF(cold); } else { v->details->rm_extend.rm_so = OFF(v->stop); } v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (close == NULL) { /* not found */ return REG_NOMATCH; } if (v->nmatch == 0) { /* found, don't need exact location */ return REG_OKAY; } /* * Find starting point and match. */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); d = newDFA(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); for (begin = open; begin <= close; begin++) { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) { end = shortest(v, d, begin, begin, v->stop, NULL, &hitend); } else { end = longest(v, d, begin, v->stop, &hitend); } if (ISERR()) { freeDFA(d); return v->err; } if (hitend && cold == NULL) { cold = begin; } if (end != NULL) { break; /* NOTE BREAK OUT */ } } assert(end != NULL); /* search RE succeeded so loop should */ freeDFA(d); /* * And pin down details. */ assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); if (v->g->cflags®_EXPECT) { if (cold != NULL) { v->details->rm_extend.rm_so = OFF(cold); } else { v->details->rm_extend.rm_so = OFF(v->stop); } v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (v->nmatch == 1) { /* no need for submatches */ return REG_OKAY; } /* * Find submatches. */ zapallsubs(v->pmatch, v->nmatch); return cdissect(v, v->g->tree, begin, end); }
/* * ccondissect - dissect match for concatenation node */ static int /* regexec return code */ ccondissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; chr *mid; int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); assert(!(t->left->flags & SHORTER)); d = getsubdfa(v, t->left); NOERR(); d2 = getsubdfa(v, t->right); NOERR(); MDEBUG(("cconcat %d\n", t->id)); /* pick a tentative midpoint */ mid = longest(v, d, begin, end, (int *) NULL); if (mid == NULL) return REG_NOMATCH; MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ for (;;) { /* try this midpoint on for size */ if (longest(v, d2, mid, end, (int *) NULL) == end) { er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY) { er = cdissect(v, t->right, mid, end); if (er == REG_OKAY) { /* satisfaction */ MDEBUG(("successful\n")); return REG_OKAY; } } if (er != REG_NOMATCH) return er; } /* that midpoint didn't work, find a new one */ if (mid == begin) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->id)); return REG_NOMATCH; } mid = longest(v, d, begin, mid - 1, (int *) NULL); if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->id)); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); zaptreesubs(v, t->left); zaptreesubs(v, t->right); } /* can't get here */ return REG_ASSERT; }