/* * altdissect - determine alternative subexpression matches (uncomplicated) */ static int /* regexec return code */ altdissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; int i; assert(t != NULL); assert(t->op == '|'); for (i = 0; t != NULL; t = t->right, i++) { MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); if (ISERR()) return v->err; if (longest(v, d, begin, end, (int *) NULL) == end) { MDEBUG(("success\n")); freedfa(d); return dissect(v, t->left, begin, end); } freedfa(d); } return REG_ASSERT; /* none of them matched?!? */ }
/* - lacon - lookahead-constraint checker for miss() ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); */ static int /* predicate: constraint satisfied? */ lacon( struct vars *v, struct cnfa *pcnfa, /* parent cnfa */ chr *cp, pcolor co) /* "color" of the lookahead constraint */ { int n; struct subre *sub; struct dfa *d; struct smalldfa sd; chr *end; n = co - pcnfa->ncolors; assert(n < v->g->nlacons && v->g->lacons != NULL); FDEBUG(("=== testing lacon %d\n", n)); sub = &v->g->lacons[n]; d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); if (d == NULL) { ERR(REG_ESPACE); return 0; } end = longest(v, d, cp, v->stop, (int *)NULL); freedfa(d); FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); return (sub->subno) ? (end != NULL) : (end == NULL); }
/* - caltdissect - determine alternative subexpression matches (w. complications) ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ caltdissect( struct vars *v, struct subre *t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; int er; #define UNTRIED 0 /* not yet tried at all */ #define TRYING 1 /* top matched, trying submatches */ #define TRIED 2 /* top didn't match or submatches exhausted */ if (t == NULL) { return REG_NOMATCH; } assert(t->op == '|'); if (v->mem[t->retry] == TRIED) { return caltdissect(v, t->right, begin, end); } MDEBUG(("calt n%d\n", t->retry)); assert(t->left != NULL); if (v->mem[t->retry] == UNTRIED) { d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) { return v->err; } if (longest(v, d, begin, end, NULL) != end) { freedfa(d); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); } freedfa(d); MDEBUG(("calt matched\n")); v->mem[t->retry] = TRYING; } er = cdissect(v, t->left, begin, end); if (er != REG_NOMATCH) { return er; } v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); }
/* * cfind - find a match for the main NFA (with complications) */ static int cfind(struct vars * v, struct cnfa * cnfa, struct colormap * cm) { struct dfa *s; struct dfa *d; chr *cold; int ret; s = newdfa(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newdfa(v, cnfa, cm, &v->dfa2); if (ISERR()) { assert(d == NULL); freedfa(s); return v->err; } ret = cfindloop(v, cnfa, cm, d, s, &cold); freedfa(d); freedfa(s); NOERR(); if (v->g->cflags & REG_EXPECT) { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; }
/* * crevdissect - determine backref shortest-first subexpression matches * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". */ static int /* regexec return code */ crevdissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; chr *mid; int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); assert(t->left->flags & SHORTER); /* concatenation -- need to split the substring between parts */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("crev %d\n", t->retry)); /* pick a tentative midpoint */ if (v->mem[t->retry] == 0) { mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && longest(v, d2, mid, end, (int *) NULL) == end && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) break; /* NOTE BREAK OUT */ if (er != REG_OKAY && er != REG_NOMATCH) { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ if (mid == end) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; zapmem(v, t->left); zapmem(v, t->right); } /* satisfaction */ MDEBUG(("successful\n")); freedfa(d); freedfa(d2); return REG_OKAY; }
/* * condissect - determine concatenation subexpression matches (uncomplicated) */ static int /* regexec return code */ condissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; chr *mid; int i; int shorter = (t->left->flags & SHORTER) ? 1 : 0; chr *stop = (shorter) ? end : begin; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); NOERR(); d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); if (ISERR()) { assert(d2 == NULL); freedfa(d); return v->err; } /* pick a tentative midpoint */ if (shorter) mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); else mid = longest(v, d, begin, end, (int *) NULL); if (mid == NULL) { freedfa(d); freedfa(d2); return REG_ASSERT; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ while (longest(v, d2, mid, end, (int *) NULL) != end) { /* that midpoint didn't work, find a new one */ if (mid == stop) { /* all possibilities exhausted! */ MDEBUG(("no midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; } if (shorter) mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); else mid = longest(v, d, begin, mid - 1, (int *) NULL); if (mid == NULL) { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; } MDEBUG(("new midpoint %ld\n", LOFF(mid))); } /* satisfaction */ MDEBUG(("successful\n")); freedfa(d); freedfa(d2); i = dissect(v, t->left, begin, mid); if (i != REG_OKAY) return i; return dissect(v, t->right, mid, end); }
/* * find - find a match for the main NFA (no-complications case) */ static int find(struct vars * v, struct cnfa * cnfa, struct colormap * cm) { struct dfa *s; struct dfa *d; chr *begin; chr *end = NULL; chr *cold; chr *open; /* open and close of range of possible starts */ chr *close; int hitend; int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; /* first, a shot with the search RE */ s = newdfa(v, &v->g->search, cm, &v->dfa1); assert(!(ISERR() && s != NULL)); NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; close = shortest(v, s, v->search_start, v->search_start, v->stop, &cold, (int *) NULL); freedfa(s); NOERR(); if (v->g->cflags & REG_EXPECT) { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (close == NULL) /* not found */ return REG_NOMATCH; if (v->nmatch == 0) /* found, don't need exact location */ return REG_OKAY; /* find starting point and match */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); d = newdfa(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); for (begin = open; begin <= close; begin++) { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) end = shortest(v, d, begin, begin, v->stop, (chr **) NULL, &hitend); else end = longest(v, d, begin, v->stop, &hitend); NOERR(); if (hitend && cold == NULL) cold = begin; if (end != NULL) break; /* NOTE BREAK OUT */ } assert(end != NULL); /* search RE succeeded so loop should */ freedfa(d); /* and pin down details */ assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); if (v->g->cflags & REG_EXPECT) { if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } if (v->nmatch == 1) /* no need for submatches */ return REG_OKAY; /* submatches */ zapsubs(v->pmatch, v->nmatch); return dissect(v, v->g->tree, begin, end); }
/* - newdfa - set up a fresh DFA ^ static struct dfa *newdfa(struct vars *, struct cnfa *, ^ struct colormap *, struct smalldfa *); */ static struct dfa * newdfa( struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml) /* preallocated space, may be NULL */ { struct dfa *d; size_t nss = cnfa->nstates * 2; int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; struct smalldfa *smallwas = sml; assert(cnfa != NULL && cnfa->nstates != 0); if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { assert(wordsper == 1); if (sml == NULL) { sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa)); if (sml == NULL) { ERR(REG_ESPACE); return NULL; } } d = &sml->dfa; d->ssets = sml->ssets; d->statesarea = sml->statesarea; d->work = &d->statesarea[nss]; d->outsarea = sml->outsarea; d->incarea = sml->incarea; d->cptsmalloced = 0; d->mallocarea = (smallwas == NULL) ? (char *)sml : NULL; } else { d = (struct dfa *)MALLOC(sizeof(struct dfa)); if (d == NULL) { ERR(REG_ESPACE); return NULL; } d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); d->statesarea = (unsigned *) MALLOC((nss+WORK) * wordsper * sizeof(unsigned)); d->work = &d->statesarea[nss * wordsper]; d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors * sizeof(struct sset *)); d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors * sizeof(struct arcp)); d->cptsmalloced = 1; d->mallocarea = (char *)d; if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL || d->incarea == NULL) { freedfa(d); ERR(REG_ESPACE); return NULL; } } d->nssets = (v->eflags®_SMALL) ? 7 : nss; d->nssused = 0; d->nstates = cnfa->nstates; d->ncolors = cnfa->ncolors; d->wordsper = wordsper; d->cnfa = cnfa; d->cm = cm; d->lastpost = NULL; d->lastnopr = NULL; d->search = d->ssets; /* * Initialization of sset fields is done as needed. */ return d; }
/* - ccondissect - concatenation subexpression matches (with complications) * The retry memory stores the offset of the trial midpoint from begin, plus 1 * so that 0 uniquely means "clean slate". ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ ccondissect( struct vars *v, struct subre *t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; chr *mid; int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); if (t->left->flags&SHORTER) { /* reverse scan */ return crevdissect(v, t, begin, end); } d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) { return v->err; } d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("cconcat %d\n", t->retry)); /* * Pick a tentative midpoint. */ if (v->mem[t->retry] == 0) { mid = longest(v, d, begin, end, NULL); if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* * Iterate until satisfaction or failure. */ for (;;) { /* * Try this midpoint on for size. */ er = cdissect(v, t->left, begin, mid); if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end) && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) { break; /* NOTE BREAK OUT */ } if ((er != REG_OKAY) && (er != REG_NOMATCH)) { freedfa(d); freedfa(d2); return er; } /* * That midpoint didn't work, find a new one. */ if (mid == begin) { /* * All possibilities exhausted. */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } mid = longest(v, d, begin, mid-1, NULL); if (mid == NULL) { /* * Failed to find a new one. */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; zapmem(v, t->left); zapmem(v, t->right); } /* * Satisfaction. */ MDEBUG(("successful\n")); freedfa(d); freedfa(d2); return REG_OKAY; }
/* * pg_regexec - match regular expression */ int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags) { struct vars var; register struct vars *v = &var; int st; size_t n; size_t i; int backref; #define LOCALMAT 20 regmatch_t mat[LOCALMAT]; #define LOCALDFAS 40 struct dfa *subdfas[LOCALDFAS]; /* sanity checks */ if (re == NULL || string == NULL || re->re_magic != REMAGIC) return REG_INVARG; if (re->re_csize != sizeof(chr)) return REG_MIXED; /* Initialize locale-dependent support */ pg_set_regex_collation(re->re_collation); /* setup */ v->re = re; v->g = (struct guts *) re->re_guts; if ((v->g->cflags & REG_EXPECT) && details == NULL) return REG_INVARG; if (v->g->info & REG_UIMPOSSIBLE) return REG_NOMATCH; backref = (v->g->info & REG_UBACKREF) ? 1 : 0; v->eflags = flags; if (v->g->cflags & REG_NOSUB) nmatch = 0; /* override client */ v->nmatch = nmatch; if (backref) { /* need work area */ if (v->g->nsub + 1 <= LOCALMAT) v->pmatch = mat; else v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) * sizeof(regmatch_t)); if (v->pmatch == NULL) return REG_ESPACE; v->nmatch = v->g->nsub + 1; } else v->pmatch = pmatch; v->details = details; v->start = (chr *) string; v->search_start = (chr *) string + search_start; v->stop = (chr *) string + len; v->err = 0; assert(v->g->ntree >= 0); n = (size_t) v->g->ntree; if (n <= LOCALDFAS) v->subdfas = subdfas; else v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); if (v->subdfas == NULL) { if (v->pmatch != pmatch && v->pmatch != mat) FREE(v->pmatch); return REG_ESPACE; } for (i = 0; i < n; i++) v->subdfas[i] = NULL; /* do it */ assert(v->g->tree != NULL); if (backref) st = cfind(v, &v->g->tree->cnfa, &v->g->cmap); else st = find(v, &v->g->tree->cnfa, &v->g->cmap); /* copy (portion of) match vector over if necessary */ if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { zapallsubs(pmatch, nmatch); n = (nmatch < v->nmatch) ? nmatch : v->nmatch; memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t)); } /* clean up */ if (v->pmatch != pmatch && v->pmatch != mat) FREE(v->pmatch); n = (size_t) v->g->ntree; for (i = 0; i < n; i++) { if (v->subdfas[i] != NULL) freedfa(v->subdfas[i]); } if (v->subdfas != subdfas) FREE(v->subdfas); return st; }