/*
 * pg_regprefix - report the fixed leading text implied by a compiled regex
 *
 * The result is one of:
 *	REG_NOMATCH: strings matching the regex share no common prefix
 *	REG_PREFIX: strings matching the regex share a common prefix
 *	REG_EXACT: every string matching the regex is the same string
 * or some REG_XXX error code.
 *
 * For REG_PREFIX and REG_EXACT, *string receives a malloc'd buffer holding
 * the prefix or exact value, and *slength its length (counted in chrs, not
 * bytes!).  For every other result, *string is NULL and *slength is 0.
 *
 * Complex cases (lookahead constraints, for example) are not analyzed
 * exactly.  Consequently a string that matches the reported prefix or
 * exact-match value may still fail to satisfy the regex.  The converse must
 * never happen: any string that satisfies the regex matches the reported
 * prefix or exact-match value.
 */
int
pg_regprefix(regex_t *re,
			 chr **string,
			 size_t *slength)
{
	struct guts *internals;
	struct cnfa *search_nfa;
	int			result;

	/* both output pointers are mandatory */
	if (string == NULL || slength == NULL)
		return REG_INVARG;

	/* preset the outputs so that every failure return leaves them empty */
	*string = NULL;
	*slength = 0;

	/* validate the compiled regex */
	if (re == NULL)
		return REG_INVARG;
	if (re->re_magic != REMAGIC)
		return REG_INVARG;
	if (re->re_csize != sizeof(chr))
		return REG_MIXED;

	/* Initialize locale-dependent support */
	pg_set_regex_collation(re->re_collation);

	/* a regex flagged as impossible to match has no prefix to report */
	internals = (struct guts *) re->re_guts;
	if ((internals->info & REG_UIMPOSSIBLE) != 0)
		return REG_NOMATCH;

	/*
	 * Only the search NFA of the topmost regex tree node is examined, so
	 * constraints such as backrefs are not fully applied.  The API contract
	 * above permits that imprecision.
	 */
	assert(internals->tree != NULL);
	search_nfa = &internals->tree->cnfa;

	/*
	 * A correct NFA never contains exit-free loops, so the traversal should
	 * not be able to come back to a state it already visited.  That bounds
	 * the output at nstates chrs.
	 */
	*string = (chr *) MALLOC(search_nfa->nstates * sizeof(chr));
	if (*string == NULL)
		return REG_ESPACE;

	/* walk the NFA, filling the buffer and *slength */
	result = findprefix(search_nfa, &internals->cmap, *string, slength);

	assert(*slength <= search_nfa->nstates);

	/* on success the caller takes ownership of the buffer */
	if (result == REG_PREFIX || result == REG_EXACT)
		return result;

	/* anything else: discard the buffer and reset the outputs */
	FREE(*string);
	*string = NULL;
	*slength = 0;

	return result;
}
/*
 * pg_regexec - match regular expression
 *
 * Parameters:
 *	re: compiled regex; must be non-NULL, carry REMAGIC, and have been
 *		built for the same chr width as this code
 *	string, len: data to search; string must be non-NULL, and the search
 *		stops at string + len
 *	search_start: offset (in chrs) within string at which searching begins;
 *		must not exceed len
 *	details: receives extra match details; may be NULL unless the regex was
 *		compiled with REG_EXPECT
 *	nmatch, pmatch: caller's match vector and its length; nmatch is treated
 *		as zero when the regex was compiled with REG_NOSUB
 *	flags: execution flags, stored as v->eflags for the matcher
 *
 * Returns:
 *	REG_INVARG for bad arguments, REG_MIXED for a chr-width mismatch,
 *	REG_NOMATCH if the regex is flagged as impossible to match or if
 *	search_start lies beyond len, REG_ESPACE on allocation failure, and
 *	otherwise whatever find() or cfind() reports (REG_OKAY on a match).
 */
int
pg_regexec(regex_t *re,
		   const chr *string,
		   size_t len,
		   size_t search_start,
		   rm_detail_t *details,
		   size_t nmatch,
		   regmatch_t pmatch[],
		   int flags)
{
	struct vars var;
	register struct vars *v = &var;
	int			st;
	size_t		n;
	size_t		i;
	int			backref;

#define  LOCALMAT	 20
	regmatch_t	mat[LOCALMAT];

#define  LOCALDFAS	 40
	struct dfa *subdfas[LOCALDFAS];

	/* sanity checks */
	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
		return REG_INVARG;
	if (re->re_csize != sizeof(chr))
		return REG_MIXED;

	/*
	 * A start offset beyond the end of the data cannot produce a match, and
	 * string + search_start would not even be a valid pointer.  Reject it
	 * here, before anything is allocated.  search_start == len is allowed,
	 * so that an empty match at the very end can still be attempted.
	 */
	if (search_start > len)
		return REG_NOMATCH;

	/* Initialize locale-dependent support */
	pg_set_regex_collation(re->re_collation);

	/* setup */
	v->re = re;
	v->g = (struct guts *) re->re_guts;
	if ((v->g->cflags & REG_EXPECT) && details == NULL)
		return REG_INVARG;
	if (v->g->info & REG_UIMPOSSIBLE)
		return REG_NOMATCH;
	backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
	v->eflags = flags;
	if (v->g->cflags & REG_NOSUB)
		nmatch = 0;				/* override client */
	v->nmatch = nmatch;
	if (backref)
	{
		/*
		 * Backrefs need a work area covering every subexpression, whatever
		 * the caller's nmatch is.  Use the on-stack array when it is big
		 * enough, else allocate one.
		 */
		if (v->g->nsub + 1 <= LOCALMAT)
			v->pmatch = mat;
		else
			v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
											  sizeof(regmatch_t));
		if (v->pmatch == NULL)
			return REG_ESPACE;
		v->nmatch = v->g->nsub + 1;
	}
	else
		v->pmatch = pmatch;		/* write directly into the caller's vector */
	v->details = details;
	v->start = (chr *) string;
	v->search_start = (chr *) string + search_start;
	v->stop = (chr *) string + len;
	v->err = 0;

	/* one DFA slot per tree node; same stack-or-heap choice as for pmatch */
	assert(v->g->ntree >= 0);
	n = (size_t) v->g->ntree;
	if (n <= LOCALDFAS)
		v->subdfas = subdfas;
	else
		v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
	if (v->subdfas == NULL)
	{
		/* release the match work area if it was heap-allocated above */
		if (v->pmatch != pmatch && v->pmatch != mat)
			FREE(v->pmatch);
		return REG_ESPACE;
	}
	/* NULL slots let the cleanup loop below free only what was created */
	for (i = 0; i < n; i++)
		v->subdfas[i] = NULL;

	/* do it */
	assert(v->g->tree != NULL);
	if (backref)
		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
	else
		st = find(v, &v->g->tree->cnfa, &v->g->cmap);

	/* copy (portion of) match vector over if necessary */
	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
	{
		/*
		 * Reset the caller's whole vector first (zapallsubs presumably marks
		 * each entry as "no match" -- confirm against its definition), then
		 * copy as many entries as both vectors can hold.
		 */
		zapallsubs(pmatch, nmatch);
		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
		memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
	}

	/* clean up */
	if (v->pmatch != pmatch && v->pmatch != mat)
		FREE(v->pmatch);
	n = (size_t) v->g->ntree;
	for (i = 0; i < n; i++)
	{
		if (v->subdfas[i] != NULL)
			freedfa(v->subdfas[i]);
	}
	if (v->subdfas != subdfas)
		FREE(v->subdfas);

	return st;
}