/* - matcher - the actual matching engine == static int matcher(struct re_guts *g, const char *string, \ == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) { const char *endp; int i; struct match mv; struct match *m = &mv; const char *dp; const sopno gf = g->firststate+1; /* +1 for OEND */ const sopno gl = g->laststate; const char *start; const char *stop; /* Boyer-Moore algorithms variables */ const char *pp; int cj, mj; const char *mustfirst; const char *mustlast; int *matchjump; int *charjump; /* simplify the situation where possible */ if (g->cflags®_NOSUB) nmatch = 0; if (eflags®_STARTEND) { start = string + pmatch[0].rm_so; stop = string + pmatch[0].rm_eo; } else { start = string; stop = start + strlen(start); } if (stop < start) return(REG_INVARG); /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { if (g->charjump != NULL && g->matchjump != NULL) { mustfirst = g->must; mustlast = g->must + g->mlen - 1; charjump = g->charjump; matchjump = g->matchjump; pp = mustlast; for (dp = start+g->mlen-1; dp < stop;) { /* Fast skip non-matches */ while (dp < stop && charjump[(int)*dp]) dp += charjump[(int)*dp]; if (dp >= stop) break; /* Greedy matcher */ /* We depend on not being used for * for strings of length 1 */ while (*--dp == *--pp && pp != mustfirst); if (*dp == *pp) break; /* Jump to next possible match */ mj = matchjump[pp - mustfirst]; cj = charjump[(int)*dp]; dp += (cj < mj ? mj : cj); pp = mustlast; } if (pp != mustfirst) return(REG_NOMATCH); } else { for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && stop - dp >= g->mlen && memcmp(dp, g->must, (size_t)g->mlen) == 0) break; if (dp == stop) /* we didn't find g->must */ return(REG_NOMATCH); } } /* match struct setup */ m->g = g; m->eflags = eflags; m->pmatch = NULL; m->lastpos = NULL; m->offp = string; m->beginp = start; m->endp = stop; STATESETUP(m, 4); SETUP(m->st); SETUP(m->fresh); SETUP(m->tmp); SETUP(m->empty); CLEAR(m->empty); ZAPSTATE(&m->mbs); /* Adjust start according to moffset, to speed things up */ if (g->moffset > -1) start = ((dp - g->moffset) < start) ? start : dp - g->moffset; /* this loop does only one repetition except for backrefs */ for (;;) { endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ if (m->pmatch != NULL) free((char *)m->pmatch); if (m->lastpos != NULL) free((char *)m->lastpos); STATETEARDOWN(m); return(REG_NOMATCH); } if (nmatch == 0 && !g->backrefs) break; /* no further info needed */ /* where? */ assert(m->coldp != NULL); for (;;) { NOTE("finding start"); endp = slow(m, m->coldp, stop, gf, gl); if (endp != NULL) break; assert(m->coldp < m->endp); m->coldp += XMBRTOWC(NULL, m->coldp, m->endp - m->coldp, &m->mbs, 0); } if (nmatch == 1 && !g->backrefs) break; /* no further info needed */ /* oh my, he wants the subexpressions... */ if (m->pmatch == NULL) m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * sizeof(regmatch_t)); if (m->pmatch == NULL) { STATETEARDOWN(m); return(REG_ESPACE); } for (i = 1; i <= m->g->nsub; i++) m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; if (!g->backrefs && !(m->eflags®_BACKR)) { NOTE("dissecting"); dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) m->lastpos = malloc((g->nplus+1) * sizeof(const char *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); STATETEARDOWN(m); return(REG_ESPACE); } NOTE("backref dissect"); dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); } if (dp != NULL) break; /* uh-oh... we couldn't find a subexpression-level match */ assert(g->backrefs); /* must be back references doing it */ assert(g->nplus == 0 || m->lastpos != NULL); for (;;) { if (dp != NULL || endp <= m->coldp) break; /* defeat */ NOTE("backoff"); endp = slow(m, m->coldp, endp-1, gf, gl); if (endp == NULL) break; /* defeat */ /* try it on a shorter possibility */ #ifndef NDEBUG for (i = 1; i <= m->g->nsub; i++) { assert(m->pmatch[i].rm_so == -1); assert(m->pmatch[i].rm_eo == -1); } #endif NOTE("backoff dissect"); dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); } assert(dp == NULL || dp == endp); if (dp != NULL) /* found a shorter one */ break; /* despite initial appearances, there is no match here */ NOTE("false alarm"); /* recycle starting later */ start = m->coldp + XMBRTOWC(NULL, m->coldp, stop - m->coldp, &m->mbs, 0); assert(start <= stop); } /* fill in the details if requested */ if (nmatch > 0) { pmatch[0].rm_so = m->coldp - m->offp; pmatch[0].rm_eo = endp - m->offp; } if (nmatch > 1) { assert(m->pmatch != NULL); for (i = 1; i < nmatch; i++) if (i <= m->g->nsub) pmatch[i] = m->pmatch[i]; else { pmatch[i].rm_so = -1; pmatch[i].rm_eo = -1; } } if (m->pmatch != NULL) free((char *)m->pmatch); if (m->lastpos != NULL) free((char *)m->lastpos); STATETEARDOWN(m); return(0); }
/* - matcher - the actual matching engine */ static int /* 0 success, R_REGEX_NOMATCH failure */ matcher(struct re_guts *g, char *string, size_t nmatch, RRegexMatch pmatch[], int eflags) { char *endp; int i; struct match mv; struct match *m = &mv; char *dp; const sopno gf = g->firststate+1; /* +1 for OEND */ const sopno gl = g->laststate; char *start; char *stop; /* simplify the situation where possible */ if (g->cflags&R_REGEX_NOSUB) nmatch = 0; if (eflags&R_REGEX_STARTEND) { start = string + pmatch[0].rm_so; stop = string + pmatch[0].rm_eo; } else { start = string; stop = start + strlen(start); } if (stop < start) return(R_REGEX_INVARG); /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && stop - dp >= g->mlen && memcmp(dp, g->must, (size_t)g->mlen) == 0) break; if (dp == stop) /* we didn't find g->must */ return(R_REGEX_NOMATCH); } /* match struct setup */ m->g = g; m->eflags = eflags; m->pmatch = NULL; m->lastpos = NULL; m->offp = string; m->beginp = start; m->endp = stop; STATESETUP(m, 4); SETUP(m->st); SETUP(m->fresh); SETUP(m->tmp); SETUP(m->empty); CLEAR(m->empty); /* this loop does only one repetition except for backrefs */ for (;;) { endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ free(m->pmatch); free(m->lastpos); STATETEARDOWN(m); return(R_REGEX_NOMATCH); } if (nmatch == 0 && !g->backrefs) break; /* no further info needed */ /* where? */ assert(m->coldp != NULL); for (;;) { NOTE("finding start"); endp = slow(m, m->coldp, stop, gf, gl); if (endp != NULL) break; assert(m->coldp < m->endp); m->coldp++; } if (nmatch == 1 && !g->backrefs) break; /* no further info needed */ /* oh my, he wants the subexpressions... */ if (m->pmatch == NULL) m->pmatch = (RRegexMatch *)malloc((m->g->nsub + 1) * sizeof(RRegexMatch)); if (m->pmatch == NULL) { STATETEARDOWN(m); return(R_REGEX_ESPACE); } for (i = 1; i <= m->g->nsub; i++) m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; if (!g->backrefs && !(m->eflags&R_REGEX_BACKR)) { NOTE("dissecting"); dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) m->lastpos = (char **)malloc((g->nplus+1) * sizeof(char *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); STATETEARDOWN(m); return(R_REGEX_ESPACE); } NOTE("backref dissect"); dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); } if (dp != NULL) break; /* uh-oh... we couldn't find a subexpression-level match */ assert(g->backrefs); /* must be back references doing it */ assert(g->nplus == 0 || m->lastpos != NULL); for (;;) { if (dp != NULL || endp <= m->coldp) break; /* defeat */ NOTE("backoff"); endp = slow(m, m->coldp, endp-1, gf, gl); if (endp == NULL) break; /* defeat */ /* try it on a shorter possibility */ #ifndef NDEBUG for (i = 1; i <= m->g->nsub; i++) { assert(m->pmatch[i].rm_so == -1); assert(m->pmatch[i].rm_eo == -1); } #endif NOTE("backoff dissect"); dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); } assert(dp == NULL || dp == endp); if (dp != NULL) /* found a shorter one */ break; /* despite initial appearances, there is no match here */ NOTE("false alarm"); if (m->coldp == stop) break; start = m->coldp + 1; /* recycle starting later */ } /* fill in the details if requested */ if (nmatch > 0) { pmatch[0].rm_so = m->coldp - m->offp; pmatch[0].rm_eo = endp - m->offp; } if (nmatch > 1) { assert(m->pmatch != NULL); for (i = 1; i < nmatch; i++) if (i <= m->g->nsub) pmatch[i] = m->pmatch[i]; else { pmatch[i].rm_so = -1; pmatch[i].rm_eo = -1; } } if (m->pmatch != NULL) free((char *)m->pmatch); if (m->lastpos != NULL) free((char *)m->lastpos); STATETEARDOWN(m); return(0); }