fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ /* anchor = 1 for anchored matches, else 0 */ { Node *p, *p1; fa *f; p = reparse(s); p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); /* put ALL STAR in front of reg. exp. */ p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); /* put FINAL after reg. exp. */ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) overflo("out of space for fa"); f->accept = poscnt-1; /* penter has computed number of positions in re */ cfoll(f, p1); /* set up follow sets */ freetr(p1); if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) overflo("out of space in makedfa"); if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) overflo("out of space in makedfa"); *f->posns[1] = 0; f->initstat = makeinit(f, anchor); f->anchor = anchor; f->restr = (uschar *) tostring(s); return f; }
void cfail() { struct words *queue[QSIZE]; struct words **front, **rear; struct words *state; int bstart; char c; struct words *s; s = w; front = rear = queue; init: if ((s->inp) != 0) { *rear++ = s->nst; if (rear >= &queue[QSIZE - 1]) overflo(); } if ((s = s->link) != 0) { goto init; } while (rear != front) { s = *front; if (front == &queue[QSIZE-1]) front = queue; else front++; cloop: if ((c = s->inp) != 0) { bstart = 0; *rear = (q = s->nst); if (front < rear) if (rear >= &queue[QSIZE-1]) if (front == queue) overflo(); else rear = queue; else rear++; else if (++rear == front) overflo(); state = s->fail; floop: if (state == 0) { state = w; bstart = 1; } if (state->inp == c) { qloop: q->fail = state->nst; if ((state->nst)->out == 1) q->out = 1; if ((q = q->link) != 0) goto qloop; } else if ((state = state->link) != 0) goto floop; else if (bstart == 0){ state = 0; goto floop; } } if ((s = s->link) != 0) goto cloop; } }
int makeinit(fa *f, int anchor) { int i, k; f->curstat = 2; f->out[2] = 0; f->reset = 0; k = *(f->re[0].lfollow); xfree(f->posns[2]); if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) overflo("out of space in makeinit"); for (i=0; i <= k; i++) { (f->posns[2])[i] = (f->re[0].lfollow)[i]; } if ((f->posns[2])[1] == f->accept) f->out[2] = 1; for (i=0; i < NCHARS; i++) f->gototab[2][i] = 0; f->curstat = cgoto(f, 2, HAT); if (anchor) { *f->posns[2] = k-1; /* leave out position 0 */ for (i=0; i < k; i++) { (f->posns[0])[i] = (f->posns[2])[i]; } f->out[0] = f->out[2]; if (f->curstat != 2) --(*f->posns[f->curstat]); } return f->curstat; }
int pmatch(fa *f, const char *p0) /* longest match, for sub */ { int s, ns; uschar *p = (uschar *) p0; uschar *q; int i, k; /* s = f->reset ? makeinit(f,1) : f->initstat; */ if (f->reset) { f->initstat = s = makeinit(f,1); } else { s = f->initstat; } patbeg = (char *) p; patlen = -1; do { q = p; do { if (f->out[s]) /* final state */ patlen = q-p; /* assert(*q < NCHARS); */ if ((ns = f->gototab[s][*q]) != 0) s = ns; else s = cgoto(f, s, *q); if (s == 1) { /* no transition */ if (patlen >= 0) { patbeg = (char *) p; return(1); } else goto nextin; /* no match */ } } while (*q++ != 0); if (f->out[s]) patlen = q-p-1; /* don't count $ */ if (patlen >= 0) { patbeg = (char *) p; return(1); } nextin: s = 2; if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0]; if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) overflo("out of space in pmatch"); for (i = 0; i <= k; i++) (f->posns[2])[i] = (f->posns[0])[i]; f->initstat = f->curstat = 2; f->out[2] = f->out[0]; for (i = 0; i < NCHARS; i++) f->gototab[2][i] = 0; } } while (*p++ != 0); return (0); }
void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ { int i; int *p; switch (type(v)) { ELEAF LEAF f->re[info(v)].ltype = type(v); f->re[info(v)].lval.np = right(v); while (f->accept >= maxsetvec) { /* guessing here! */ setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); if (setvec == 0 || tmpset == 0) overflo("out of space in cfoll()"); maxsetvec *= 4; } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) overflo("out of space building follow set"); f->re[info(v)].lfollow = p; *p = setcnt; for (i = f->accept; i >= 0; i--) if (setvec[i] == 1) *++p = i; break; UNARY cfoll(f,left(v)); break; case CAT: case OR: cfoll(f,left(v)); cfoll(f,right(v)); break; default: /* can't happen */ FATAL("can't happen: unknown type %d in cfoll", type(v)); } }
/** * make automaton. */ void cgotofn(const char *pattern) { int c; struct words *s; s = smax = w; nword: for(;;) { c = *pattern++; if (c==0) return; if (c == '\n') { s->out = 1; s = w; } else { loop: if (s->inp == c) { s = s->nst; continue; } if (s->inp == 0) goto enter; if (s->link == 0) { if (smax >= &w[MAXSIZ - 1]) overflo(); s->link = ++smax; s = smax; goto enter; } s = s->link; goto loop; } } enter: do { s->inp = c; if (smax >= &w[MAXSIZ - 1]) overflo(); s->nst = ++smax; s = smax; } while ((c = *pattern++) != '\n' && c!=0); smax->out = 1; s = w; if (c != 0) goto nword; }
int nematch(fa *f, char *p0) /* non-empty match, for sub */ { int s, ns; uschar *p = (uschar *) p0; uschar *q; int i, k; s = f->reset ? makeinit(f,1) : f->initstat; patlen = -1; while (*p) { q = p; do { if (f->out[s]) /* final state */ patlen = q-p; if ((ns = f->gototab[s][*q]) != 0) s = ns; else s = cgoto(f, s, *q); if (s == 1) /* no transition */ if (patlen > 0) { patbeg = (char *) p; return(1); } else goto nnextin; /* no nonempty match */ } while (*q++ != 0); if (f->out[s]) patlen = q-p-1; /* don't count $ */ if (patlen > 0 ) { patbeg = (char *) p; return(1); } nnextin: s = 2; if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0]; if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) overflo("out of state space"); for (i = 0; i <= k; i++) (f->posns[2])[i] = (f->posns[0])[i]; f->initstat = f->curstat = 2; f->out[2] = f->out[0]; for (i = 0; i < NCHARS; i++) f->gototab[2][i] = 0; } p++; } return (0); }
int first(Node *p) /* collects initially active leaves of p into setvec */ /* returns 0 if p matches empty string */ { int b, lp; switch (type(p)) { ELEAF LEAF lp = info(p); /* look for high-water mark of subscripts */ while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); if (setvec == 0 || tmpset == 0) overflo("out of space in first()"); maxsetvec *= 4; } if (type(p) == EMPTYRE) { setvec[lp] = 0; return(0); } if (setvec[lp] != 1) { setvec[lp] = 1; setcnt++; } if (type(p) == CCL && (*(char *) right(p)) == '\0') return(0); /* empty CCL */ else return(1); case PLUS: if (first(left(p)) == 0) return(0); return(1); case STAR: case QUEST: first(left(p)); return(0); case CAT: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); return(1); case OR: b = first(right(p)); if (first(left(p)) == 0 || b == 0) return(0); return(1); } FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ return(-1); }
fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; fa *pfa; static int now = 1; if (setvec == 0) { /* first time through any RE */ maxsetvec = MAXLIN; setvec = (int *) malloc(maxsetvec * sizeof(int)); tmpset = (int *) malloc(maxsetvec * sizeof(int)); if (setvec == 0 || tmpset == 0) overflo("out of space initializing makedfa"); } if (compile_time) /* a constant for sure */ return mkdfa(s, anchor); for (i = 0; i < nfatab; i++) /* is it there already? */ if (fatab[i]->anchor == anchor && strcmp((const char *) fatab[i]->restr, s) == 0) { fatab[i]->use = now++; return fatab[i]; } pfa = mkdfa(s, anchor); if (nfatab < NFA) { /* room for another */ fatab[nfatab] = pfa; fatab[nfatab]->use = now++; nfatab++; return pfa; } use = fatab[0]->use; /* replace least-recently used */ nuse = 0; for (i = 1; i < nfatab; i++) if (fatab[i]->use < use) { use = fatab[i]->use; nuse = i; } freefa(fatab[nuse]); fatab[nuse] = pfa; pfa->use = now++; return pfa; }
int cgoto(fa *f, int s, int c) { int i, j, k; int *p, *q; assert(c == HAT || c < NCHARS); while (f->accept >= maxsetvec) { /* guessing here! */ maxsetvec *= 4; setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); if (setvec == 0 || tmpset == 0) overflo("out of space in cgoto()"); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; /* compute positions of gototab[s,c] into setvec */ p = f->posns[s]; for (i = 1; i <= *p; i++) { if ((k = f->re[p[i]].ltype) != FINAL) { if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) || (k == DOT && c != 0 && c != HAT) || (k == ALL && c != 0) || (k == EMPTYRE && c != 0) || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { if (q[j] >= maxsetvec) { maxsetvec *= 4; setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); if (setvec == 0 || tmpset == 0) overflo("cgoto overflow"); } if (setvec[q[j]] == 0) { setcnt++; setvec[q[j]] = 1; } } } } } /* determine if setvec is a previous state */ tmpset[0] = setcnt; j = 1; for (i = f->accept; i >= 0; i--) if (setvec[i]) { tmpset[j++] = i; } /* tmpset == previous state? */ for (i = 1; i <= f->curstat; i++) { p = f->posns[i]; if ((k = tmpset[0]) != p[0]) goto different; for (j = 1; j <= k; j++) if (tmpset[j] != p[j]) goto different; /* setvec is state i */ f->gototab[s][c] = i; return i; different: ; } /* add tmpset to current set of states */ if (f->curstat >= NSTATES-1) { f->curstat = 2; f->reset = 1; for (i = 2; i < NSTATES; i++) xfree(f->posns[i]); } else ++(f->curstat); for (i = 0; i < NCHARS; i++) f->gototab[f->curstat][i] = 0; xfree(f->posns[f->curstat]); if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) overflo("out of space in cgoto"); f->posns[f->curstat] = p; f->gototab[s][c] = f->curstat; for (i = 0; i <= setcnt; i++) p[i] = tmpset[i]; if (setvec[f->accept]) f->out[f->curstat] = 1; else f->out[f->curstat] = 0; return f->curstat; }