Reprog *regcomp(const char *pattern, int cflags, const char **errorp) { struct cstate g; Renode *node; Reinst *split, *jump; int i; g.prog = malloc(sizeof (Reprog)); g.pstart = g.pend = malloc(sizeof (Renode) * strlen(pattern) * 2); if (setjmp(g.kaboom)) { if (errorp) *errorp = g.error; free(g.pstart); free(g.prog); return NULL; } g.source = pattern; g.ncclass = 0; g.nsub = 1; for (i = 0; i < MAXSUB; ++i) g.sub[i] = 0; g.prog->flags = cflags; next(&g); node = parsealt(&g); if (g.lookahead == ')') die(&g, "unmatched ')'"); if (g.lookahead != 0) die(&g, "syntax error"); g.prog->nsub = g.nsub; g.prog->start = g.prog->end = malloc((count(node) + 6) * sizeof (Reinst)); split = emit(g.prog, I_SPLIT); split->x = split + 3; split->y = split + 1; emit(g.prog, I_ANYNL); jump = emit(g.prog, I_JUMP); jump->x = split; emit(g.prog, I_LPAR); compile(g.prog, node); emit(g.prog, I_RPAR); emit(g.prog, I_END); #ifdef TEST dumpnode(node); putchar('\n'); dumpprog(g.prog); #endif free(g.pstart); if (errorp) *errorp = NULL; return g.prog; }
static Renode *parseatom(struct cstate *g) { Renode *atom; if (g->lookahead == L_CHAR) { atom = newnode(g, P_CHAR); atom->c = g->yychar; next(g); return atom; } if (g->lookahead == L_CCLASS) { atom = newnode(g, P_CCLASS); atom->cc = g->yycc; next(g); return atom; } if (g->lookahead == L_NCCLASS) { atom = newnode(g, P_NCCLASS); atom->cc = g->yycc; next(g); return atom; } if (g->lookahead == L_REF) { atom = newnode(g, P_REF); if (g->yychar == 0 || g->yychar > g->nsub || !g->sub[g->yychar]) die(g, "invalid back-reference"); atom->n = g->yychar; atom->x = g->sub[g->yychar]; next(g); return atom; } if (accept(g, '.')) return newnode(g, P_ANY); if (accept(g, '(')) { atom = newnode(g, P_PAR); if (g->nsub == MAXSUB) die(g, "too many captures"); atom->n = g->nsub++; atom->x = parsealt(g); g->sub[atom->n] = atom; if (!accept(g, ')')) die(g, "unmatched '('"); return atom; } if (accept(g, L_NC)) { atom = parsealt(g); if (!accept(g, ')')) die(g, "unmatched '('"); return atom; } if (accept(g, L_PLA)) { atom = newnode(g, P_PLA); atom->x = parsealt(g); if (!accept(g, ')')) die(g, "unmatched '('"); return atom; } if (accept(g, L_NLA)) { atom = newnode(g, P_NLA); atom->x = parsealt(g); if (!accept(g, ')')) die(g, "unmatched '('"); return atom; } die(g, "syntax error"); return NULL; }
static Renode *parseatom(void) { Renode *atom; if (g.lookahead == L_CHAR) { atom = newnode(P_CHAR); atom->c = g.yychar; next(); return atom; } if (g.lookahead == L_CCLASS) { atom = newnode(P_CCLASS); atom->cc = g.yycc; next(); return atom; } if (g.lookahead == L_NCCLASS) { atom = newnode(P_NCCLASS); atom->cc = g.yycc; next(); return atom; } if (g.lookahead == L_REF) { atom = newnode(P_REF); if (g.yychar == 0 || g.yychar > g.nsub || !g.sub[g.yychar]) die("invalid back-reference"); atom->n = g.yychar; atom->x = g.sub[g.yychar]; next(); return atom; } if (re_accept('.')) return newnode(P_ANY); if (re_accept('(')) { atom = newnode(P_PAR); if (g.nsub == MAXSUB) die("too many captures"); atom->n = g.nsub++; atom->x = parsealt(); g.sub[atom->n] = atom; if (!re_accept(')')) die("unmatched '('"); return atom; } if (re_accept(L_NC)) { atom = parsealt(); if (!re_accept(')')) die("unmatched '('"); return atom; } if (re_accept(L_PLA)) { atom = newnode(P_PLA); atom->x = parsealt(); if (!re_accept(')')) die("unmatched '('"); return atom; } if (re_accept(L_NLA)) { atom = newnode(P_NLA); atom->x = parsealt(); if (!re_accept(')')) die("unmatched '('"); return atom; } die("syntax error"); return NULL; }