/* - p_ere - ERE parser top level, concatenation and alternation == static void p_ere(struct parse *p, int stop, size_t reclimit); */ static void p_ere( struct parse *p, int stop, /* character this ERE should end at */ size_t reclimit) { char c; sopno prevback = 0; /* pacify gcc */ sopno prevfwd = 0; /* pacify gcc */ sopno conc; int first = 1; /* is this the first alternative? */ _DIAGASSERT(p != NULL); if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { p->error = REG_ESPACE; return; } for (;;) { /* do a bunch of concatenated expressions */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) p_ere_exp(p, reclimit); REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) break; /* NOTE BREAK OUT */ if (first) { INSERT(OCH_, conc); /* offset is wrong */ prevfwd = conc; prevback = conc; first = 0; } ASTERN(OOR1, prevback); prevback = THERE(); AHEAD(prevfwd); /* fix previous offset */ prevfwd = HERE(); EMIT(OOR2, 0); /* offset is very wrong */ } if (!first) { /* tail-end fixups */ AHEAD(prevfwd); ASTERN(O_CH, prevback); } assert(!MORE() || SEE(stop)); }
/* - p_ere - ERE parser top level, concatenation and alternation */ static void p_ere(struct parse *p, int stop) /* character this ERE should end at */ { char c; sopno prevback; sopno prevfwd; sopno conc; int first = 1; /* is this the first alternative? */ for (;;) { /* do a bunch of concatenated expressions */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) p_ere_exp(p); REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) break; /* NOTE BREAK OUT */ if (first) { INSERT(OCH_, conc); /* offset is wrong */ prevfwd = conc; prevback = conc; first = 0; } ASTERN(OOR1, prevback); prevback = THERE(); AHEAD(prevfwd); /* fix previous offset */ prevfwd = HERE(); EMIT(OOR2, 0); /* offset is very wrong */ } if (!first) { /* tail-end fixups */ AHEAD(prevfwd); ASTERN(O_CH, prevback); } assert(!MORE() || SEE(stop)); }
/* - repeat - generate code for a bounded repetition, recursively if needed */ static void repeat(struct parse *p, sopno start, /* operand from here to end of strip */ int from, /* repeated from this number */ int to) /* to this number of times (maybe INFINITY) */ { sopno finish = HERE(); # define N 2 # define INF 3 # define REP(f, t) ((f)*8 + (t)) # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) sopno copy; if (p->error != 0) /* head off possible runaway recursion */ return; assert(from <= to); switch (REP(MAP(from), MAP(to))) { case REP(0, 0): /* must be user doing this */ DROP(finish-start); /* drop the operand */ break; case REP(0, 1): /* as x{1,1}? */ case REP(0, N): /* as x{1,n}? */ case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ repeat(p, start+1, 1, to); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); AHEAD(THERE()); ASTERN(O_CH, THERETHERE()); break; case REP(1, 1): /* trivial case */ /* done */ break; case REP(1, N): /* as x?x{1,n-1} */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); ASTERN(OOR1, start); AHEAD(start); EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); repeat(p, copy, 1, to-1); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); ASTERN(O_PLUS, start); break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to-1); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ break; } }
/* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op */ static void p_ere_exp(struct parse *p) { char c; sopno pos; int count; int count2; sopno subno; int wascaret = 0; assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); pos = HERE(); switch (c) { case '(': REQUIRE(MORE(), REG_EPAREN); p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); if (!SEE(')')) p_ere(p, ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); MUSTEAT(')', REG_EPAREN); break; #ifndef POSIX_MISTAKE case ')': /* happens only if no current unmatched ( */ /* * You may ask, why the ifndef? Because I didn't notice * this until slightly too late for 1003.2, and none of the * other 1003.2 regular-expression reviewers noticed it at * all. So an unmatched ) is legal POSIX, at least until * we can get it fixed. */ SETERROR(REG_EPAREN); break; #endif case '^': EMIT(OBOL, 0); p->g->iflags |= USEBOL; p->g->nbol++; wascaret = 1; break; case '$': EMIT(OEOL, 0); p->g->iflags |= USEEOL; p->g->neol++; break; case '|': SETERROR(REG_EMPTY); break; case '*': case '+': case '?': SETERROR(REG_BADRPT); break; case '.': if (p->g->cflags®_NEWLINE) nonnewline(p); else EMIT(OANY, 0); break; case '[': p_bracket(p); break; case '\\': REQUIRE(MORE(), REG_EESCAPE); c = GETNEXT(); ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows */ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c); break; } if (!MORE()) return; c = PEEK(); /* we call { a repetition if followed by a digit */ if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) return; /* no repetition, we're done */ NEXT(); REQUIRE(!wascaret, REG_BADRPT); switch (c) { case '*': /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); break; case '+': INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); break; case '?': /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, pos); /* offset slightly wrong */ ASTERN(OOR1, pos); /* this one's right */ AHEAD(pos); /* fix the OCH_ */ EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); break; case '{': count = p_count(p); if (EAT(',')) { if (isdigit((uch)PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } break; } if (!MORE()) return; c = PEEK(); if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) return; SETERROR(REG_BADRPT); }
WireSyntax::WireSyntax() { SYNTAX("Wire"); DEFINE_VOID("Comment", CHOICE( GLUE( STRING("//"), CHOICE( FIND(AHEAD(CHAR('\n'))), FIND(EOI()) ) ), GLUE( STRING("/*"), REPEAT( CHOICE( INLINE("Comment"), GLUE( NOT(STRING("*/")), ANY() ) ) ), STRING("*/") ) ) ); DEFINE_VOID("Whitespace", REPEAT( CHOICE( RANGE(" \t\n"), INLINE("Comment") ) ) ); DEFINE("Name", REPEAT(1, EXCEPT(" \t\n:;")) ); DEFINE("Value", CHOICE( REF("Atom"), REF("Properties"), REF("Items") ) ); DEFINE("Object", GLUE( REPEAT(0, 1, GLUE( REF("Name"), INLINE("Whitespace"), CHAR(':'), INLINE("Whitespace") ) ), INLINE("Value") ) ); DEFINE("Atom", REPEAT( GLUE( NOT( CHOICE( GLUE( REPEAT(RANGE(" \t")), INLINE("Name"), REPEAT(RANGE(" \t")), CHAR(':') ), GLUE( REPEAT(RANGE(" \t")), RANGE("};,") ), STRING("\n\n") ) ), ANY() ) ) ); DEFINE("Properties", GLUE( CHAR('{'), INLINE("Whitespace"), REPEAT( GLUE( REF("Object"), INLINE("Whitespace"), RANGE(";,"), INLINE("Whitespace") ) ), CHAR('}') ) ); DEFINE("Items", GLUE( CHAR('['), INLINE("Whitespace"), REPEAT( GLUE( REF("Value"), INLINE("Whitespace"), RANGE(";,"), INLINE("Whitespace") ) ), CHAR(']') ) ); ENTRY("Object"); }