/* - p_str - string (no metacharacters) "parser" */ static void p_str(struct parse *p) { REQUIRE(MORE(), REG_EMPTY); while (MORE()) ordinary(p, GETNEXT()); }
/* - p_str - string (no metacharacters) "parser" == static void p_str(struct parse *p); */ static void p_str( struct parse *p) { _DIAGASSERT(p != NULL); REQUIRE(MORE(), REG_EMPTY); while (MORE()) ordinary(p, GETNEXT()); }
/* - p_bracket - parse a bracketed character list * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. */ static void p_bracket(struct parse *p) { cset *cs; int invert = 0; /* Dept of Truly Sickening Special-Case Kludges */ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { EMIT(OBOW, 0); NEXTn(6); return; } if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { EMIT(OEOW, 0); NEXTn(6); return; } if ((cs = allocset(p)) == NULL) { /* allocset did set error status in p */ return; } if (EAT('^')) invert++; /* make note to invert set at end */ if (EAT(']')) CHadd(cs, ']'); else if (EAT('-')) CHadd(cs, '-'); while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) p_b_term(p, cs); if (EAT('-')) CHadd(cs, '-'); MUSTEAT(']', REG_EBRACK); if (p->error != 0) { /* don't mess things up further */ freeset(p, cs); return; } if (p->g->cflags®_ICASE) { int i; int ci; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i) && isalpha(i)) { ci = othercase(i); if (ci != i) CHadd(cs, ci); } if (cs->multis != NULL) mccase(p, cs); } if (invert) { int i; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i)) CHsub(cs, i); else CHadd(cs, i); if (p->g->cflags®_NEWLINE) CHsub(cs, '\n'); if (cs->multis != NULL) mcinvert(p, cs); } assert(cs->multis == NULL); /* xxx */ if (nch(p, cs) == 1) { /* optimize singleton sets */ ordinary(p, firstch(p, cs)); freeset(p, cs); } else EMIT(OANYOF, freezeset(p, cs)); }
/* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition */ static int /* was the simple RE an unbackslashed $? */ p_simp_re(struct parse *p, int starordinary) /* is a leading * an ordinary character? */ { int c; int count; int count2; sopno pos; int i; sopno subno; # define BACKSL (1<<CHAR_BIT) pos = HERE(); /* repetion op, if any, covers from here */ assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); if (c == '\\') { REQUIRE(MORE(), REG_EESCAPE); c = BACKSL | GETNEXT(); } switch (c) { case '.': if (p->g->cflags®_NEWLINE) nonnewline(p); else EMIT(OANY, 0); break; case '[': p_bracket(p); break; case BACKSL|'{': SETERROR(REG_BADRPT); break; case BACKSL|'(': p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) p_bre(p, '\\', ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); REQUIRE(EATTWO('\\', ')'), REG_EPAREN); break; case BACKSL|')': /* should not get here -- must be user */ case BACKSL|'}': SETERROR(REG_EPAREN); break; case BACKSL|'1': case BACKSL|'2': case BACKSL|'3': case BACKSL|'4': case BACKSL|'5': case BACKSL|'6': case BACKSL|'7': case BACKSL|'8': case BACKSL|'9': i = (c&~BACKSL) - '0'; assert(i < NPAREN); if (p->pend[i] != 0) { assert(i <= p->g->nsub); EMIT(OBACK_, i); assert(p->pbegin[i] != 0); assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); assert(OP(p->strip[p->pend[i]]) == ORPAREN); (void) dupl(p, p->pbegin[i]+1, p->pend[i]); EMIT(O_BACK, i); } else SETERROR(REG_ESUBREG); p->g->backrefs = 1; break; case '*': REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, (char)c); break; } if (EAT('*')) { /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { if (MORE() && isdigit((uch)PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } } else if (c == '$') /* $ (but not \$) ends it */ return(1); return(0); }
/* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op */ static void p_ere_exp(struct parse *p) { char c; sopno pos; int count; int count2; sopno subno; int wascaret = 0; assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); pos = HERE(); switch (c) { case '(': REQUIRE(MORE(), REG_EPAREN); p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); if (!SEE(')')) p_ere(p, ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); MUSTEAT(')', REG_EPAREN); break; #ifndef POSIX_MISTAKE case ')': /* happens only if no current unmatched ( */ /* * You may ask, why the ifndef? Because I didn't notice * this until slightly too late for 1003.2, and none of the * other 1003.2 regular-expression reviewers noticed it at * all. So an unmatched ) is legal POSIX, at least until * we can get it fixed. */ SETERROR(REG_EPAREN); break; #endif case '^': EMIT(OBOL, 0); p->g->iflags |= USEBOL; p->g->nbol++; wascaret = 1; break; case '$': EMIT(OEOL, 0); p->g->iflags |= USEEOL; p->g->neol++; break; case '|': SETERROR(REG_EMPTY); break; case '*': case '+': case '?': SETERROR(REG_BADRPT); break; case '.': if (p->g->cflags®_NEWLINE) nonnewline(p); else EMIT(OANY, 0); break; case '[': p_bracket(p); break; case '\\': REQUIRE(MORE(), REG_EESCAPE); c = GETNEXT(); ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows */ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c); break; } if (!MORE()) return; c = PEEK(); /* we call { a repetition if followed by a digit */ if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) return; /* no repetition, we're done */ NEXT(); REQUIRE(!wascaret, REG_BADRPT); switch (c) { case '*': /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); break; case '+': INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); break; case '?': /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, pos); /* offset slightly wrong */ ASTERN(OOR1, pos); /* this one's right */ AHEAD(pos); /* fix the OCH_ */ EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); break; case '{': count = p_count(p); if (EAT(',')) { if (isdigit((uch)PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } break; } if (!MORE()) return; c = PEEK(); if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) return; SETERROR(REG_BADRPT); }