/* - p_b_term - parse one term of a bracketed character list */ static void p_b_term(struct parse *p, cset *cs) { char c; char start, finish; int i; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { case '[': c = (MORE2()) ? PEEK2() : '\0'; break; case '-': SETERROR(REG_ERANGE); return; /* NOTE RETURN */ break; default: c = '\0'; break; } switch (c) { case ':': /* character class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECTYPE); p_b_cclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO(':', ']'), REG_ECTYPE); break; case '=': /* equivalence class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECOLLATE); p_b_eclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); break; default: /* symbol, ordinary character, or range */ /* xxx revision needed for multichar stuff */ start = p_b_symbol(p); if (SEE('-') && MORE2() && PEEK2() != ']') { /* range */ NEXT(); if (EAT('-')) finish = '-'; else finish = p_b_symbol(p); } else finish = start; /* xxx what about signed chars here... */ REQUIRE(start <= finish, REG_ERANGE); for (i = start; i <= finish; i++) CHadd(cs, i); break; } }
/* * prefixes - implement various special prefixes */ static void prefixes(struct vars * v) { /* literal string doesn't get any of this stuff */ if (v->cflags & REG_QUOTE) return; /* initial "***" gets special things */ if (HAVE(4) && NEXT3('*', '*', '*')) switch (*(v->now + 3)) { case CHR('?'): /* "***?" error, msg shows version */ ERR(REG_BADPAT); return; /* proceed no further */ break; case CHR('='): /* "***=" shifts to literal string */ NOTE(REG_UNONPOSIX); v->cflags |= REG_QUOTE; v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE); v->now += 4; return; /* and there can be no more prefixes */ break; case CHR(':'): /* "***:" shifts to AREs */ NOTE(REG_UNONPOSIX); v->cflags |= REG_ADVANCED; v->now += 4; break; default: /* otherwise *** is just an error */ ERR(REG_BADRPT); return; break; } /* BREs and EREs don't get embedded options */ if ((v->cflags & REG_ADVANCED) != REG_ADVANCED) return; /* embedded options (AREs only) */ if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { NOTE(REG_UNONPOSIX); v->now += 2; for (; !ATEOS() && iscalpha(*v->now); v->now++) switch (*v->now) { case CHR('b'): /* BREs (but why???) */ v->cflags &= ~(REG_ADVANCED | REG_QUOTE); break; case CHR('c'): /* case sensitive */ v->cflags &= ~REG_ICASE; break; case CHR('e'): /* plain EREs */ v->cflags |= REG_EXTENDED; v->cflags &= ~(REG_ADVF | REG_QUOTE); break; case CHR('i'): /* case insensitive */ v->cflags |= REG_ICASE; break; case CHR('m'): /* Perloid synonym for n */ case CHR('n'): /* \n affects ^ $ . [^ */ v->cflags |= REG_NEWLINE; break; case CHR('p'): /* ~Perl, \n affects . [^ */ v->cflags |= REG_NLSTOP; v->cflags &= ~REG_NLANCH; break; case CHR('q'): /* literal string */ v->cflags |= REG_QUOTE; v->cflags &= ~REG_ADVANCED; break; case CHR('s'): /* single line, \n ordinary */ v->cflags &= ~REG_NEWLINE; break; case CHR('t'): /* tight syntax */ v->cflags &= ~REG_EXPANDED; break; case CHR('w'): /* weird, \n affects ^ $ only */ v->cflags &= ~REG_NLSTOP; v->cflags |= REG_NLANCH; break; case CHR('x'): /* expanded syntax */ v->cflags |= REG_EXPANDED; break; default: ERR(REG_BADOPT); return; } if (!NEXT1(')')) { ERR(REG_BADOPT); return; } v->now++; if (v->cflags & REG_QUOTE) v->cflags &= ~(REG_EXPANDED | REG_NEWLINE); } }
/* - brenext - get next BRE token * This is much like EREs except for all the stupid backslashes and the * context-dependency of some things. ^ static int brenext(struct vars *, pchr); */ static int /* 1 normal, 0 failure */ brenext( struct vars *v, pchr pc) { chr c = (chr)pc; switch (c) { case CHR('*'): if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) { RETV(PLAIN, c); } RET('*'); break; case CHR('['): if (HAVE(6) && *(v->now+0) == CHR('[') && *(v->now+1) == CHR(':') && (*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) && *(v->now+3) == CHR(':') && *(v->now+4) == CHR(']') && *(v->now+5) == CHR(']')) { c = *(v->now+2); v->now += 6; NOTE(REG_UNONPOSIX); RET((c == CHR('<')) ? '<' : '>'); } INTOCON(L_BRACK); if (NEXT1('^')) { v->now++; RETV('[', 0); } RETV('[', 1); break; case CHR('.'): RET('.'); break; case CHR('^'): if (LASTTYPE(EMPTY)) { RET('^'); } if (LASTTYPE('(')) { NOTE(REG_UUNSPEC); RET('^'); } RETV(PLAIN, c); break; case CHR('$'): if (v->cflags®_EXPANDED) { skip(v); } if (ATEOS()) { RET('$'); } if (NEXT2('\\', ')')) { NOTE(REG_UUNSPEC); RET('$'); } RETV(PLAIN, c); break; case CHR('\\'): break; /* see below */ default: RETV(PLAIN, c); break; } assert(c == CHR('\\')); if (ATEOS()) { FAILW(REG_EESCAPE); } c = *v->now++; switch (c) { case CHR('{'): INTOCON(L_BBND); NOTE(REG_UBOUNDS); RET('{'); break; case CHR('('): RETV('(', 1); break; case CHR(')'): RETV(')', c); break; case CHR('<'): NOTE(REG_UNONPOSIX); RET('<'); break; case CHR('>'): NOTE(REG_UNONPOSIX); RET('>'); break; case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): case CHR('9'): NOTE(REG_UBACKREF); RETV(BACKREF, (chr)DIGITVAL(c)); break; default: if (iscalnum(c)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, c); break; } assert(NOTREACHED); }