/*
 - regdump - dump a SRE onto stdout in vaguely comprehensible form
 *
 * Walks the compiled program starting at r->program + 1 and prints one
 * line per node: the node's offset from r->program, regprop()'s text for
 * the node, the offset of the node returned by regnext() (or 0 when there
 * is none), and the literal operand of ANYOF/ANYBUT/EXACTLY nodes.
 * Once the END node has been printed, the regstart, reganch and regmust
 * header fields are reported when they are set.
 */
void
regdump(SRE *r)
{
    register char *s;
    register char op = EXACTLY;    /* Arbitrary non-END op. */
    register char *next;

    s = r->program + 1;
    while (op != END) {    /* While that wasn't END last time... */
        op = OP(s);
        /* Pointer differences are ptrdiff_t; cast so the argument matches %ld. */
        printf("%2ld%s", (long)(s - r->program), regprop(s));    /* Where, what. */
        next = regnext(s);
        if (next == NULL) {    /* Next ptr. */
            printf("(0)");
        } else {
            printf("(%ld)", (long)((s - r->program) + (next - s)));
        }
        s += 3;    /* Move from the node start to where its operand begins. */
        if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
            /* Literal string, where present. */
            while (*s != '\0') {
                putchar(*s);
                s++;
            }
            s++;    /* Step over the operand's terminating NUL. */
        }
        putchar('\n');
    }

    /* Header fields of interest. */
    if (r->regstart != '\0') {
        printf("start `%c' ", r->regstart);
    }
    if (r->reganch) {
        printf("anchored ");
    }
    if (r->regmust != NULL) {
        printf("must have \"%s\"", r->regmust);
    }
    printf("\n");
}
/*
 - regdump - dump a regexp onto stdout in vaguely comprehensible form
 *
 * Walks the compiled program starting at r->program + 1 and prints one
 * line per node: the node's offset from r->program, regprop()'s text for
 * the node, the offset of the node returned by regnext() (or 0 when there
 * is none), and the literal operand of ANYOF/ANYBUT/EXACTLY nodes.
 * Once the END node has been printed, the regstart, reganch and regmust
 * header fields are reported when they are set.
 */
void regdump (regexp * r)
{
    register char *s;
    register char op = EXACTLY;    /* Arbitrary non-END op. */
    register char *nxt;

    s = r->program + 1;
    while (op != END) {    /* While that wasn't END last time... */
        op = OP(s);
        /*
         * Pointer differences are ptrdiff_t, which is not long on every
         * platform; cast explicitly so the argument matches %ld.
         */
        printf("%2ld%s", (long)(s - r->program), regprop(s));    /* Where, what. */
        nxt = regnext(s);
        if (nxt == (char *) NULL)    /* nxt ptr. */
            printf("(0)");
        else
            printf("(%ld)", (long)((s - r->program) + (nxt - s)));
        s += 3;    /* Move from the node start to where its operand begins. */
        if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
            /* Literal string, where present. */
            while (*s != '\0') {
                putchar(*s);
                s++;
            }
            s++;    /* Step over the operand's terminating NUL. */
        }
        putchar('\n');
    }

    /* Header fields of interest. */
    if (r->regstart != '\0')
        printf("start `%c' ", r->regstart);
    if (r->reganch)
        printf("anchored ");
    if (r->regmust != (char *) NULL)
        printf("must have \"%s\"", r->regmust);
    printf("\n");
}
/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */ static int /* 0 failure, 1 success */ regmatch( char *prog ) { register char *scan; /* Current node. */ char *next; /* Next node. */ scan = prog; #ifdef DEBUG if (scan != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan)); #endif while (scan != NULL) { #ifdef DEBUG if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan)); #endif next = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) return(0); break; case EOL: if (*reginput != '\0') return(0); break; case WORDA: /* Must be looking at a letter, digit, or _ */ if ((!isalnum(*reginput)) && *reginput != '_') return(0); /* Prev must be BOL or nonword */ if (reginput > regbol && (isalnum(reginput[-1]) || reginput[-1] == '_')) return(0); break; case WORDZ: /* Must be looking at non letter, digit, or _ */ if (isalnum(*reginput) || *reginput == '_') return(0); /* We don't care what the previous char was */ break; case ANY: if (*reginput == '\0') return(0); reginput++; break; case EXACTLY: { register int len; register char *opnd; opnd = OPERAND(scan); /* Inline the first character, for speed. 
*/ if (*opnd != *reginput) return(0); len = strlen(opnd); if (len > 1 && strncmp(opnd, reginput, len) != 0) return(0); reginput += len; } break; case ANYOF: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) return(0); reginput++; break; case ANYBUT: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) return(0); reginput++; break; case NOTHING: break; case BACK: break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: { register int no; register const char *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. */ if (regstartp[no] == NULL) regstartp[no] = save; return(1); } else return(0); } break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: { register int no; register const char *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (regendp[no] == NULL) regendp[no] = save; return(1); } else return(0); } break; case BRANCH: { register const char *save; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ else { do { save = reginput; if (regmatch(OPERAND(scan))) return(1); reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return(0); /* NOTREACHED */ } } break; case STAR: case PLUS: { register char nextch; register int no; register const char *save; register int min; /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ nextch = '\0'; if (OP(next) == EXACTLY) nextch = *OPERAND(next); min = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min) { /* If it could work, try it. 
*/ if (nextch == '\0' || *reginput == nextch) if (regmatch(next)) return(1); /* Couldn't or didn't -- back up. */ no--; reginput = save + no; } return(0); } break; case END: return(1); /* Success! */ break; default: regerror("memory corruption"); return(0); break; } scan = next; } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. */ regerror("corrupted pointers"); return(0); }
/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */ static int regmatch (char * prog) { register char *scan; /* Current node. */ char *nxt; /* nxt node. */ scan = prog; #ifdef DEBUG if (scan != (char *) NULL && regnarrate) debug_message("%s(\n", regprop(scan)); #endif while (scan != (char *) NULL) { #ifdef DEBUG if (regnarrate) debug_message("%s...\n", regprop(scan)); #endif nxt = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) return (0); break; case EOL: if (*reginput != '\0') return (0); break; case ANY: if (*reginput == '\0') return (0); reginput++; break; case WORDSTART: if (reginput == regbol) break; if (*reginput == '\0' || ISWORDPART(*(reginput - 1)) || !ISWORDPART(*reginput)) return (0); break; case WORDEND: if (*reginput == '\0') break; if (reginput == regbol || !ISWORDPART(*(reginput - 1)) || ISWORDPART(*reginput)) return (0); break; case EXACTLY:{ register int len; register char *opnd; opnd = OPERAND(scan); /* Inline the first character, for speed. 
*/ if (*opnd != *reginput) return (0); len = strlen(opnd); if (len > 1 && strncmp(opnd, reginput, len) != 0) return (0); reginput += len; } break; case ANYOF: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == (char *) NULL) return (0); reginput++; break; case ANYBUT: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != (char *) NULL) return (0); reginput++; break; case NOTHING: break; case BACK: break; case OPEN + 1: case OPEN + 2: case OPEN + 3: case OPEN + 4: case OPEN + 5: case OPEN + 6: case OPEN + 7: case OPEN + 8: case OPEN + 9:{ register int no; register const char *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(nxt)) { /* * Don't set startp if some later invocation of the same * parentheses already has. */ if (regstartp[no] == (char *) NULL) regstartp[no] = save; return (1); } else return (0); } break; case CLOSE + 1: case CLOSE + 2: case CLOSE + 3: case CLOSE + 4: case CLOSE + 5: case CLOSE + 6: case CLOSE + 7: case CLOSE + 8: case CLOSE + 9:{ register int no; register const char *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(nxt)) { /* * Don't set endp if some later invocation of the same * parentheses already has. */ if (regendp[no] == (char *) NULL) regendp[no] = save; return (1); } else return (0); } break; case BRANCH:{ register const char *save; if (OP(nxt) != BRANCH) /* No choice. */ nxt = OPERAND(scan); /* Avoid recursion. */ else { do { save = reginput; if (regmatch(OPERAND(scan))) return (1); reginput = save; scan = regnext(scan); } while (scan != (char *) NULL && OP(scan) == BRANCH); return (0); /* NOTREACHED */ } } break; case STAR: case PLUS:{ register char nextch; register int no; register const char *save; register int minimum; /* * Lookahead to avoid useless match attempts when we know * what character comes next. */ nextch = '\0'; if (OP(nxt) == EXACTLY) nextch = *OPERAND(nxt); minimum = (OP(scan) == STAR) ? 
0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= minimum) { /* If it could work, try it. */ if (nextch == '\0' || *reginput == nextch) if (regmatch(nxt)) return (1); /* Couldn't or didn't -- back up. */ no--; reginput = save + no; } return (0); } break; case END: return (1); /* Success! */ break; default: regerror("memory corruption\n"); return (0); break; } scan = nxt; } /* * We get here only if there's trouble -- normally "case END" is the * terminating point. */ regerror("corrupted pointers\n"); return (0); }
/*
 - regdump - dump a regexp onto stdout in vaguely comprehensible form
 *
 * Output has three parts: a hex dump of program[1 .. p-1] (16 entries per
 * row), one line per node with its index, regprop() text, regnext() target
 * and operand, and - if the END node was reached - the regstart, reganch
 * and regmust header fields.
 */
static void regdump(regex_t *preg)
{
    char utf8[MAX_UTF8_LEN + 1];
    int opcode = EXACTLY;    /* Anything other than END will do. */
    int pos;
    int target;
    int k;

    /* Raw dump of the program words. */
    for (k = 1; k < preg->p; k++) {
        printf("%02x ", (unsigned char)preg->program[k]);
        if (k % 16 == 0) {
            printf("\n");
        }
    }
    printf("\n");

    /* Node-by-node listing; stops after END or at the end of the program. */
    for (pos = 1; opcode != END && pos < preg->p; ) {
        opcode = OP(preg, pos);
        printf("%3d: %s", pos, regprop(opcode));

        target = regnext(preg, pos);
        if (target != 0) {
            printf("(%d)", target);
        }
        else {
            printf("(0)");
        }

        pos += 2;    /* Operands start two slots after the node start. */

        switch (opcode) {
            case REP:
            case REPMIN:
            case REPX:
            case REPXMIN: {
                int upper = preg->program[pos];
                int lower = preg->program[pos + 1];

                /* An upper bound of 65535 is shown as "*". */
                if (upper != 65535) {
                    printf("{%d,%d}", lower, upper);
                }
                else {
                    printf("{%d,*}", lower);
                }
                printf(" %d", preg->program[pos + 2]);
                pos += 3;
                break;
            }

            case ANYOF:
            case ANYBUT:
                /* Zero-terminated list of (length, first) range pairs. */
                while (preg->program[pos]) {
                    int span = preg->program[pos++];
                    int lo = preg->program[pos++];
                    int nbytes = utf8_getchars(utf8, lo);

                    utf8[nbytes] = 0;
                    printf("%s", utf8);
                    if (span > 1) {
                        nbytes = utf8_getchars(utf8, lo + span - 1);
                        utf8[nbytes] = 0;
                        printf("-%s", utf8);
                    }
                }
                pos++;
                break;

            case EXACTLY:
                /* Zero-terminated literal, one code point per slot. */
                for (; preg->program[pos]; pos++) {
                    int nbytes = utf8_getchars(utf8, preg->program[pos]);

                    utf8[nbytes] = 0;
                    printf("%s", utf8);
                }
                pos++;
                break;

            default:
                break;
        }
        putchar('\n');
    }

    if (opcode == END) {
        /* Header fields of interest. */
        if (preg->regstart) {
            int nbytes = utf8_getchars(utf8, preg->regstart);

            utf8[nbytes] = 0;
            printf("start '%s' ", utf8);
        }
        if (preg->reganch) {
            printf("anchored ");
        }
        if (preg->regmust != 0) {
            printf("must have:");
            for (k = 0; k < preg->regmlen; k++) {
                putchar(preg->program[preg->regmust + k]);
            }
            putchar('\n');
        }
    }
    printf("\n");
}
/*
 - regmatch - main matching routine
 *
 * Matches the node chain starting at program index prog against the input
 * at preg->reginput.  Ordinary nodes are handled by the loop; alternation,
 * repetition and parentheses recurse so they can react to whether the
 * rest of the pattern matched.
 *
 * Returns 1 on success, 0 on failure, and REG_ERR_INTERNAL when an unknown
 * opcode is met or the chain ends without reaching END.
 */
static int regmatch(regex_t *preg, int prog)
{
    int scan;    /* Current node. */
    int next;    /* Next node. */
    const char *save;

    scan = prog;

#ifdef DEBUG
    /* regprop() takes an opcode (as in the trace below), not a node index. */
    if (scan != 0 && regnarrate)
        fprintf(stderr, "%s(\n", regprop(OP(preg, scan)));
#endif
    while (scan != 0) {
        int n;
        int c;
#ifdef DEBUG
        if (regnarrate) {
            fprintf(stderr, "%3d: %s...\n", scan, regprop(OP(preg, scan)));    /* Where, what. */
        }
#endif
        next = regnext(preg, scan);
        /* Decode the current input character once; n is its byte length. */
        n = reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));

        switch (OP(preg, scan)) {
        case BOL:
            if (preg->reginput != preg->regbol)
                return(0);
            break;
        case EOL:
            if (!reg_iseol(preg, c)) {
                return(0);
            }
            break;
        case WORDA:
            /* Must be looking at a letter, digit, or _ */
            if ((!isalnum(UCHAR(c))) && c != '_')
                return(0);
            /* Prev must be BOL or nonword */
            if (preg->reginput > preg->regbol &&
                (isalnum(UCHAR(preg->reginput[-1])) || preg->reginput[-1] == '_'))
                return(0);
            break;
        case WORDZ:
            /* Can't match at BOL */
            if (preg->reginput > preg->regbol) {
                /*
                 * Current must be EOL or nonword.  A nonword character is
                 * one that is neither alphanumeric NOR '_'; joining the two
                 * tests with || would make the condition always true.
                 */
                if (reg_iseol(preg, c) || (!isalnum(UCHAR(c)) && c != '_')) {
                    c = preg->reginput[-1];
                    /* Previous must be word */
                    if (isalnum(UCHAR(c)) || c == '_') {
                        break;
                    }
                }
            }
            /* No */
            return(0);

        case ANY:
            if (reg_iseol(preg, c))
                return 0;
            preg->reginput += n;
            break;
        case EXACTLY: {
                int opnd;
                int len;
                int slen;

                opnd = OPERAND(scan);
                len = str_int_len(preg->program + opnd);

                /* A negative result means the literal is not a prefix of the input. */
                slen = prefix_cmp(preg->program + opnd, len, preg->reginput, preg->cflags & REG_ICASE);
                if (slen < 0) {
                    return(0);
                }
                preg->reginput += slen;
            }
            break;
        case ANYOF:
            if (reg_iseol(preg, c) || reg_range_find(preg->program + OPERAND(scan), c) == 0) {
                return(0);
            }
            preg->reginput += n;
            break;
        case ANYBUT:
            if (reg_iseol(preg, c) || reg_range_find(preg->program + OPERAND(scan), c) != 0) {
                return(0);
            }
            preg->reginput += n;
            break;
        case NOTHING:
            break;
        case BACK:
            break;
        case BRANCH:
            if (OP(preg, next) != BRANCH)    /* No choice. */
                next = OPERAND(scan);        /* Avoid recursion. */
            else {
                /* Try each alternative from the same input position. */
                do {
                    save = preg->reginput;
                    if (regmatch(preg, OPERAND(scan))) {
                        return(1);
                    }
                    preg->reginput = save;
                    scan = regnext(preg, scan);
                } while (scan != 0 && OP(preg, scan) == BRANCH);
                return(0);
                /* NOTREACHED */
            }
            break;
        case REP:
        case REPMIN:
            return regmatchsimplerepeat(preg, scan, OP(preg, scan) == REPMIN);

        case REPX:
        case REPXMIN:
            return regmatchrepeat(preg, scan, OP(preg, scan) == REPXMIN);

        case END:
            return 1;    /* Success! */

        case OPENNC:
        case CLOSENC:
            return regmatch(preg, next);

        default:
            /* Opcodes in [OPEN+1, CLOSE_END) are capturing open/close parens. */
            if (OP(preg, scan) >= OPEN+1 && OP(preg, scan) < CLOSE_END) {
                save = preg->reginput;
                if (regmatch(preg, next)) {
                    /*
                     * Record the position only if it is still unset (-1)
                     * and the caller supplied a slot for this group.
                     */
                    if (OP(preg, scan) < CLOSE) {
                        int no = OP(preg, scan) - OPEN;
                        if (no < preg->nmatch && preg->pmatch[no].rm_so == -1) {
                            preg->pmatch[no].rm_so = save - preg->start;
                        }
                    }
                    else {
                        int no = OP(preg, scan) - CLOSE;
                        if (no < preg->nmatch && preg->pmatch[no].rm_eo == -1) {
                            preg->pmatch[no].rm_eo = save - preg->start;
                        }
                    }
                    return(1);
                }
                return(0);
            }
            return REG_ERR_INTERNAL;
        }

        scan = next;
    }

    /*
     * We get here only if there's trouble -- normally "case END" is
     * the terminating point.
     */
    return REG_ERR_INTERNAL;
}
/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */ static int /* 0 failure, 1 success */ regmatch(char *prog) { register char *scan; /* Current node. */ char *next; /* Next node. */ extern char *strchr(); scan = prog; #ifdef DEBUG if (scan != NULL && regnarrate) { fprintf(stderr, "%s(\n", regprop(scan)); } #endif while (scan != NULL) { #ifdef DEBUG if (regnarrate) { fprintf(stderr, "%s...\n", regprop(scan)); } #endif next = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) { return(0); } break; case EOL: if (regpeek(0) != '\0' && regpeek(0) != '\n') { return(0); } break; case BEGWORD: /* Match if current char isident * and previous char BOL or !ident */ if ((regpeek(0) == 0 || !isident(regpeek(0))) || (reginput != regbol && isident(regpeek(-1)))) { return(0); } break; case ENDWORD: /* Match if previous char isident * and current char EOL or !ident */ if ((regpeek(0) != 0 && isident(regpeek(0))) || reginput == regbol || !isident(regpeek(-1))) { return(0); } break; case WHITESP: /* match single whitespace */ if (regpeek(0) != 0 && !isspace(regpeek(0))) { return(0); } reginput++; break; case NWHITESP: /* don't match eol, or space or tab */ if (regpeek(0) == 0 || isspace(regpeek(0))) { return(0); } reginput++; break; case ALNUM: /* includes _ */ if (regpeek(0) == 0 || !isident(regpeek(0))) { return(0); } reginput++; break; case NALNUM: if (regpeek(0) == 0 || isident(regpeek(0))) { return(0); } reginput++; break; case DIGIT: if (regpeek(0) == 0 || !isdigit(regpeek(0))) { return(0); } reginput++; break; case NDIGIT: if (regpeek(0) == 0 || isdigit(regpeek(0))) { return(0); } reginput++; break; 
case PRINT: if (regpeek(0) == 0 || !(isprint(regpeek(0)) || isspace(regpeek(0)))) { return(0); } reginput++; break; case NPRINT: if (regpeek(0) == 0 || isprint(regpeek(0)) || isspace(regpeek(0))) { return(0); } reginput++; break; case ANY: if (regpeek(0) == '\0' || regpeek(0) == '\n') { return(0); } regseek(1); break; case EXACTLY: { register int len; register char *opnd; opnd = OPERAND(scan); /* Inline the first character, for speed. */ if (*opnd != regpeek(0)) { return(0); } len = strlen(opnd); if (len > 1 && strncmp(opnd, reginput, len) != 0) { return(0); } regseek(len); } break; case ANYOF: if (strchr(OPERAND(scan), regpeek(0)) == NULL) { return(0); } regseek(1); break; case ANYBUT: if (strchr(OPERAND(scan), regpeek(0)) != NULL) { return(0); } regseek(1); break; case NOTHING: break; case BACK: break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: { register int no; register char *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. */ if (regstartp[no] == NULL) { regstartp[no] = save; } return(1); } else { return(0); } } break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: { register int no; register char *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (regendp[no] == NULL) { regendp[no] = save; } return(1); } else { return(0); } } break; case BRANCH: { register char *save; if (OP(next) != BRANCH) { /* No choice. */ next = OPERAND(scan); /* Avoid recursion. 
*/ } else { do { save = reginput; if (regmatch(OPERAND(scan))) { return(1); } reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return(0); /* NOTREACHED */ } } break; case STAR: case PLUS: { register char nextch; register int no; register char *save; register int min; /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ nextch = '\0'; if (OP(next) == EXACTLY) { nextch = *OPERAND(next); } min = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min) { /* If it could work, try it. */ if (nextch == '\0' || regpeek(0) == nextch) { if (regmatch(next)) { return(1); } } /* Couldn't or didn't -- back up. */ no--; reginput = save + no; } return(0); } break; case MINMAX: { register char *save; unsigned char min; unsigned char max; register int no; next = OPERAND(scan); min = OP(next); next = OPERAND(next); max = OP(next); next = OPERAND(next); save = reginput; for (no = 0 ; no < min ; no++) { if (!regmatch(next)) { reginput = save; return(0); } } for ( ; no < max ; no++) { if (!regmatch(next)) { break; } } return(1); } break; case END: return(1); /* Success! */ break; default: SREerror("memory corruption"); return(0); break; } scan = next; } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. */ SREerror("corrupted pointers"); return(0); }