PTR REcompile(char *re, size_t len) { MACHINE m_stack[STACKSZ]; struct op { int token; int prec; } op_stack[STACKSZ]; register MACHINE *m_ptr; register struct op *op_ptr; register int t; /* do this first because it also checks if we have a run time stack */ RE_lex_init(re, len); if (len == 0) { STATE *p = (STATE *) RE_malloc(sizeof(STATE)); p->s_type = M_ACCEPT; return (PTR) p; } if (setjmp(err_buf)) return (PTR) 0; /* we used to try to recover memory left on machine stack ; but now m_ptr is in a register so it won't be right unless we force it out of a register which isn't worth the trouble */ /* initialize the stacks */ m_ptr = m_stack - 1; op_ptr = op_stack; op_ptr->token = 0; t = RE_lex(m_stack); while (1) { switch (t) { case T_STR: case T_ANY: case T_U: case T_START: case T_END: case T_CLASS: m_ptr++; break; case 0: /* end of reg expr */ if (op_ptr->token == 0) { /* done */ if (m_ptr == m_stack) return (PTR) m_ptr->start; else { /* machines still on the stack */ RE_panic("values still on machine stack"); } } /* otherwise fall thru to default which is operator case */ default: if ((op_ptr->prec = table[op_ptr->token][t]) == G) { do { /* op_pop */ if (op_ptr->token <= T_CAT) /*binary op */ m_ptr--; /* if not enough values on machine stack then we have a missing operand */ if (m_ptr < m_stack) RE_error_trap(-E4); switch (op_ptr->token) { case T_CAT: RE_cat(m_ptr, m_ptr + 1); break; case T_OR: RE_or(m_ptr, m_ptr + 1); break; case T_STAR: RE_close(m_ptr); break; case T_PLUS: RE_poscl(m_ptr); break; case T_Q: RE_01(m_ptr); break; default: /*nothing on ( or ) */ break; } op_ptr--; } while (op_ptr->prec != L); continue; /* back thru switch at top */ } if (op_ptr->prec < 0) { if (op_ptr->prec == E7) RE_panic("parser returns E7"); else RE_error_trap(-op_ptr->prec); } if (++op_ptr == op_stack + STACKSZ) { /* stack overflow */ RE_error_trap(-E5); } op_ptr->token = t; } /* end of switch */ if (m_ptr == m_stack + (STACKSZ - 1)) { /*overflow */ RE_error_trap(-E5); } t = 
RE_lex(m_ptr + 1); } }
/*
 * Find the leftmost, longest match of a compiled machine in str.
 *
 * Returns a pointer to the first matched character, or a null pointer
 * when there is no match.  *lenp receives the length of the match (it is
 * set to 0 on entry and left at 0 when nothing matches).
 *
 * The function runs a backtracking search: alternative continuations are
 * saved with push() and resumed at `refill`.  For each thread `ss` is the
 * position where its match began (null until the thread has committed to
 * a start), and cb_ss/cb_e hold the best match accepted so far.  A thread
 * is abandoned as soon as it could only start later than the current
 * best.
 *
 * NOTE(review): most cases in this variant detect the end of input by
 * looking for a NUL byte rather than by comparing with str_end, so it
 * assumes a NUL-terminated subject without embedded NULs.
 */
char *
REmatch(char *str,              /* string to test */
        unsigned str_len,       /* ...its length */
        PTR machine,            /* compiled regular expression */
        unsigned *lenp)         /* where to return matched-length */
{
    register STATE *m = (STATE *) machine;
    register char *s = str;
    char *ss;
    register RT_STATE *stackp;
    int u_flag, t;
    char *str_end = s + str_len;
    char *ts;

    /* state of current best match stored here */
    char *cb_ss;                /* the start */
    char *cb_e = 0;             /* the end , pts at first char not matched */

    *lenp = 0;

    /* check for the easy case: the machine is a single literal string */
    if ((m + 1)->s_type == M_ACCEPT && m->s_type == M_STR) {
        if ((ts = str_str(s, str_len, m->s_data.str, m->s_len)))
            *lenp = m->s_len;
        return ts;
    }

    u_flag = U_ON;
    cb_ss = ss = (char *) 0;
    stackp = RE_run_stack_empty;
    goto reswitch;

  refill:
    /* current thread failed or accepted: resume the most recent saved one */
    if (stackp == RE_run_stack_empty) {
        if (cb_ss)
            *lenp = (unsigned) (cb_e - cb_ss);
        return cb_ss;
    }
    ss = stackp->ss;
    s = (stackp--)->s;
    if (cb_ss) {
        /* does new state start too late ? */
        if (ss) {
            if (cb_ss < ss)
                goto refill;
        } else if (cb_ss < s) {
            goto refill;
        }
    }

    m = (stackp + 1)->m;
    u_flag = (stackp + 1)->u;

  reswitch:
    switch (m->s_type + u_flag) {
    case M_STR + U_OFF + END_OFF:
        if (strncmp(s, m->s_data.str, m->s_len))
            goto refill;
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_OFF + END_ON:
        if (strcmp(s, m->s_data.str))
            goto refill;
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_ON + END_OFF:
        /*
         * Search only the part of the subject that is still ahead of s.
         * Passing the full str_len here (as was done before) lets the
         * search run past str_end once s has moved beyond str.
         */
        if (s >= str_end)
            goto refill;
        if (!(s = str_str(s, (unsigned) (str_end - s),
                          m->s_data.str, m->s_len)))
            goto refill;
        /* NOTE(review): strlen(str) stops at the first NUL; kept as is */
        if (s >= str + strlen(str))
            goto refill;
        /* remember to retry this state one position further on */
        push(m, s + 1, ss, U_ON);
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_STR + U_ON + END_ON:
        /* the literal must be the last s_len bytes of the subject */
        t = (str_end - s) - m->s_len;
        if (t < 0 || memcmp(ts = s + t, m->s_data.str, m->s_len))
            goto refill;
        if (!ss) {
            if (cb_ss && ts > cb_ss)
                goto refill;
            else
                ss = ts;
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_OFF + END_OFF:
        if (!ison(*m->s_data.bvp, s[0]))
            goto refill;
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_OFF + END_ON:
        if (s[1] || !ison(*m->s_data.bvp, s[0]))
            goto refill;
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_ON + END_OFF:
        /* scan forward to the next character that is in the class */
        while (!ison(*m->s_data.bvp, s[0])) {
            if (s[0] == 0)
                goto refill;
            else
                s++;
        }
        s++;
        push(m, s, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss)
                goto refill;
            else
                ss = s - 1;
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_ON + END_ON:
        if (s[0] == 0 || !ison(*m->s_data.bvp, str_end[-1]))
            goto refill;
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss)
                goto refill;
            else
                ss = str_end - 1;
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_OFF + END_OFF:
        if (s[0] == 0)
            goto refill;
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_OFF + END_ON:
        if (s[0] == 0 || s[1] != 0)
            goto refill;
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_ON + END_OFF:
        if (s[0] == 0)
            goto refill;
        s++;
        push(m, s, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss)
                goto refill;
            else
                ss = s - 1;
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_ON + END_ON:
        if (s[0] == 0)
            goto refill;
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss)
                goto refill;
            else
                ss = str_end - 1;
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_OFF:
    case M_START + U_ON + END_OFF:
        if (s != str)
            goto refill;
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_ON:
    case M_START + U_ON + END_ON:
        if (s != str || s[0] != 0)
            goto refill;
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_END + U_OFF:
        if (s[0] != 0)
            goto refill;
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        m++;
        goto reswitch;

    case M_END + U_ON:
        s = str_end;
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

      CASE_UANY(M_U):
        if (!ss) {
            if (cb_ss && s > cb_ss)
                goto refill;
            else
                ss = s;
        }
        u_flag = U_ON;
        m++;
        goto reswitch;

      CASE_UANY(M_1J):
        m += m->s_data.jump;
        goto reswitch;

      CASE_UANY(M_2JA):        /* take the non jump branch */
        push(m + m->s_data.jump, s, ss, u_flag);
        m++;
        goto reswitch;

      CASE_UANY(M_2JB):        /* take the jump branch */
        push(m + 1, s, ss, u_flag);
        m += m->s_data.jump;
        goto reswitch;

    case M_ACCEPT + U_OFF:
        if (!ss)
            ss = s;
        /* prefer an earlier start, then a longer match */
        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
        }
        goto refill;

    case M_ACCEPT + U_ON:
        if (!ss) {
            ss = s;
        } else {
            s = str_end;
        }
        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
        }
        goto refill;

    default:
        RE_panic("unexpected case in REmatch");
    }
}
/*
 * Find the leftmost, longest match of a compiled machine in str.
 *
 * Returns a pointer to the first matched character, or a null pointer
 * when there is no match.  *lenp receives the length of the match (it is
 * set to 0 on entry and left at 0 when nothing matches).
 *
 * The function runs a backtracking search: alternative continuations are
 * saved with push() and resumed at `refill`.  For each thread `ss` is the
 * position where its match began (null until the thread has committed to
 * a start), `sp` is its top of the position stack used by
 * M_SAVE_POS / M_2JC, and cb_ss/cb_e hold the best match accepted so far.
 * A thread is abandoned as soon as it cannot improve on the current best.
 */
char *
REmatch(char *str,              /* string to test */
        size_t str_len,         /* ...its length */
        PTR machine,            /* compiled regular expression */
        size_t *lenp)           /* where to return matched-length */
{
    register STATE *m = (STATE *) machine;
    char *s = str;
    char *ss;
    register RT_STATE *stackp;
    int u_flag, t;
    char *str_end = s + str_len;
    RT_POS_ENTRY *sp;
    char *ts;

    /* state of current best match stored here */
    char *cb_ss;                /* the start */
    char *cb_e = 0;             /* the end , pts at first char not matched */
    STATE *m_best = 0;          /* accepting state of the current best;
                                   only written in this block */

    *lenp = 0;

    /* check for the easy case: the machine is a single literal string */
    if ((m + 1)->s_type == M_ACCEPT && m->s_type == M_STR) {
        if ((ts = str_str(s, str_len, m->s_data.str, (size_t) m->s_len)))
            *lenp = m->s_len;
        return ts;
    }

    u_flag = U_ON;
    cb_ss = ss = (char *) 0;
    stackp = RE_run_stack_empty;
    sp = RE_pos_stack_empty;
    goto reswitch;

  refill:
    /* current thread failed or accepted: resume the most recent saved one */
    if (stackp == RE_run_stack_empty) {
        if (cb_ss) {
            /* lenp is size_t: do not narrow the length through unsigned */
            *lenp = (size_t) (cb_e - cb_ss);
        }
        return cb_ss;
    }
    ss = stackp->ss;
    s = (stackp--)->s;
    if (cb_ss) {
        /*
         * does new state start too late ?  A thread starting exactly at
         * the best start is also useless once the best match already
         * reaches str_end.
         */
        if (ss) {
            if (cb_ss < ss || (cb_ss == ss && cb_e == str_end)) {
                goto refill;
            }
        } else if (cb_ss < s || (cb_ss == s && cb_e == str_end)) {
            goto refill;
        }
    }

    m = (stackp + 1)->m;
    sp = RE_pos_stack_base + (stackp + 1)->sp;
    sp->prev_offset = (stackp + 1)->tp;
    u_flag = (stackp + 1)->u;

  reswitch:
    switch (m->s_type + u_flag) {
    case M_STR + U_OFF + END_OFF:
        if (strncmp(s, m->s_data.str, (size_t) m->s_len)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_OFF + END_ON:
        if (strcmp(s, m->s_data.str)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_ON + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        if (!(s = str_str(s, (size_t) (str_end - s),
                          m->s_data.str, (size_t) m->s_len))) {
            goto refill;
        }
        /* NOTE(review): strlen(str) stops at the first NUL; kept as is */
        if (s >= str + strlen(str)) {
            goto refill;
        }
        /* remember to retry this state one position further on */
        push(m, s + 1, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_STR + U_ON + END_ON:
        /* the literal must be the last s_len bytes of the subject */
        t = (int) ((str_end - s) - m->s_len);
        if (t < 0 || memcmp(ts = s + t, m->s_data.str, (size_t) m->s_len)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && ts > cb_ss) {
                goto refill;
            } else {
                ss = ts;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_OFF + END_OFF:
        if (s >= str_end)
            goto refill;
        if (!ison(*m->s_data.bvp, s[0])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_OFF + END_ON:
        if (s >= str_end)
            goto refill;
        /*
         * s must be the last character.  Decide that from str_end, like
         * the other END_ON cases, instead of peeking at s[1] for a NUL,
         * which is wrong for subjects containing NUL bytes.
         */
        if ((s + 1) < str_end || !ison(*m->s_data.bvp, s[0])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_ON + END_OFF:
        /*
         * scan forward to the next character that is in the class;
         * the bound is tested before each read so s[0] is never read
         * at or beyond str_end
         */
        for (;;) {
            if (s >= str_end) {
                goto refill;
            }
            if (ison(*m->s_data.bvp, s[0])) {
                break;
            }
            s++;
        }
        s++;
        push(m, s, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss) {
                goto refill;
            } else {
                ss = s - 1;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_ON + END_ON:
        if ((s >= str_end) || !ison(*m->s_data.bvp, str_end[-1])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss) {
                goto refill;
            } else {
                ss = str_end - 1;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_OFF + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_OFF + END_ON:
        if ((s >= str_end) || ((s + 1) < str_end)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_ON + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        s++;
        push(m, s, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss) {
                goto refill;
            } else {
                ss = s - 1;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_ON + END_ON:
        if (s >= str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss) {
                goto refill;
            } else {
                ss = str_end - 1;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_OFF:
    case M_START + U_ON + END_OFF:
        if (s != str) {
            goto refill;
        }
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_ON:
    case M_START + U_ON + END_ON:
        if (s != str || (s < str_end)) {
            goto refill;
        }
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_END + U_OFF:
        if (s < str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        m++;
        goto reswitch;

    case M_END + U_ON:
        s = str_end;
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

      CASE_UANY(M_U):
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        u_flag = U_ON;
        m++;
        goto reswitch;

      CASE_UANY(M_1J):
        m += m->s_data.jump;
        goto reswitch;

      CASE_UANY(M_SAVE_POS):   /* save position for a later M_2JC */
        /* see also REtest */
        sp = RE_pos_push(sp, stackp, s);
        m++;
        goto reswitch;

      CASE_UANY(M_2JA):        /* take the non jump branch */
        push(m + m->s_data.jump, s, sp, ss, u_flag);
        m++;
        goto reswitch;

      CASE_UANY(M_2JC):        /* take the jump branch if position changed */
        /* see REtest */
        if (RE_pos_pop(&sp, stackp) == s) {
            /* no progress since the saved position: do not jump back */
            m++;
            goto reswitch;
        }
        /* fall thru */

      CASE_UANY(M_2JB):        /* take the jump branch */
        push(m + 1, s, sp, ss, u_flag);
        m += m->s_data.jump;
        goto reswitch;

    case M_ACCEPT + U_OFF:
        if (!ss)
            ss = s;
        /* prefer an earlier start, then a longer match */
        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
            m_best = m;
        }
        goto refill;

    case M_ACCEPT + U_ON:
        if (!ss) {
            ss = s;
        } else {
            s = str_end;
        }
        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
            m_best = m;
        }
        goto refill;

    default:
        RE_panic("unexpected case in REmatch");
    }
}
/*
 * test if str ~ /machine/
 *
 * Returns 1 as soon as any path through the machine reaches M_ACCEPT,
 * and 0 once every saved alternative has been tried without success.
 * Unlike REmatch, the position and length of the match are not tracked.
 *
 * Alternatives are saved with push() and resumed at `refill`; `sp` is the
 * current top of the position stack used by M_SAVE_POS / M_2JC.  The end
 * of the subject is always str + len, never a NUL byte.
 */
int
REtest(char *str,               /* string to test */
       size_t len,              /* ...its length */
       PTR machine)             /* compiled regular-expression */
{
    register STATE *m = (STATE *) machine;
    char *s = str;
    register RT_STATE *stackp;
    int u_flag;
    char *str_end = str + len;
    RT_POS_ENTRY *sp;
    int t;                      /*convenient temps */
    STATE *tm;

    /* handle the easy case quickly: the machine is one literal string */
    if ((m + 1)->s_type == M_ACCEPT && m->s_type == M_STR) {
        return str_str(s, len, m->s_data.str, (size_t) m->s_len) != (char *) 0;
    } else {
        u_flag = U_ON;
        stackp = RE_run_stack_empty;
        sp = RE_pos_stack_empty;
        goto reswitch;
    }

  refill:
    /* current path failed: resume the most recently saved alternative */
    if (stackp == RE_run_stack_empty)
        return 0;
    m = stackp->m;
    s = stackp->s;
    sp = RE_pos_stack_base + stackp->sp;
    sp->prev_offset = stackp->tp;
    u_flag = (stackp--)->u;

  reswitch:
    switch (m->s_type + u_flag) {
    case M_STR + U_OFF + END_OFF:
        if (strncmp(s, m->s_data.str, (size_t) m->s_len))
            goto refill;
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_OFF + END_ON:
        if (strcmp(s, m->s_data.str))
            goto refill;
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_ON + END_OFF:
        if (!(s = str_str(s, (size_t) (str_end - s),
                          m->s_data.str, (size_t) m->s_len)))
            goto refill;
        /* remember to retry this state one position further on */
        push(m, s + 1, sp, U_ON);
        s += m->s_len;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_STR + U_ON + END_ON:
        /* the literal must be the last s_len bytes of the subject */
        t = (int) ((str_end - s) - m->s_len);
        if (t < 0 || memcmp(s + t, m->s_data.str, (size_t) m->s_len))
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_OFF + END_OFF:
        if (s >= str_end || !ison(*m->s_data.bvp, s[0]))
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_OFF + END_ON:
        if (s >= str_end)
            goto refill;
        if ((s + 1) < str_end || !ison(*m->s_data.bvp, s[0]))
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_ON + END_OFF:
        /* scan forward to the next character that is in the class */
        for (;;) {
            if (s >= str_end)
                goto refill;
            else if (ison(*m->s_data.bvp, s[0]))
                break;
            s++;
        }
        s++;
        push(m, s, sp, U_ON);
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_ON + END_ON:
        if (s >= str_end || !ison(*m->s_data.bvp, str_end[-1]))
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_OFF + END_OFF:
        if (s >= str_end)
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_OFF + END_ON:
        if (s >= str_end || (s + 1) < str_end)
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_ON + END_OFF:
        if (s >= str_end)
            goto refill;
        s++;
        push(m, s, sp, U_ON);
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_ON + END_ON:
        if (s >= str_end)
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_OFF:
    case M_START + U_ON + END_OFF:
        if (s != str)
            goto refill;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_ON:
    case M_START + U_ON + END_ON:
        if (s != str || s < str_end)
            goto refill;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_END + U_OFF:
        if (s < str_end)
            goto refill;
        m++;
        goto reswitch;

    case M_END + U_ON:
        /*
         * Jump to the real end of the subject.  strlen(s) would stop at
         * an embedded NUL and disagree with every other case here, all
         * of which are bounded by str_end.
         */
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

      CASE_UANY(M_U):
        u_flag = U_ON;
        m++;
        goto reswitch;

      CASE_UANY(M_1J):
        m += m->s_data.jump;
        goto reswitch;

      CASE_UANY(M_SAVE_POS):   /* save position for a later M_2JC */
        sp = RE_pos_push(sp, stackp, s);
        m++;
        goto reswitch;

      CASE_UANY(M_2JA):        /* take the non jump branch */
        /* don't stack an ACCEPT */
        if ((tm = m + m->s_data.jump)->s_type == M_ACCEPT)
            return 1;
        push(tm, s, sp, u_flag);
        m++;
        goto reswitch;

      CASE_UANY(M_2JC):        /* take the jump branch if position changed */
        if (RE_pos_pop(&sp, stackp) == s) {
            /* did not advance: do not jump back */
            m++;
            goto reswitch;
        }
        /* fall thru */

      CASE_UANY(M_2JB):        /* take the jump branch */
        /* don't stack an ACCEPT */
        if ((tm = m + 1)->s_type == M_ACCEPT)
            return 1;
        push(tm, s, sp, u_flag);
        m += m->s_data.jump;
        goto reswitch;

      CASE_UANY(M_ACCEPT):
        return 1;

    default:
        RE_panic("unexpected case in REtest");
    }
}