/*
 * Find the best match of a compiled regular expression in a string.
 *
 * str      the text to search
 * str_len  number of bytes of str to consider
 * machine  compiled regular expression (an array of STATE)
 * lenp     receives the length of the match (0 when nothing matched)
 *
 * Returns a pointer to the first matched character, or a null pointer
 * when there is no match.  Among all matches, the one that starts
 * earliest is preferred; among those with the same start, the one that
 * ends latest is preferred.
 *
 * The machine is interpreted with an explicit backtracking stack:
 * "reswitch" executes the current state, "refill" pops the next saved
 * alternative (or returns the best match found once the stack is empty).
 *
 * NOTE(review): the M_STR + U_OFF cases use strncmp()/strcmp(), which
 * depend on a NUL terminator rather than on str_end -- confirm that
 * callers always pass NUL-terminated text.
 */
char *
REmatch(char *str,              /* string to test */
        size_t str_len,         /* ...its length */
        PTR machine,            /* compiled regular expression */
        size_t *lenp)           /* where to return matched-length */
{
    register STATE *m = (STATE *) machine;
    char *s = str;
    char *ss;                   /* start of the match being attempted, if known */
    register RT_STATE *stackp;
    int u_flag, t;
    char *str_end = s + str_len;
    RT_POS_ENTRY *sp;
    char *ts;

    /* state of current best match stored here */
    char *cb_ss;                /* the start */
    char *cb_e = 0;             /* the end , pts at first char not matched */
    STATE *m_best = 0;          /* recorded with each best match; not read here */

    *lenp = 0;

    /* check for the easy case: a single literal string followed by ACCEPT */
    if ((m + 1)->s_type == M_ACCEPT && m->s_type == M_STR) {
        if ((ts = str_str(s, str_len, m->s_data.str, (size_t) m->s_len)))
            *lenp = m->s_len;
        return ts;
    }

    u_flag = U_ON;
    cb_ss = ss = (char *) 0;
    stackp = RE_run_stack_empty;
    sp = RE_pos_stack_empty;
    goto reswitch;

  refill:
    if (stackp == RE_run_stack_empty) {
        /* no alternatives left: report the best match, if any */
        if (cb_ss)
            *lenp = (size_t) (cb_e - cb_ss);    /* was narrowed via (unsigned) */
        return cb_ss;
    }
    ss = stackp->ss;
    s = (stackp--)->s;
    if (cb_ss) {                /* does new state start too late ? */
        if (ss) {
            if (cb_ss < ss || (cb_ss == ss && cb_e == str_end)) {
                goto refill;
            }
        } else if (cb_ss < s || (cb_ss == s && cb_e == str_end)) {
            goto refill;
        }
    }

    /* stackp was already decremented above, so the popped entry is at +1 */
    m = (stackp + 1)->m;
    sp = RE_pos_stack_base + (stackp + 1)->sp;
    sp->prev_offset = (stackp + 1)->tp;
    u_flag = (stackp + 1)->u;

  reswitch:
    switch (m->s_type + u_flag) {

    /* literal string at the current position */
    case M_STR + U_OFF + END_OFF:
        if (strncmp(s, m->s_data.str, (size_t) m->s_len)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        goto reswitch;

    /* literal string at the current position, which must end the text */
    case M_STR + U_OFF + END_ON:
        if (strcmp(s, m->s_data.str)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        goto reswitch;

    /* literal string anywhere at or after the current position */
    case M_STR + U_ON + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        if (!(s = str_str(s, (size_t) (str_end - s), m->s_data.str, (size_t) m->s_len))) {
            goto refill;
        }
        /*
         * Bound by str_end like every other case; the previous
         * str + strlen(str) rescanned the whole text on each attempt
         * and stopped at the first NUL byte instead of at str_len.
         */
        if (s >= str_end) {
            goto refill;
        }
        /* remember to retry the search one character further on */
        push(m, s + 1, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* literal string that must be the tail of the text */
    case M_STR + U_ON + END_ON:
        t = (int) ((str_end - s) - m->s_len);
        if (t < 0 || memcmp(ts = s + t, m->s_data.str, (size_t) m->s_len)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && ts > cb_ss) {
                goto refill;
            } else {
                ss = ts;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* one character from a class at the current position */
    case M_CLASS + U_OFF + END_OFF:
        if (s >= str_end)
            goto refill;
        if (!ison(*m->s_data.bvp, s[0])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    /* one character from a class, which must be the last character */
    case M_CLASS + U_OFF + END_ON:
        if (s >= str_end)
            goto refill;
        /*
         * "last character" is decided by str_end, as in the
         * M_ANY + U_OFF + END_ON case, not by looking for a NUL at s[1].
         */
        if ((s + 1) < str_end || !ison(*m->s_data.bvp, s[0])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    /* first character from a class at or after the current position */
    case M_CLASS + U_ON + END_OFF:
        /* check the bound before each read so str_end[0] is never examined */
        for (;;) {
            if (s >= str_end) {
                goto refill;
            }
            if (ison(*m->s_data.bvp, s[0])) {
                break;
            }
            s++;
        }
        s++;
        /* remember to resume scanning after the character just found */
        push(m, s, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss) {
                goto refill;
            } else {
                ss = s - 1;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* the last character of the text must belong to the class */
    case M_CLASS + U_ON + END_ON:
        if ((s >= str_end) || !ison(*m->s_data.bvp, str_end[-1])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss) {
                goto refill;
            } else {
                ss = str_end - 1;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* any single character at the current position */
    case M_ANY + U_OFF + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    /* any single character, which must be the last character */
    case M_ANY + U_OFF + END_ON:
        if ((s >= str_end) || ((s + 1) < str_end)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    /* any single character at or after the current position */
    case M_ANY + U_ON + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        s++;
        push(m, s, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss) {
                goto refill;
            } else {
                ss = s - 1;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* the last character of the text, whatever it is */
    case M_ANY + U_ON + END_ON:
        if (s >= str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss) {
                goto refill;
            } else {
                ss = str_end - 1;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* only succeeds at the first character of the text */
    case M_START + U_OFF + END_OFF:
    case M_START + U_ON + END_OFF:
        if (s != str) {
            goto refill;
        }
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* only succeeds when the text is empty */
    case M_START + U_OFF + END_ON:
    case M_START + U_ON + END_ON:
        if (s != str || (s < str_end)) {
            goto refill;
        }
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* only succeeds when already at the end of the text */
    case M_END + U_OFF:
        if (s < str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        m++;
        goto reswitch;

    /* skip forward to the end of the text */
    case M_END + U_ON:
        s = str_end;
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* switch to U_ON for the following state */
    CASE_UANY(M_U):
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        u_flag = U_ON;
        m++;
        goto reswitch;

    /* unconditional jump */
    CASE_UANY(M_1J):
        m += m->s_data.jump;
        goto reswitch;

    CASE_UANY(M_SAVE_POS):      /* save position for a later M_2JC */
        /* see also REtest */
        sp = RE_pos_push(sp, stackp, s);
        m++;
        goto reswitch;

    CASE_UANY(M_2JA):           /* take the non jump branch */
        /* the jump target is saved as the alternative */
        push(m + m->s_data.jump, s, sp, ss, u_flag);
        m++;
        goto reswitch;

    CASE_UANY(M_2JC):           /* take the jump branch if position changed */
        /* see REtest */
        if (RE_pos_pop(&sp, stackp) == s) {
            /* position unchanged since M_SAVE_POS: continue without jumping */
            m++;
            goto reswitch;
        }
        /* fall thru */

    CASE_UANY(M_2JB):           /* take the jump branch */
        /* the following state is saved as the alternative */
        push(m + 1, s, sp, ss, u_flag);
        m += m->s_data.jump;
        goto reswitch;

    case M_ACCEPT + U_OFF:
        if (!ss)
            ss = s;
        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
            m_best = m;
        }
        /* keep going: another alternative may yield a better match */
        goto refill;

    case M_ACCEPT + U_ON:
        if (!ss) {
            ss = s;
        } else {
            /* with U_ON in effect the match runs to the end of the text */
            s = str_end;
        }
        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
            m_best = m;
        }
        goto refill;

    default:
        RE_panic("unexpected case in REmatch");
    }
}
/*
 * test if str ~ /machine/
 *
 * str      the text to test
 * len      number of bytes of str to consider
 * machine  compiled regular expression (an array of STATE)
 *
 * Returns 1 as soon as any path through the machine reaches M_ACCEPT,
 * and 0 once every saved alternative has been tried without success.
 * Unlike REmatch, no match position or length is tracked.
 *
 * "reswitch" executes the current state; "refill" pops the next saved
 * alternative from the run stack.
 *
 * NOTE(review): the M_STR + U_OFF cases use strncmp()/strcmp(), which
 * depend on a NUL terminator rather than on str_end -- confirm that
 * callers always pass NUL-terminated text.
 */
int
REtest(char *str,               /* string to test */
       size_t len,              /* ...its length */
       PTR machine)             /* compiled regular-expression */
{
    register STATE *m = (STATE *) machine;
    char *s = str;
    register RT_STATE *stackp;
    int u_flag;
    char *str_end = str + len;
    RT_POS_ENTRY *sp;
    int t;                      /*convenient temps */
    STATE *tm;

    /* handle the easy case quickly: a single literal string then ACCEPT */
    if ((m + 1)->s_type == M_ACCEPT && m->s_type == M_STR) {
        return str_str(s, len, m->s_data.str, (size_t) m->s_len) != (char *) 0;
    }

    u_flag = U_ON;
    stackp = RE_run_stack_empty;
    sp = RE_pos_stack_empty;
    goto reswitch;

  refill:
    if (stackp == RE_run_stack_empty)
        return 0;               /* nothing left to try */
    m = stackp->m;
    s = stackp->s;
    sp = RE_pos_stack_base + stackp->sp;
    sp->prev_offset = stackp->tp;
    u_flag = (stackp--)->u;

  reswitch:
    switch (m->s_type + u_flag) {

    /* literal string at the current position */
    case M_STR + U_OFF + END_OFF:
        if (strncmp(s, m->s_data.str, (size_t) m->s_len))
            goto refill;
        s += m->s_len;
        m++;
        goto reswitch;

    /* literal string at the current position, which must end the text */
    case M_STR + U_OFF + END_ON:
        if (strcmp(s, m->s_data.str))
            goto refill;
        s += m->s_len;
        m++;
        goto reswitch;

    /* literal string anywhere at or after the current position */
    case M_STR + U_ON + END_OFF:
        if (!(s = str_str(s, (size_t) (str_end - s), m->s_data.str, (size_t) m->s_len)))
            goto refill;
        /* remember to retry the search one character further on */
        push(m, s + 1, sp, U_ON);
        s += m->s_len;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* literal string that must be the tail of the text */
    case M_STR + U_ON + END_ON:
        /* explicit narrowing, matching the same case in REmatch */
        t = (int) ((str_end - s) - m->s_len);
        if (t < 0 || memcmp(s + t, m->s_data.str, (size_t) m->s_len))
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* one character from a class at the current position */
    case M_CLASS + U_OFF + END_OFF:
        if (s >= str_end || !ison(*m->s_data.bvp, s[0]))
            goto refill;
        s++;
        m++;
        goto reswitch;

    /* one character from a class, which must be the last character */
    case M_CLASS + U_OFF + END_ON:
        if (s >= str_end)
            goto refill;
        if ((s + 1) < str_end || !ison(*m->s_data.bvp, s[0]))
            goto refill;
        s++;
        m++;
        goto reswitch;

    /* first character from a class at or after the current position */
    case M_CLASS + U_ON + END_OFF:
        for (;;) {
            if (s >= str_end)
                goto refill;
            else if (ison(*m->s_data.bvp, s[0]))
                break;
            s++;
        }
        s++;
        /* remember to resume scanning after the character just found */
        push(m, s, sp, U_ON);
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* the last character of the text must belong to the class */
    case M_CLASS + U_ON + END_ON:
        if (s >= str_end || !ison(*m->s_data.bvp, str_end[-1]))
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* any single character at the current position */
    case M_ANY + U_OFF + END_OFF:
        if (s >= str_end)
            goto refill;
        s++;
        m++;
        goto reswitch;

    /* any single character, which must be the last character */
    case M_ANY + U_OFF + END_ON:
        if (s >= str_end || (s + 1) < str_end)
            goto refill;
        s++;
        m++;
        goto reswitch;

    /* any single character at or after the current position */
    case M_ANY + U_ON + END_OFF:
        if (s >= str_end)
            goto refill;
        s++;
        push(m, s, sp, U_ON);
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* the last character of the text, whatever it is */
    case M_ANY + U_ON + END_ON:
        if (s >= str_end)
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* only succeeds at the first character of the text */
    case M_START + U_OFF + END_OFF:
    case M_START + U_ON + END_OFF:
        if (s != str)
            goto refill;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* only succeeds when the text is empty */
    case M_START + U_OFF + END_ON:
    case M_START + U_ON + END_ON:
        if (s != str || s < str_end)
            goto refill;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* only succeeds when already at the end of the text */
    case M_END + U_OFF:
        if (s < str_end)
            goto refill;
        m++;
        goto reswitch;

    /* skip forward to the end of the text */
    case M_END + U_ON:
        /*
         * Use the caller-supplied length, as REmatch does; the previous
         * s += strlen(s) stopped at the first NUL byte instead.
         */
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    /* switch to U_ON for the following state */
    CASE_UANY(M_U):
        u_flag = U_ON;
        m++;
        goto reswitch;

    /* unconditional jump */
    CASE_UANY(M_1J):
        m += m->s_data.jump;
        goto reswitch;

    CASE_UANY(M_SAVE_POS):      /* save position for a later M_2JC */
        sp = RE_pos_push(sp, stackp, s);
        m++;
        goto reswitch;

    CASE_UANY(M_2JA):           /* take the non jump branch */
        /* don't stack an ACCEPT */
        if ((tm = m + m->s_data.jump)->s_type == M_ACCEPT)
            return 1;
        push(tm, s, sp, u_flag);
        m++;
        goto reswitch;

    CASE_UANY(M_2JC):           /* take the jump branch if position changed */
        if (RE_pos_pop(&sp, stackp) == s) {
            /* did not advance: do not jump back */
            m++;
            goto reswitch;
        }
        /* fall thru */

    CASE_UANY(M_2JB):           /* take the jump branch */
        /* don't stack an ACCEPT */
        if ((tm = m + 1)->s_type == M_ACCEPT)
            return 1;
        push(tm, s, sp, u_flag);
        m += m->s_data.jump;
        goto reswitch;

    CASE_UANY(M_ACCEPT):
        return 1;

    default:
        RE_panic("unexpected case in REtest");
    }
}