Exemple #1
0
/*
 * REmatch -- locate the best match of a compiled regular expression.
 *
 * Runs the state machine `machine` over str (str_len bytes).  Alternative
 * paths are saved on the run stack with push() and resumed at `refill`.
 * Every time an M_ACCEPT state is reached the match is compared against
 * the best one recorded so far: an earlier start wins, and for equal
 * starts the longer match wins.
 *
 * Returns a pointer to the first character of the best match and stores
 * its length in *lenp.  If nothing matches, returns a null pointer and
 * *lenp is 0.
 *
 * Local bookkeeping:
 *   s      current scan position
 *   ss     start of the match being built on this path, 0 if not yet set
 *   u_flag U_ON while the next state may match at any later position
 *          (see the U_ON cases, which search forward), U_OFF otherwise
 */
char *
REmatch(char *str,		/* string to test */
        size_t str_len,		/* ...its length */
        PTR machine,		/* compiled regular expression */
        size_t *lenp)		/* where to return matched-length */
{
    register STATE *m = (STATE *) machine;
    char *s = str;
    char *ss;
    register RT_STATE *stackp;
    int u_flag, t;
    char *str_end = s + str_len;
    RT_POS_ENTRY *sp;
    char *ts;

    /* state of current best match stored here */
    char *cb_ss;		/* the start */
    char *cb_e = 0;		/* the end , pts at first char not matched */
    STATE *m_best = 0;

    *lenp = 0;

    /* check for the easy case: a single literal string followed by accept */
    if ((m + 1)->s_type == M_ACCEPT && m->s_type == M_STR) {
        if ((ts = str_str(s, str_len, m->s_data.str, (size_t) m->s_len)))
            *lenp = m->s_len;
        return ts;
    }

    u_flag = U_ON;
    cb_ss = ss = (char *) 0;
    stackp = RE_run_stack_empty;
    sp = RE_pos_stack_empty;
    goto reswitch;

refill:
    /* nothing left to try: report the best match seen, if any */
    if (stackp == RE_run_stack_empty) {
        if (cb_ss) {
            /* keep the full width of the difference; lenp is a size_t */
            *lenp = (size_t) (cb_e - cb_ss);
        }
        return cb_ss;
    }
    /* pop the saved path; the remaining fields are read via stackp + 1 */
    ss = stackp->ss;
    s = (stackp--)->s;
    if (cb_ss) {		/* does new state start too late ? */
        if (ss) {
            if (cb_ss < ss || (cb_ss == ss && cb_e == str_end)) {
                goto refill;
            }
        } else if (cb_ss < s || (cb_ss == s && cb_e == str_end)) {
            goto refill;
        }
    }

    m = (stackp + 1)->m;
    sp = RE_pos_stack_base + (stackp + 1)->sp;
    sp->prev_offset = (stackp + 1)->tp;
    u_flag = (stackp + 1)->u;

reswitch:

    switch (m->s_type + u_flag) {
    case M_STR + U_OFF + END_OFF:
        /* literal must match at the current position */
        if (strncmp(s, m->s_data.str, (size_t) m->s_len)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_OFF + END_ON:
        /* literal must match the whole remainder of the string */
        if (strcmp(s, m->s_data.str)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_ON + END_OFF:
        /* literal may start anywhere from s onward: search for it */
        if (s >= str_end) {
            goto refill;
        }
        if (!(s = str_str(s, (size_t) (str_end - s), m->s_data.str, (size_t) m->s_len))) {
            goto refill;
        }
        /* NOTE(review): rejects hits at or beyond the first NUL in str */
        if (s >= str + strlen(str)) {
            goto refill;
        }
        /* remember to retry the search one character further on */
        push(m, s + 1, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s += m->s_len;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_STR + U_ON + END_ON:
        /* literal must be the tail of the string; ts is where it starts */
        t = (int) ((str_end - s) - m->s_len);
        if (t < 0 || memcmp(ts = s + t, m->s_data.str, (size_t) m->s_len)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && ts > cb_ss) {
                goto refill;
            } else {
                ss = ts;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_OFF + END_OFF:
        if (s >= str_end)
            goto refill;
        if (!ison(*m->s_data.bvp, s[0])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_OFF + END_ON:
        if (s >= str_end)
            goto refill;
        /*
         * s must be the last character.  Decide that from str_end, as the
         * M_ANY + U_OFF + END_ON case does, instead of peeking at s[1].
         */
        if ((s + 1) < str_end || !ison(*m->s_data.bvp, s[0])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_ON + END_OFF:
        /* skip forward to the first class member; bounds checked first */
        for (;;) {
            if (s >= str_end) {
                goto refill;
            }
            if (ison(*m->s_data.bvp, s[0])) {
                break;
            }
            s++;
        }
        s++;
        /* s is now one past the matched character */
        push(m, s, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss) {
                goto refill;
            } else {
                ss = s - 1;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_ON + END_ON:
        /* only the final character of the string is a candidate */
        if ((s >= str_end) || !ison(*m->s_data.bvp, str_end[-1])) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss) {
                goto refill;
            } else {
                ss = str_end - 1;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_OFF + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_OFF + END_ON:
        /* exactly one character must remain */
        if ((s >= str_end) || ((s + 1) < str_end)) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_ON + END_OFF:
        if (s >= str_end) {
            goto refill;
        }
        s++;
        /* s is now one past the matched character */
        push(m, s, sp, ss, U_ON);
        if (!ss) {
            if (cb_ss && s - 1 > cb_ss) {
                goto refill;
            } else {
                ss = s - 1;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_ON + END_ON:
        if (s >= str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && str_end - 1 > cb_ss) {
                goto refill;
            } else {
                ss = str_end - 1;
            }
        }
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_OFF:
    case M_START + U_ON + END_OFF:
        /* only satisfied at the very beginning of str */
        if (s != str) {
            goto refill;
        }
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_ON:
    case M_START + U_ON + END_ON:
        /* start and end at once: only an empty string qualifies */
        if (s != str || (s < str_end)) {
            goto refill;
        }
        ss = s;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_END + U_OFF:
        if (s < str_end) {
            goto refill;
        }
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        m++;
        goto reswitch;

    case M_END + U_ON:
        s = str_end;
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        m++;
        u_flag = U_OFF;
        goto reswitch;

    CASE_UANY(M_U):
        if (!ss) {
            if (cb_ss && s > cb_ss) {
                goto refill;
            } else {
                ss = s;
            }
        }
        u_flag = U_ON;
        m++;
        goto reswitch;

    CASE_UANY(M_1J):
        m += m->s_data.jump;
        goto reswitch;

    CASE_UANY(M_SAVE_POS):	/* save position for a later M_2JC */
        /* see also REtest */
        sp = RE_pos_push(sp, stackp, s);
        m++;
        goto reswitch;

    CASE_UANY(M_2JA):		/* take the non jump branch */
        /* the jump target is saved for later */
        push(m + m->s_data.jump, s, sp, ss, u_flag);
        m++;
        goto reswitch;

    CASE_UANY(M_2JC):		/* take the jump branch if position changed */
        /* see REtest */
        if (RE_pos_pop(&sp, stackp) == s) {
            /* position unchanged since M_SAVE_POS: do not jump */
            m++;
            goto reswitch;
        }
        /* fall thru */

    CASE_UANY(M_2JB):		/* take the jump branch */
        /* the non jump branch is saved for later */
        push(m + 1, s, sp, ss, u_flag);
        m += m->s_data.jump;
        goto reswitch;

    case M_ACCEPT + U_OFF:
        if (!ss)
            ss = s;
        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
            m_best = m;
        }
        goto refill;

    case M_ACCEPT + U_ON:
        if (!ss) {
            ss = s;
        } else {
            /* a started match extends to the end of the string */
            s = str_end;
        }

        if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
            /* we have a new current best */
            cb_ss = ss;
            cb_e = s;
            m_best = m;
        }
        goto refill;

    default:
        RE_panic("unexpected case in REmatch");
    }

    /* only reached if RE_panic() returns; keeps the result well defined */
    return (char *) 0;
}
Exemple #2
0
/*
 * test if str ~ /machine/
 *
 * Runs the compiled regular expression `machine` over str (len bytes)
 * and returns 1 as soon as any path reaches an accepting state, or 0
 * once every saved alternative has been tried without success.  Unlike
 * REmatch, no match position or length is tracked.
 *
 * Alternative paths are saved on the run stack with push() and resumed
 * at `refill`.  u_flag is U_ON while the next state may match at any
 * later position (the U_ON cases search forward), U_OFF otherwise.
 */
int
REtest(char *str,		/* string to test */
       size_t len,		/* ...its length */
       PTR machine)		/* compiled regular-expression */
{
    register STATE *m = (STATE *) machine;
    char *s = str;
    register RT_STATE *stackp;
    int u_flag;
    char *str_end = str + len;
    RT_POS_ENTRY *sp;
    int t;			/*convenient temps */
    STATE *tm;

    /* handle the easy case quickly: one literal string, then accept */
    if ((m + 1)->s_type == M_ACCEPT && m->s_type == M_STR) {
        return str_str(s, len, m->s_data.str, (size_t) m->s_len) != (char *) 0;
    }

    u_flag = U_ON;
    stackp = RE_run_stack_empty;
    sp = RE_pos_stack_empty;
    goto reswitch;

refill:
    /* no alternatives left: the string does not match */
    if (stackp == RE_run_stack_empty)
        return 0;
    /* pop the most recently saved path */
    m = stackp->m;
    s = stackp->s;
    sp = RE_pos_stack_base + stackp->sp;
    sp->prev_offset = stackp->tp;
    u_flag = (stackp--)->u;

reswitch:

    switch (m->s_type + u_flag) {
    case M_STR + U_OFF + END_OFF:
        /* literal must match at the current position */
        if (strncmp(s, m->s_data.str, (size_t) m->s_len))
            goto refill;
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_OFF + END_ON:
        /* literal must match the whole remainder of the string */
        if (strcmp(s, m->s_data.str))
            goto refill;
        s += m->s_len;
        m++;
        goto reswitch;

    case M_STR + U_ON + END_OFF:
        /* literal may start anywhere from s onward: search for it */
        if (!(s = str_str(s, (size_t) (str_end - s), m->s_data.str, (size_t) m->s_len)))
            goto refill;
        /* remember to retry the search one character further on */
        push(m, s + 1, sp, U_ON);
        s += m->s_len;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_STR + U_ON + END_ON:
        /* literal must be the tail of the string */
        t = (int) ((str_end - s) - m->s_len);
        if (t < 0 || memcmp(s + t, m->s_data.str, (size_t) m->s_len))
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_OFF + END_OFF:
        if (s >= str_end || !ison(*m->s_data.bvp, s[0]))
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_OFF + END_ON:
        if (s >= str_end)
            goto refill;
        /* s must be the last character and a member of the class */
        if ((s + 1) < str_end || !ison(*m->s_data.bvp, s[0]))
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_CLASS + U_ON + END_OFF:
        /* skip forward to the first class member */
        for (;;) {
            if (s >= str_end)
                goto refill;
            else if (ison(*m->s_data.bvp, s[0]))
                break;
            s++;
        }
        s++;
        push(m, s, sp, U_ON);
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_CLASS + U_ON + END_ON:
        /* only the final character of the string is a candidate */
        if (s >= str_end || !ison(*m->s_data.bvp, str_end[-1]))
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_OFF + END_OFF:
        if (s >= str_end)
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_OFF + END_ON:
        /* exactly one character must remain */
        if (s >= str_end || (s + 1) < str_end)
            goto refill;
        s++;
        m++;
        goto reswitch;

    case M_ANY + U_ON + END_OFF:
        if (s >= str_end)
            goto refill;
        s++;
        push(m, s, sp, U_ON);
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_ANY + U_ON + END_ON:
        if (s >= str_end)
            goto refill;
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_OFF:
    case M_START + U_ON + END_OFF:
        /* only satisfied at the very beginning of str */
        if (s != str)
            goto refill;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_START + U_OFF + END_ON:
    case M_START + U_ON + END_ON:
        /* start and end at once: only an empty string qualifies */
        if (s != str || s < str_end)
            goto refill;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    case M_END + U_OFF:
        if (s < str_end)
            goto refill;
        m++;
        goto reswitch;

    case M_END + U_ON:
        /*
         * Jump to the end as given by the len argument, the same bound
         * every other case uses, rather than scanning for a NUL.
         */
        s = str_end;
        m++;
        u_flag = U_OFF;
        goto reswitch;

    CASE_UANY(M_U):
        u_flag = U_ON;
        m++;
        goto reswitch;

    CASE_UANY(M_1J):
        m += m->s_data.jump;
        goto reswitch;

    CASE_UANY(M_SAVE_POS):	/* save position for a later M_2JC */
        sp = RE_pos_push(sp, stackp, s);
        m++;
        goto reswitch;

    CASE_UANY(M_2JA):		/* take the non jump branch */
        /* don't stack an ACCEPT */
        if ((tm = m + m->s_data.jump)->s_type == M_ACCEPT)
            return 1;
        push(tm, s, sp, u_flag);
        m++;
        goto reswitch;

    CASE_UANY(M_2JC):		/* take the jump branch if position changed */
        if (RE_pos_pop(&sp, stackp) == s) {
            /* did not advance: do not jump back */
            m++;
            goto reswitch;
        }
        /* fall thru */

    CASE_UANY(M_2JB):		/* take the jump branch */
        /* don't stack an ACCEPT */
        if ((tm = m + 1)->s_type == M_ACCEPT)
            return 1;
        push(tm, s, sp, u_flag);
        m += m->s_data.jump;
        goto reswitch;

    CASE_UANY(M_ACCEPT):
        return 1;

    default:
        RE_panic("unexpected case in REtest");
    }

    /* only reached if RE_panic() returns; keeps the result well defined */
    return 0;
}