Пример #1
0
/*
 * printree: recursively write a text dump of the transitions reachable
 * from (state) to (out).
 *
 *  psp    the automaton being dumped.
 *  state  index of the state to print.
 *  depth  number of characters of (str) that spell the path to (state).
 *  str    scratch buffer holding that path; str[depth] is overwritten
 *         for every valid child, and depth can reach psp->maxlen.
 *  charv  maps a symbol number to the character printed for it.
 *  out    destination stream.
 *  pattv  optional pattern table; when non-NULL, the pattern text is
 *         printed next to leaf transitions that are matches.
 *
 * Prints "oops" and stops descending if (depth) exceeds psp->maxlen.
 */
static void
printree(ACISM const*psp, int state, int depth, char *str,
            char const *charv, FILE*out, MEMREF const*pattv)
{
    TRAN tran;
    SYMBOL sym;
    char const *warn;
    int is_match, is_suffix;

    // A path longer than the longest pattern should not occur;
    //  report it and refuse to go deeper.
    if (depth > (int)psp->maxlen) {
        fputs("oops\n", out);
        return;
    }

    // Header line: state number and the path that leads to it.
    fprintf(out, "%5d:%.*s", state, depth, str);

    // tranv[state] is the state's own slot (presumably its back
    //  reference -- confirm against the table layout). Any flag bits
    //  set on it are reported as " BAD".
    tran = psp->tranv[state];
    if (t_valid(psp, tran) && t_next(psp, tran)) {
        warn = tran & T_FLAGS ? " BAD" : "";
        fprintf(out, " b=%"FX"d%s", t_next(psp, tran), warn);
    }
    fprintf(out, "\n");

    // One line per valid outgoing transition, then its subtree.
    for (sym = 1; sym < psp->nsyms; ++sym) {
        tran = p_tran(psp, state, sym);
        if (!t_valid(psp, tran))
            continue;

        is_match = (tran & IS_MATCH) != 0;
        is_suffix = (tran & IS_SUFFIX) != 0;

        // Extend the path with this symbol's character.
        str[depth] = charv[sym];

        fprintf(out, "%*s%c %c%c",
                depth+6, "", charv[sym],
                is_match ? 'M' : '-',
                is_suffix ? 'S' : '-');

        // The pattern text is only available here for leaf matches.
        if (is_match && pattv && t_isleaf(psp, tran))
            fprintf(out, " %.0d -> %.*s", PSTR(psp, t_strno(psp,tran), pattv));

        if (is_suffix)
            fprintf(out, " ->S %"FX"d", t_next(psp, psp->tranv[state]));

        fprintf(out, "\n");

        // Non-leaf transitions lead to a state with children of its own.
        if (!t_isleaf(psp, tran))
            printree(psp, t_next(psp, tran), depth+1, str, charv, out, pattv);
    }
}
Пример #2
0
/*
 * acism_lookup: scan text[0..len) for pattern matches.
 *
 *  psp       the compiled automaton (copied to a local for the scan).
 *  text,len  the input bytes.
 *  cb        called as cb(strno, offset, context) for every match, where
 *            (offset) is the position just past the byte that completed
 *            the match. A nonzero return stops the scan.
 *  context   passed through to (cb) untouched.
 *  statep    in: state to resume from; out: state reached when the scan
 *            stopped (written on every exit path).
 *  caseless  when true, each byte goes through g_ascii_tolower() before
 *            it is mapped to a symbol.
 *
 * Returns 0 if the whole input was consumed, otherwise the nonzero
 * value returned by (cb).
 */
int
acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
           ACISM_ACTION *cb, void *context, int *statep, bool caseless)
{
    ac_trie_t const ps = *psp;
    char const *cp = text;
    char const *const endp = text + len;
    STATE state = *statep;
    int ret = 0;

    while (cp < endp) {
        // Consume one byte; (cp) now points past it.
        char const raw = *cp++;
        uint8_t byte;
        if (caseless)
            byte = g_ascii_tolower (raw);
        else
            byte = raw;

        _SYMBOL const sym = ps.symv[byte];
        if (!sym) {
            // This byte occurs in no pattern: restart from the root.
            state = ROOT;
            continue;
        }

        // Look for a valid transition on (sym), falling back along
        //  the backref chain until one is found or ROOT is reached.
        TRAN tran;
        for (;;) {
            tran = p_tran(&ps, state, sym);
            if (t_valid(&ps, tran) || state == ROOT)
                break;

            TRAN const back = p_tran(&ps, state, BACK);
            if (t_valid(&ps, back))
                state = t_next(&ps, back);
            else
                state = ROOT;
        }

        // Even ROOT has no transition on (sym).
        if (!t_valid(&ps, tran))
            continue;

        if ((tran & (IS_MATCH | IS_SUFFIX)) == 0) {
            // Nothing has matched yet; just advance.
            state = t_next(&ps, tran);
            continue;
        }

        // One or more patterns end at this byte. Walk the backref
        //  chain from the current state to report all of them; a
        //  valid transition on (sym) without the SUFFIX flag ends
        //  the chain. The same walk also picks the next (state)
        //  when the original transition is a leaf.
        STATE chain = state;

        // Zero means "next state not chosen yet"; the walk below
        //  fills it in from the first non-leaf transition it sees.
        if (t_isleaf(&ps, tran))
            state = 0;
        else
            state = t_next(&ps, tran);

        for (;;) {
            if (t_valid(&ps, tran)) {

                if (tran & IS_MATCH) {
                    unsigned const key = chain + sym;
                    unsigned strno;

                    if (t_isleaf(&ps, ps.tranv[key])) {
                        // Leaf transitions carry the pattern number.
                        strno = t_strno(&ps, ps.tranv[key]);
                    } else {
                        // Otherwise it is looked up in hashv, probing
                        //  forward from the hash slot. There is no
                        //  bound on the probe; presumably an entry
                        //  for (key) always exists -- confirm against
                        //  the table builder.
                        unsigned slot = p_hash(&ps, key);
                        while (ps.hashv[slot].state != key)
                            ++slot;
                        strno = ps.hashv[slot].strno;
                    }

                    ret = cb(strno, cp - text, context);
                    if (ret)
                        goto done;
                }

                if (!state && !t_isleaf(&ps, tran))
                    state = t_next(&ps, tran);
                if (state && !(tran & IS_SUFFIX))
                    break;
            }

            if (chain == ROOT)
                break;

            // Step to the next state on the backref chain and
            //  fetch its transition on the same symbol.
            TRAN const back = p_tran(&ps, chain, BACK);
            if (t_valid(&ps, back))
                chain = t_next(&ps, back);
            else
                chain = ROOT;
            tran = p_tran(&ps, chain, sym);
        }
    }

done:
    // Save the state so a later call can resume from it.
    *statep = state;
    return ret;
}