Ejemplo n.º 1
0
static
void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
                      map<s32, CharReach> &look) {
    ue2::flat_set<NFAVertex> curr, next;

    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        if (contains(g[v].reports, report)) {
            curr.insert(v);
        }
    }

    for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
        if (curr.empty() || contains(curr, g.start) ||
            contains(curr, g.startDs)) {
            break;
        }

        next.clear();
        CharReach cr;

        for (auto v : curr) {
            assert(!is_special(v, g));
            cr |= g[v].char_reach;
            insert(&next, inv_adjacent_vertices(v, g));
        }

        assert(cr.any());
        look[0 - i] |= cr;
        curr.swap(next);
    }
}
Ejemplo n.º 2
0
// Reverse truffle scan for {'a', 'B'} over a buffer whose last matching byte
// is the 'B' at offset 32, directly preceded by a run of 'a's. Each phase is
// repeated with 0..15 bytes trimmed off the end of the scanned region.
TEST(ReverseTruffle, ExecMatch3) {
    m128 mask1, mask2;

    CharReach wanted;
    wanted.set('a');
    wanted.set('B');

    truffleBuildMasks(wanted, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char buf[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaBbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const size_t buf_len = strlen(buf);

    // Phase 1: the 'B' is the last match in the buffer.
    for (size_t trim = 0; trim < 16; trim++) {
        u8 *scan_end = (u8 *)buf + buf_len - trim;
        const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)buf, scan_end);
        ASSERT_NE((const u8 *)buf - 1, rv); // not found
        EXPECT_EQ('B', (char)*rv);
        ASSERT_EQ((const u8 *)buf + 32, rv);
    }

    // Phase 2: overwrite the 'B' so that the final 'a' becomes the last match.
    ASSERT_EQ('B', buf[32]);
    buf[32] = 'b';
    for (size_t trim = 0; trim < 16; trim++) {
        u8 *scan_end = (u8 *)buf + buf_len - trim;
        const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)buf, scan_end);
        ASSERT_NE((const u8 *)buf - 1, rv); // not found
        EXPECT_EQ('a', (char)*rv);
        ASSERT_EQ((const u8 *)buf + 31, rv);
    }
}
Ejemplo n.º 3
0
void describeEdge(FILE *f, const u16 *t, u16 i) {
    for (u16 s = 0; s < N_CHARS; s++) {
        if (!t[s]) {
            continue;
        }

        u16 ss;
        for (ss = 0; ss < s; ss++) {
            if (t[s] == t[ss]) {
                break;
            }
        }

        if (ss != s) {
            continue;
        }

        CharReach reach;
        for (ss = s; ss < 256; ss++) {
            if (t[s] == t[ss]) {
                reach.set(ss);
            }
        }

        fprintf(f, "%u -> %u [ label = \"", i, t[s]);

        describeClass(f, reach, 5, CC_OUT_DOT);

        fprintf(f, "\" ];\n");
    }
}
Ejemplo n.º 4
0
static
void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
    if (rdfa.states.size() < 2) {
        return;
    }

    ue2::flat_set<dstate_id_t> curr, next;
    curr.insert(rdfa.start_anchored);

    for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
        next.clear();
        CharReach cr;

        for (const auto state_id : curr) {
            const dstate &ds = rdfa.states[state_id];

            if (!ds.reports.empty() || !ds.reports_eod.empty()) {
                return;
            }

            for (unsigned c = 0; c < N_CHARS; c++) {
                dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
                if (succ != DEAD_STATE) {
                    cr.set(c);
                    next.insert(succ);
                }
            }
        }

        assert(cr.any());
        look[i] |= cr;
        curr.swap(next);
    }
}
Ejemplo n.º 5
0
static
void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
    ue2::flat_set<NFAVertex> curr, next;

    // Consider only successors of start with the required top.
    for (const auto &e : out_edges_range(g.start, g)) {
        NFAVertex v = target(e, g);
        if (v == g.startDs) {
            continue;
        }
        if (g[e].top == top) {
            curr.insert(v);
        }
    }

    for (u32 i = 0; i < MAX_FWD_LEN; i++) {
        if (curr.empty() || contains(curr, g.accept) ||
            contains(curr, g.acceptEod)) {
            break;
        }

        next.clear();
        CharReach cr;

        for (auto v : curr) {
            assert(!is_special(v, g));
            cr |= g[v].char_reach;
            insert(&next, adjacent_vertices(v, g));
        }

        assert(cr.any());
        look[i] |= cr;
        curr.swap(next);
    }
}
Ejemplo n.º 6
0
/**
 * Compute the "stop alphabet" of graph \p g: the characters that do not
 * appear in the reachability of any non-special vertex whose maximum distance
 * is at least MAX_STOP_DEPTH.
 *
 * With \p som == SOM_NONE the per-vertex reachability comes from reduced_cr();
 * otherwise the vertex's own char_reach is used.
 */
CharReach findStopAlphabet(const NGHolder &g, som_type som) {
    const depth max_depth(MAX_STOP_DEPTH);
    const InitDepths depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // Characters that can still be consumed by sufficiently deep vertices.
    CharReach alphabet;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        if (!(depths.maxDist(g, v) >= max_depth)) {
            continue;
        }

        if (som != SOM_NONE) {
            alphabet |= g[v].char_reach;
        } else {
            alphabet |= reduced_cr(v, g, no_vertices);
        }
    }

    // The stops are everything outside that alphabet.
    alphabet.flip();

    return alphabet;
}
Ejemplo n.º 7
0
// Checks CharReach comparison and the |, |=, & and &= operators using the
// sets {a}, {z} and {a, z}.
TEST(ng_charreach, bitwise) {
    CharReach only_a;
    CharReach only_z;
    CharReach both;
    CharReach merged;

    only_a.set('a');

    only_z.set('z');

    both.set('a');
    both.set('z');

    ASSERT_TRUE(only_a < both);

    // Building {a, z} through |= must give the same set as setting bits.
    merged |= only_a;
    merged |= only_z;

    ASSERT_TRUE(both == merged);

    ASSERT_TRUE(both == (only_a | only_z));
    ASSERT_TRUE(merged == (only_a | only_z));

    // Intersecting with the superset gives back the subset.
    ASSERT_TRUE(only_a == (only_a & both));
    ASSERT_TRUE(only_z == (only_z & both));

    both &= only_a;

    ASSERT_FALSE(both.test('z'));
}
Ejemplo n.º 8
0
static
void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
    for (u16 s = 0; s < N_CHARS; s++) {
        if (!t[s]) {
            continue;
        }

        u16 ss;
        for (ss = 0; ss < s; ss++) {
            if (t[s] == t[ss]) {
                break;
            }
        }

        if (ss != s) {
            continue;
        }

        CharReach reach;
        for (ss = s; ss < 256; ss++) {
            if (t[s] == t[ss]) {
                reach.set(ss);
            }
        }

        fprintf(f, "%u -> %u [ ", i, t[s]);
        if (i < m->sheng_end && t[s] < m->sheng_end) {
            fprintf(f, "color = red, fontcolor = red ");
        }
        fprintf(f, "label = \"");
        describeClass(f, reach, 5, CC_OUT_DOT);

        fprintf(f, "\" ];\n");
    }
}
Ejemplo n.º 9
0
// A default-constructed CharReach has no bits set and a capacity of 256.
TEST(ng_charreach, init) {
    CharReach empty;

    ASSERT_EQ(0U, empty.count());
    ASSERT_TRUE(empty.none());
    ASSERT_FALSE(empty.all());
    ASSERT_EQ(256U, empty.size());
}
Ejemplo n.º 10
0
// CharReach::dot() must have every one of the 256 bits set.
TEST(ng_charreach, dot) {
    CharReach dot = CharReach::dot();
    // Unsigned literal, matching the other count()/size() checks in this
    // suite and avoiding a signed/unsigned comparison inside ASSERT_EQ.
    ASSERT_EQ(256U, dot.count());
    ASSERT_TRUE(dot.all());
    for (size_t i = 0; i < 256; i++) {
        ASSERT_TRUE(dot.test(i));
    }
}
Ejemplo n.º 11
0
// Copy construction yields a CharReach equal to its source.
TEST(ng_charreach, copy) {
    CharReach source;
    source.set('a');
    source.set('z');

    CharReach duplicate(source);

    ASSERT_EQ(source.count(), duplicate.count());
    ASSERT_TRUE(source == duplicate);
}
Ejemplo n.º 12
0
 // Builds the pattern "^[<chars>]{min,max}", where <chars> is every bit set
 // in `reach` written as a two-digit \xNN hex escape. Crude, but simple.
 string make_pattern() {
     std::ostringstream pattern;
     pattern << "^[";
     size_t c = reach.find_first();
     while (c != CharReach::npos) {
         // Switch back to decimal afterwards so min/max print normally.
         pattern << "\\x" << std::hex << std::setw(2) << std::setfill('0')
                 << (unsigned)(c & 0xff) << std::dec;
         c = reach.find_next(c);
     }
     pattern << "]{" << min << "," << max << "}";
     return pattern.str();
 }
Ejemplo n.º 13
0
// set() adds a single bit; setall() fills the whole set.
TEST(ng_charreach, set) {
    CharReach reach;

    // Starts out empty.
    ASSERT_EQ(0U, reach.count());
    ASSERT_TRUE(reach.none());
    ASSERT_FALSE(reach.all());

    reach.set('q');
    ASSERT_EQ(1U, reach.count());

    reach.setall();
    ASSERT_EQ(reach.size(), reach.count());
    ASSERT_TRUE(reach.all());
}
Ejemplo n.º 14
0
// Copy assignment yields a CharReach equal to its source.
TEST(ng_charreach, assignment) {
    CharReach source;
    source.set('f');
    source.set('l');
    source.set('y');

    CharReach assigned;
    assigned = source;

    ASSERT_EQ(source.count(), assigned.count());
    ASSERT_TRUE(source == assigned);
}
Ejemplo n.º 15
0
// Exhaustively checks setRange() for every contiguous range [lo, hi] inside
// 0..255: first bit, last bit and population count must all match.
TEST(ng_charreach, setRange) {
    for (unsigned span = 0; span < 256; span++) {
        for (unsigned lo = 0; lo < 256 - span; lo++) {
            const unsigned hi = lo + span;
            CharReach reach;
            reach.setRange(lo, hi);
            ASSERT_EQ(lo, reach.find_first());
            ASSERT_EQ(hi, reach.find_last());
            ASSERT_EQ(span + 1, reach.count());
        }
    }
}
Ejemplo n.º 16
0
// Reverse truffle scan over a one-byte buffer that consists solely of the
// wanted character: the match is expected at the buffer start.
TEST(ReverseTruffle, ExecMiniMatch0) {
    m128 lo, hi;

    CharReach wanted;
    wanted.set('a');

    truffleBuildMasks(wanted, (u8 *)&lo, (u8 *)&hi);

    char buf[] = "a";
    u8 *scan_end = (u8 *)buf + strlen(buf);

    const u8 *rv = rtruffleExec(lo, hi, (u8 *)buf, scan_end);

    ASSERT_EQ((size_t)buf, (size_t)rv);
}
Ejemplo n.º 17
0
// Forward truffle scan over an 11-byte buffer of NUL bytes with a single 'a'
// at offset 7: the match is expected at that offset.
TEST(Truffle, ExecMiniMatch3) {
    m128 lo, hi;

    CharReach wanted;
    wanted.set('a');

    truffleBuildMasks(wanted, (u8 *)&lo, (u8 *)&hi);

    char buf[] = "\0\0\0\0\0\0\0a\0\0\0";
    // Explicit length: strlen() would stop at the first NUL.
    u8 *scan_end = (u8 *)buf + 11;

    const u8 *rv = truffleExec(lo, hi, (u8 *)buf, scan_end);

    ASSERT_EQ((size_t)buf + 7, (size_t)rv);
}
Ejemplo n.º 18
0
// Exercises CharReach::find_nth(n), which is expected to return the index of
// the n-th set bit (counting from zero) or CharReach::npos when fewer than
// n + 1 bits are set.
TEST(ng_charreach, find_nth) {
    const size_t npos = CharReach::npos;

    // One bit cases.
    for (size_t i = 0; i < 256; i++) {
        CharReach cr((unsigned char)i);
        ASSERT_EQ(i, cr.find_nth(0));
        ASSERT_EQ(npos, cr.find_nth(1));
    }

    // All bits set.
    CharReach dot = CharReach::dot();
    for (size_t i = 0; i < 256; i++) {
        ASSERT_EQ(i, dot.find_nth(i));
    }

    // Trivial two bit cases: bit i in the lower half paired with its mirror
    // in the upper half. The mirror is 255 - i; using 256 - i would index bit
    // 256 when i == 0, which is outside the 256-bit set (valid bits 0..255).
    for (size_t i = 0; i < 128; i++) {
        CharReach cr;
        cr.set(i);
        cr.set(255 - i);
        ASSERT_EQ(i, cr.find_nth(0));
        ASSERT_EQ(255 - i, cr.find_nth(1));
        // Only two bits are set, so any higher rank is absent.
        ASSERT_EQ(npos, cr.find_nth(2));
        ASSERT_EQ(npos, cr.find_nth(3));
    }

    // More complex case: the characters of str are strictly ascending, so
    // the i-th set bit is str[i].
    const std::string str("\x01\x02\x03\x05\x06\x20!#$%&./0123568:;ABCDEFMNOPUYZbcdefwxyz");
    CharReach cr(str);
    for (size_t i = 0; i < str.length(); i++) {
        ASSERT_EQ(str[i], cr.find_nth(i));
    }
    ASSERT_EQ(npos, cr.find_nth(str.length()));
}
Ejemplo n.º 19
0
// isCaselessChar() is expected to hold only while the set is exactly one
// upper/lower pair of the same letter.
TEST(ng_charreach, caseless) {
    CharReach reach;

    // {a}
    reach.set('a');
    ASSERT_FALSE(reach.isCaselessChar());

    // {a, A}
    reach.set('A');
    ASSERT_TRUE(reach.isCaselessChar());

    // {a, A, b}
    reach.set('b');
    ASSERT_FALSE(reach.isCaselessChar());

    // {a, A, b, B}
    reach.set('B');
    ASSERT_FALSE(reach.isCaselessChar());
}
Ejemplo n.º 20
0
/**
 * Returns how many characters have a non-dead successor from the anchored
 * start state of \p raw, or 0 when the DFA has fewer than two states.
 */
u32 mcclellanStartReachSize(const raw_dfa *raw) {
    if (raw->states.size() < 2) {
        return 0;
    }

    const dstate &start = raw->states[raw->start_anchored];

    CharReach live;
    for (unsigned c = 0; c < N_CHARS; c++) {
        const auto succ = start.next[raw->alpha_remap[c]];
        if (succ == DEAD_STATE) {
            continue;
        }
        live.set(c);
    }

    return live.count();
}
Ejemplo n.º 21
0
// Round trip: masks built from the full character set must decode back to
// the full character set.
TEST(Truffle, CompileDot) {
    m128 mask1, mask2;
    memset(&mask1, 0, sizeof(mask1));
    memset(&mask2, 0, sizeof(mask2));

    CharReach everything;
    everything.setall();

    truffleBuildMasks(everything, (u8 *)&mask1, (u8 *)&mask2);

    CharReach decoded = truffle2cr((u8 *)&mask1, (u8 *)&mask2);

    ASSERT_EQ(decoded, everything);
}
Ejemplo n.º 22
0
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_TRUF);

    StdioFile f(base + ".txt", "w");

    const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
    lbrDumpCommon(&lt->common, f);

    CharReach cr = truffle2cr((const u8 *)&lt->mask1,
                              (const u8 *)&lt->mask2);
    fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n",
            describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Ejemplo n.º 23
0
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_SHUF);

    StdioFile f(base + ".txt", "w");

    const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
    lbrDumpCommon(&ls->common, f);

    CharReach cr = shufti2cr((const u8 *)&ls->mask_lo,
                             (const u8 *)&ls->mask_hi);
    fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n",
            describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Ejemplo n.º 24
0
// Forward truffle scan for 'V' over a buffer of 'e' bytes, from 16 different
// start offsets. Nothing matches, and the test expects the end-of-buffer
// pointer back each time.
TEST(Truffle, ExecNoMatch3) {
    m128 mask1, mask2;

    CharReach wanted;

    wanted.set('V'); /* V = 0x56, e = 0x65 */

    truffleBuildMasks(wanted, (u8 *)&mask1, (u8 *)&mask2);

    char buf[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
    const size_t buf_len = strlen(buf);

    for (size_t skip = 0; skip < 16; skip++) {
        u8 *scan_begin = (u8 *)buf + skip;
        u8 *scan_end = (u8 *)buf + buf_len;
        const u8 *rv = truffleExec(mask1, mask2, scan_begin, scan_end);

        ASSERT_EQ((size_t)buf + buf_len, (size_t)rv);
    }
}
Ejemplo n.º 25
0
// Reverse truffle scan for 'V' over a buffer of 'e' bytes, with 0..15 bytes
// trimmed off the end. Nothing matches, and the test expects the pointer one
// byte before the scanned buffer each time. The scanned buffer starts one
// byte into the backing array so that this pointer stays inside the array.
TEST(ReverseTruffle, ExecNoMatch3) {
    m128 mask1, mask2;

    CharReach wanted;
    wanted.set('V'); /* V = 0x56, e = 0x65 */

    truffleBuildMasks(wanted, (u8 *)&mask1, (u8 *)&mask2);

    char backing[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
    char *buf = backing + 1;
    const size_t buf_len = strlen(buf);

    for (size_t trim = 0; trim < 16; trim++) {
        u8 *scan_end = (u8 *)buf + buf_len - trim;
        const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)buf, scan_end);
        ASSERT_EQ((const u8 *)backing, rv);
    }
}
Ejemplo n.º 26
0
// Reverse truffle scan for 'a' over a short buffer with 'a' at offsets 1 and
// 7: the later one (offset 7) is the expected result.
TEST(ReverseTruffle, ExecMiniMatch2) {
    m128 mask1, mask2;

    CharReach wanted;
    wanted.set('a');

    truffleBuildMasks(wanted, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char buf[] = "babbbbbabbbb";
    const size_t buf_len = strlen(buf);
    u8 *scan_end = (u8 *)buf + buf_len;

    const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)buf, scan_end);
    ASSERT_NE((const u8 *)buf - 1, rv); // not found
    EXPECT_EQ('a', (char)*rv);
    ASSERT_EQ((const u8 *)buf + 7, rv);
}
Ejemplo n.º 27
0
// Forward truffle scan for 'a' from 16 different start offsets, all of which
// lie before the first 'a' at offset 17: that offset is always the expected
// result.
TEST(Truffle, ExecMatch2) {
    m128 mask1, mask2;

    CharReach wanted;

    wanted.set('a');

    truffleBuildMasks(wanted, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char buf[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
    const size_t buf_len = strlen(buf);

    for (size_t skip = 0; skip < 16; skip++) {
        u8 *scan_begin = (u8 *)buf + skip;
        u8 *scan_end = (u8 *)buf + buf_len;
        const u8 *rv = truffleExec(mask1, mask2, scan_begin, scan_end);

        ASSERT_EQ((size_t)buf + 17, (size_t)rv);
    }
}
Ejemplo n.º 28
0
// Forward truffle scan for 'a' while 'a' bytes are planted one at a time at
// offsets 48, 47, ..., 18. After each write the newest (lowest) offset is the
// first match and therefore the expected result.
TEST(Truffle, ExecMatch5) {
    m128 mask1, mask2;

    CharReach wanted;

    wanted.set('a');

    truffleBuildMasks(wanted, (u8 *)&mask1, (u8 *)&mask2);

    char buf[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";

    for (size_t step = 0; step < 31; step++) {
        const size_t pos = 48 - step;
        buf[pos] = 'a';
        u8 *scan_end = (u8 *)buf + strlen(buf);
        const u8 *rv = truffleExec(mask1, mask2, (u8 *)buf, scan_end);

        ASSERT_EQ((size_t)&buf[pos], (size_t)rv);
    }
}
Ejemplo n.º 29
0
// Reverse truffle scan for 'a' while 'a' bytes are planted one at a time from
// the front of the buffer to the back. After each write the newest (highest)
// offset is the last match and therefore the expected result.
TEST(ReverseTruffle, ExecMatch5) {
    m128 mask1, mask2;

    CharReach wanted;
    wanted.set('a');

    truffleBuildMasks(wanted, (u8 *)&mask1, (u8 *)&mask2);

    char buf[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    const size_t buf_len = strlen(buf);

    for (size_t pos = 0; pos < buf_len; pos++) {
        buf[pos] = 'a';
        u8 *scan_end = (u8 *)buf + buf_len;
        const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)buf, scan_end);

        ASSERT_EQ((const u8 *)buf + pos, rv);
    }
}
Ejemplo n.º 30
0
// Checks isCaselessChar() against every pair of byte values: it must hold
// exactly when the two values are the upper- and lower-case forms of the same
// ASCII letter.
TEST(ng_charreach, caseless2) {
    for (size_t first = 0; first < 256; first++) {
        // A lone character is never a caseless pair.
        ASSERT_FALSE(CharReach((unsigned char)first).isCaselessChar());
        for (size_t second = 0; second < 256; second++) {
            CharReach cr;
            cr.set(first);
            cr.set(second);

            // Upper and lower case ASCII letters differ by 0x20.
            const bool first_is_upper = first >= 'A' && first <= 'Z';
            const bool first_is_lower = first >= 'a' && first <= 'z';
            const bool caseless_pair =
                (first_is_upper && second == first + 0x20) ||
                (first_is_lower && first == second + 0x20);

            ASSERT_EQ(caseless_pair, cr.isCaselessChar())
                << "Failed for i=" << first << ", j=" << second;
        }
    }
}