Пример #1
0
 // Builds an anchored pattern of the form "^[\xNN...]{min,max}", writing each
 // member of the reach as a two-digit hex escape. Crude, but easy to follow.
 string make_pattern() {
     std::ostringstream pattern;
     pattern << "^[";
     size_t member = reach.find_first();
     while (member != CharReach::npos) {
         const unsigned value = (unsigned)(member & 0xff);
         // Zero-padded hex escape; switch back to decimal afterwards so the
         // repeat bounds written below are not rendered in hex.
         pattern << "\\x" << std::hex << std::setw(2) << std::setfill('0')
                 << value << std::dec;
         member = reach.find_next(member);
     }
     pattern << "]{" << min << "," << max << "}";
     return pattern.str();
 }
Пример #2
0
TEST(ng_charreach, setRange) {
    // Exhaustively cover every contiguous [first, last] range within 0..255,
    // visiting the narrowest spans first.
    for (unsigned span = 0; span < 256; ++span) {
        const unsigned start_limit = 256 - span;
        for (unsigned first = 0; first < start_limit; ++first) {
            const unsigned last = first + span;
            CharReach bits;
            bits.setRange(first, last);
            // The set must begin and end exactly at the requested bounds and
            // contain every value in between.
            ASSERT_EQ(first, bits.find_first());
            ASSERT_EQ(last, bits.find_last());
            ASSERT_EQ(span + 1, bits.count());
        }
    }
}
Пример #3
0
/**
 * Compute the stop alphabet for every depth in [0, MAX_STOP_DEPTH) and pack
 * the result into one byte per character.
 *
 * For each non-special vertex, its reach is added to every depth below the
 * vertex's maximum distance (capped at MAX_STOP_DEPTH). A character is a stop
 * character at depth i when it is absent from the reach accumulated for that
 * depth; bit i of the returned entry for that character is then set.
 *
 * When \p som is SOM_NONE the reduced reach of each vertex is used, otherwise
 * the vertex's plain char_reach.
 *
 * \return a vector of N_CHARS masks, indexed by character.
 */
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
    const depth depth_limit(MAX_STOP_DEPTH);
    const InitDepths init_depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_repeats;

    // live[i] is the union of the reaches recorded against depth i.
    vector<CharReach> live(MAX_STOP_DEPTH);

    for (auto v : vertices_range(g)) {
        if (!is_special(v, g)) {
            const CharReach v_reach = (som == SOM_NONE)
                                          ? reduced_cr(v, g, no_repeats)
                                          : g[v].char_reach;

            // Record this vertex's reach against every depth below its
            // (capped) maximum distance.
            const u32 span = min(depth_limit, init_depths.maxDist(g, v));
            for (u32 level = 0; level < span; level++) {
                live[level] |= v_reach;
            }
        }
    }

#ifdef DEBUG
    for (u32 level = 0; level < MAX_STOP_DEPTH; level++) {
        DEBUG_PRINTF("depth %u, stop chars: ", level);
        describeClass(stdout, ~live[level], 20, CC_OUT_TEXT);
        printf("\n");
    }
#endif

    vector<u8> stop_masks(N_CHARS, 0);

    for (u32 level = 0; level < MAX_STOP_DEPTH; level++) {
        const u8 level_bit = 1U << level;
        // Stop characters are exactly those outside the accumulated reach.
        const CharReach stoppers = ~live[level];
        size_t c = stoppers.find_first();
        while (c != stoppers.npos) {
            stop_masks[c] |= level_bit;
            c = stoppers.find_next(c);
        }
    }

    return stop_masks;
}
Пример #4
0
/**
 * Build the per-character stop mask table for a castle.
 *
 * Every character outside the castle's reach receives a mask with the low
 * d bits set, where d is the castle's maximum width capped at MAX_STOP_DEPTH.
 * All other characters keep a zero mask. The \p som argument is not used.
 *
 * \return a vector of N_CHARS masks, indexed by character.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth widest = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                  describeClass(castle.reach()).c_str(),
                  widest.str().c_str());

    // One low bit per depth, up to the capped width.
    const u32 capped = min(widest, depth(MAX_STOP_DEPTH));
    const u8 depth_bits = verify_u8((1U << capped) - 1);

    // Stop characters are those the castle cannot consume.
    const CharReach stoppers = ~castle.reach();

    vector<u8> stop_masks(N_CHARS, 0);

    size_t c = stoppers.find_first();
    while (c != stoppers.npos) {
        stop_masks[c] |= depth_bits;
        c = stoppers.find_next(c);
    }

    return stop_masks;
}
Пример #5
0
TEST(ng_charreach, count) {
    // Populate the set with a handful of scattered members.
    const size_t members[] = {1, 2, 'a', 'Z', 'm', '~', 210};
    CharReach cr;
    for (size_t m : members) {
        cr.set(m);
    }

    size_t pos = cr.find_first();
    ASSERT_FALSE(pos == CharReach::npos);

    // Walking the set with find_next must visit as many members as count()
    // reports.
    unsigned int visited = 0;
    for (; pos != CharReach::npos; pos = cr.find_next(pos)) {
        visited++;
    }

    ASSERT_EQ(visited, cr.count());
}
Пример #6
0
/**
 * Try to build an LBR_NFA_Verm engine for the repeat described by the
 * arguments.
 *
 * Applies only when exactly one character lies outside \p cr; that character
 * is stored in the engine's `c` field. Returns nullptr in every other case.
 */
static
aligned_unique_ptr<NFA> buildLbrVerm(const CharReach &cr,
                                     const depth &repeatMin,
                                     const depth &repeatMax, u32 minPeriod,
                                     bool is_reset, ReportID report) {
    const CharReach escapes(~cr);
    const bool single_escape = (escapes.count() == 1);
    if (!single_escape) {
        return nullptr;
    }

    enum RepeatType repeat_type = chooseRepeatType(repeatMin, repeatMax,
                                                   minPeriod, is_reset);
    aligned_unique_ptr<NFA> engine
        = makeLbrNfa<lbr_verm>(LBR_NFA_Verm, repeat_type, repeatMax);

    // Record the lone escape character in the engine-specific structure.
    struct lbr_verm *impl = (struct lbr_verm *)getMutableImplNfa(engine.get());
    impl->c = escapes.find_first();

    fillNfa<lbr_verm>(engine.get(), &impl->common, report, repeatMin,
                      repeatMax, minPeriod, repeat_type);

    DEBUG_PRINTF("built verm lbr\n");
    return engine;
}