Exemplo n.º 1
0
template <class LbrStruct> static
void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
             const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
    assert(nfa);

    RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);

    DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
                 repeatTypeName(rtype), repeatMin.str().c_str(),
                 repeatMax.str().c_str());

    // Fill the lbr_common structure first. Note that the RepeatInfo structure
    // directly follows the LbrStruct.
    const u32 info_offset = sizeof(LbrStruct);
    c->repeatInfoOffset = info_offset;
    c->report = report;

    RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
    info->type = verify_u8(rtype);
    info->repeatMin = depth_to_u32(repeatMin);
    info->repeatMax = depth_to_u32(repeatMax);
    info->stateSize = rsi.stateSize;
    info->packedCtrlSize = rsi.packedCtrlSize;
    info->horizon = rsi.horizon;
    info->minPeriod = minPeriod;
    copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
    info->patchCount = rsi.patchCount;
    info->patchSize = rsi.patchSize;
    info->encodingSize = rsi.encodingSize;
    info->patchesOffset = rsi.patchesOffset;

    // Fill the NFA structure.
    nfa->nPositions = repeatMin;
    nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
    nfa->scratchStateSize = (u32)sizeof(lbr_state);
    nfa->minWidth = verify_u32(repeatMin);
    nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;

    // Fill the lbr table for sparse lbr model.
    if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
        u64a *table = getTable<LbrStruct>(nfa);
        // Adjust table length according to the optimal patch length.
        size_t len = nfa->length;
        assert((u32)repeatMax >= rsi.patchSize);
        len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
        nfa->length = verify_u32(len);
        info->length = verify_u32(sizeof(RepeatInfo)
                                  + sizeof(u64a) * (rsi.patchSize + 1));
        copy_bytes(table, rsi.table);
    }
}
Exemplo n.º 2
0
static
aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
                                     const depth &repeatMin,
                                     const depth &repeatMax, u32 minPeriod,
                                     bool is_reset, ReportID report) {
    DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
                 repeatMin.str().c_str(), repeatMax.str().c_str(),
                 describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
                 report);
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());

    aligned_unique_ptr<NFA> nfa
        = buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);

    if (!nfa) {
        nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
                           report);
    }
    if (!nfa) {
        nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
                            report);
    }
    if (!nfa) {
        nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
                           report);
    }
    if (!nfa) {
        nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
                           report);
    }

    if (!nfa) {
        assert(0);
        return nullptr;
    }

    return nfa;
}
Exemplo n.º 3
0
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth max_width = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                  describeClass(castle.reach()).c_str(),
                  max_width.str().c_str());

    const CharReach escape = ~castle.reach(); // invert reach for stop chars.

    u32 d = min(max_width, depth(MAX_STOP_DEPTH));
    const u8 mask = verify_u8((1U << d) - 1);

    vector<u8> stop(N_CHARS, 0);

    for (size_t c = escape.find_first(); c != escape.npos;
         c = escape.find_next(c)) {
        stop[c] |= mask;
    }

    return stop;
}
Exemplo n.º 4
0
/** If the pattern is unanchored, has a max_offset and has not asked for SOM,
 * we can use that knowledge to anchor it which will limit its lifespan. Note
 * that we can't use this transformation if there's a min_length, as it's
 * currently handled using "sly SOM".
 *
 * Note that it is possible to handle graphs that have a combination of
 * anchored and unanchored paths, but it's too tricky for the moment.
 */
static
bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
                                    const depth &maxWidth) {
    assert(!g.som);
    assert(g.max_offset != MAX_OFFSET);
    assert(minWidth <= maxWidth);
    assert(maxWidth.is_reachable());

    DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
                 minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset,
                 g.max_offset);

    if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
        return false;
    }

    if (g.max_offset < minWidth) {
        assert(0);
        return false;
    }

    // If the pattern has virtual starts, we probably don't want to touch it.
    if (hasVirtualStarts(g)) {
        DEBUG_PRINTF("virtual starts, bailing\n");
        return false;
    }

    // Similarly, bail if the pattern is vacuous. TODO: this could be done, we
    // would just need to be a little careful with reports.
    if (isVacuous(g)) {
        DEBUG_PRINTF("vacuous, bailing\n");
        return false;
    }

    u32 min_bound, max_bound;
    if (maxWidth.is_infinite()) {
        min_bound = 0;
        max_bound = g.max_offset - minWidth;
    } else {
        min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0;
        max_bound = g.max_offset - minWidth;
    }

    DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);

    vector<NFAVertex> initials;
    for (auto v : adjacent_vertices_range(g.startDs, g)) {
        if (v == g.startDs) {
            continue;
        }
        initials.push_back(v);
    }
    if (initials.empty()) {
        DEBUG_PRINTF("no initial vertices\n");
        return false;
    }

    // Wire up 'min_offset' mandatory dots from anchored start.
    NFAVertex u = g.start;
    for (u32 i = 0; i < min_bound; i++) {
        NFAVertex v = add_vertex(g);
        g[v].char_reach.setall();
        add_edge(u, v, g);
        u = v;
    }

    NFAVertex head = u;

    // Wire up optional dots for (max_offset - min_offset).
    for (u32 i = 0; i < max_bound - min_bound; i++) {
        NFAVertex v = add_vertex(g);
        g[v].char_reach.setall();
        if (head != u) {
            add_edge(head, v, g);
        }
        add_edge(u, v, g);
        u = v;
    }

    // Remove edges from starts and wire both head and u to our initials.
    for (auto v : initials) {
        remove_edge(g.startDs, v, g);
        remove_edge(g.start, v, g);

        if (head != u) {
            add_edge(head, v, g);
        }
        add_edge(u, v, g);
    }

    g.renumberVertices();
    g.renumberEdges();

    return true;
}