Exemplo n.º 1
0
/**
 * Converts a reachable depth into its u32 representation.
 *
 * An infinite depth maps to REPEAT_INF; a finite depth maps to its numeric
 * value, which is asserted to be strictly below REPEAT_INF.
 */
static
u32 depth_to_u32(const depth &d) {
    assert(d.is_reachable());

    if (!d.is_infinite()) {
        // Finite case: rely on depth's conversion to u32.
        u32 value = d;
        assert(value < REPEAT_INF);
        return value;
    }

    return REPEAT_INF;
}
Exemplo n.º 2
0
/**
 * Builds an LBR engine for the given character class and repeat bounds.
 *
 * The specialised builders are tried in a fixed order (Dot, Verm, NVerm,
 * Shuf, Truf); the first one that yields an engine wins. If none of them
 * produces an engine, this asserts and returns nullptr.
 */
static
aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
                                     const depth &repeatMin,
                                     const depth &repeatMax, u32 minPeriod,
                                     bool is_reset, ReportID report) {
    DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
                 repeatMin.str().c_str(), repeatMax.str().c_str(),
                 describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
                 report);
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());

    // Start with the Dot builder, then fall back through the remaining
    // builders only while we still have no engine.
    aligned_unique_ptr<NFA> engine =
        buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);

    if (!engine) {
        engine = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
                              report);
    }

    if (!engine) {
        engine = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
                               report);
    }

    if (!engine) {
        engine = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
                              report);
    }

    if (!engine) {
        engine = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
                              report);
    }

    if (engine) {
        return engine;
    }

    // No builder accepted this repeat.
    assert(0);
    return nullptr;
}
Exemplo n.º 3
0
/**
 * Anchors an unanchored pattern that has a max_offset and has not asked for
 * SOM, by prepending the equivalent of ^.{min_bound,max_bound} to it; this
 * limits the pattern's lifespan. The transformation can't be used if there's
 * a min_length, as that is currently handled using "sly SOM".
 *
 * Graphs with a combination of anchored and unanchored paths could be
 * handled in principle, but it's too tricky for the moment.
 *
 * Returns true if the graph was rewritten. Every false return happens before
 * the graph is modified.
 */
static
bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
                                    const depth &maxWidth) {
    assert(!g.som);
    assert(g.max_offset != MAX_OFFSET);
    assert(minWidth <= maxWidth);
    assert(maxWidth.is_reachable());

    DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
                 minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset,
                 g.max_offset);

    if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
        return false;
    }

    if (g.max_offset < minWidth) {
        assert(0);
        return false;
    }

    // Patterns with virtual starts are probably best left alone.
    if (hasVirtualStarts(g)) {
        DEBUG_PRINTF("virtual starts, bailing\n");
        return false;
    }

    // Vacuous patterns are skipped as well. TODO: this could be done, we
    // would just need to be a little careful with reports.
    if (isVacuous(g)) {
        DEBUG_PRINTF("vacuous, bailing\n");
        return false;
    }

    // The upper bound does not depend on whether maxWidth is finite; the
    // lower bound is only non-zero for a finite maxWidth below min_offset.
    const u32 max_bound = g.max_offset - minWidth;
    u32 min_bound = 0;
    if (!maxWidth.is_infinite() && g.min_offset > maxWidth) {
        min_bound = g.min_offset - maxWidth;
    }

    DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);

    // Gather the successors of startDs, ignoring its self-loop.
    vector<NFAVertex> initials;
    for (auto succ : adjacent_vertices_range(g.startDs, g)) {
        if (succ != g.startDs) {
            initials.push_back(succ);
        }
    }
    if (initials.empty()) {
        DEBUG_PRINTF("no initial vertices\n");
        return false;
    }

    // Chain 'min_bound' mandatory dots off the anchored start.
    NFAVertex tail = g.start;
    for (u32 made = 0; made < min_bound; ++made) {
        NFAVertex dot = add_vertex(g);
        g[dot].char_reach.setall();
        add_edge(tail, dot, g);
        tail = dot;
    }

    // 'head' is the end of the mandatory chain (or g.start if there is none).
    NFAVertex head = tail;

    // Append (max_bound - min_bound) optional dots; each one is reachable
    // both from its predecessor and directly from 'head'.
    const u32 num_optional = max_bound - min_bound;
    for (u32 made = 0; made < num_optional; ++made) {
        NFAVertex dot = add_vertex(g);
        g[dot].char_reach.setall();
        if (head != tail) {
            add_edge(head, dot, g);
        }
        add_edge(tail, dot, g);
        tail = dot;
    }

    // Detach the initials from both starts, then hang them off 'head' and
    // off the last dot ('tail').
    const bool distinct_ends = head != tail;
    for (auto init : initials) {
        remove_edge(g.startDs, init, g);
        remove_edge(g.start, init, g);

        if (distinct_ends) {
            add_edge(head, init, g);
        }
        add_edge(tail, init, g);
    }

    g.renumberVertices();
    g.renumberEdges();

    return true;
}
Exemplo n.º 4
0
/**
 * Computes the storage layout for a bounded repeat of the given type.
 *
 * All size fields start at zero and are then filled in per repeat type:
 * stateSize, packedCtrlSize and horizon for every handled type, plus
 * patchCount/patchSize/encodingSize/patchesOffset for
 * REPEAT_SPARSE_OPTIMAL_P and packedFieldSizes for REPEAT_TRAILER. A type
 * with no matching case leaves every field at zero.
 *
 * Preconditions (asserted): repeatMin <= repeatMax, repeatMax is reachable,
 * and minPeriod is non-zero when type is REPEAT_SPARSE_OPTIMAL_P. The
 * resulting packedCtrlSize is asserted to fit in a RepeatControl.
 *
 * \param type      repeat model to lay out.
 * \param repeatMin lower repeat bound.
 * \param repeatMax upper repeat bound.
 * \param minPeriod only read in the REPEAT_SPARSE_OPTIMAL_P case.
 */
RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
                                 const depth &repeatMax, u32 minPeriod)
    : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
      patchSize(0), encodingSize(0), patchesOffset(0) {
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());
    assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);

    switch (type) {
    case REPEAT_FIRST:
        // Horizon is driven by the lower bound only.
        assert(repeatMin.is_finite());
        stateSize = 0; // everything is in the control block.
        horizon = repeatMin;
        packedCtrlSize = calcPackedBytes(horizon + 1);
        break;
    case REPEAT_LAST:
        // Horizon is driven by the upper bound only.
        assert(repeatMax.is_finite());
        stateSize = 0; // everything is in the control block.
        horizon = repeatMax + 1;
        packedCtrlSize = calcPackedBytes(horizon + 1);
        break;
    case REPEAT_RING:
        assert(repeatMax.is_finite());
        stateSize = mmbit_size(repeatMax + 1);
        horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */
        // Packed offset member, plus two bytes for each ring index, reduced to
        // one byte each if they'll fit in eight bits.
        {
            u32 offset_len = calcPackedBytes(horizon + 1);
            u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4;
            packedCtrlSize = offset_len + ring_indices_len;
        }
        break;
    case REPEAT_RANGE:
        assert(repeatMax.is_finite());
        assert(repeatMin < repeatMax);
        // One u16 of stream state per range slot.
        stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16);
        horizon = repeatMax * 2 + 1;
        // Packed offset member, plus one byte for the number of range
        // elements.
        packedCtrlSize = calcPackedBytes(horizon + 1) + 1;
        break;
    case REPEAT_BITMAP:
        stateSize = 0; // everything is in the control block.
        horizon = 0;   // unused
        // (repeatMax + 1) bits, expressed in bytes. NOTE(review): presumably
        // ROUNDUP_N rounds up to a multiple of 8 -- confirm against its
        // definition.
        packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8;
        break;
    case REPEAT_SPARSE_OPTIMAL_P:
        assert(minPeriod);
        assert(repeatMax.is_finite());
        {
            // NOTE(review): repeatRecurTable receives 'this'; presumably it
            // populates 'table', which is read below -- confirm.
            u32 rv = repeatRecurTable(this, repeatMax, minPeriod);
            u32 repeatTmp = 0;
            if ((u32)repeatMax < minPeriod) {
                // The whole repeat is covered by a single patch.
                repeatTmp = repeatMax;
                patchCount = 1;
            } else {
                // find optimal patch size
                // NOTE(review): findOptimalPatchSize also receives 'this';
                // presumably it sets patchCount, which is checked right
                // after -- confirm.
                repeatTmp =
                    findOptimalPatchSize(this, repeatMax, minPeriod, rv);
                assert(patchCount < 65536);
            }
            DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin,
                         (u32)repeatMax, minPeriod);
            // Bytes per patch are derived from the table entry for the
            // chosen patch size.
            u64a maxVal = table[repeatTmp];
            encodingSize = calcPackedBytes(maxVal);
            patchSize = repeatTmp;
            assert(encodingSize <= 64);

            // State layout: an mmbit sized for patchCount, followed by
            // patchCount encodings of encodingSize bytes each.
            patchesOffset = mmbit_size(patchCount);
            stateSize = patchesOffset + encodingSize * patchCount;
            horizon = (repeatTmp * patchCount) * 2 + 1;
            // Same 2-or-4 byte choice as REPEAT_RING, keyed on patchCount.
            u32 ring_indices_len = patchCount < depth(254) ? 2 : 4;
            packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len;
        }
        break;
    case REPEAT_TRAILER:
        assert(repeatMax.is_finite());
        assert(repeatMin <= depth(64));
        stateSize = 0; // everything is in the control block.
        horizon = repeatMax + 1;
        // Two packed fields, both measured in bits: the first sized from the
        // horizon, the second repeatMin bits wide.
        packedFieldSizes.resize(2);
        packedFieldSizes[0] = calcPackedBits(horizon + 1);
        packedFieldSizes[1] = repeatMin;
        // Total bit count rounded up to whole bytes.
        packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
        break;
    }
    DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
                 packedCtrlSize, horizon);

    // The packed control block must fit inside a RepeatControl.
    assert(packedCtrlSize <= sizeof(RepeatControl));
}