/**
 * Converts a depth into a u32. The depth must be reachable. An infinite
 * depth maps to REPEAT_INF; a finite depth is returned as-is and is expected
 * to be strictly less than REPEAT_INF.
 */
static
u32 depth_to_u32(const depth &d) {
    assert(d.is_reachable());

    if (!d.is_infinite()) {
        const u32 value = d;
        assert(value < REPEAT_INF);
        return value;
    }

    return REPEAT_INF;
}
/**
 * Builds the NFA for a repeat by trying each buildLbr* builder in a fixed
 * order (Dot, Verm, NVerm, Shuf, Truf) and returning the first non-null
 * result. If every builder returns null, this asserts and returns nullptr.
 */
static
aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
                                     const depth &repeatMin,
                                     const depth &repeatMax, u32 minPeriod,
                                     bool is_reset, ReportID report) {
    DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
                 repeatMin.str().c_str(), repeatMax.str().c_str(),
                 describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
                 report);
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());

    aligned_unique_ptr<NFA> engine =
        buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (engine) {
        return engine;
    }

    engine =
        buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (engine) {
        return engine;
    }

    engine =
        buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (engine) {
        return engine;
    }

    engine =
        buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (engine) {
        return engine;
    }

    engine =
        buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (engine) {
        return engine;
    }

    // None of the builders produced an implementation.
    assert(0);
    return nullptr;
}
/** If the pattern is unanchored, has a max_offset and has not asked for SOM, * we can use that knowledge to anchor it which will limit its lifespan. Note * that we can't use this transformation if there's a min_length, as it's * currently handled using "sly SOM". * * Note that it is possible to handle graphs that have a combination of * anchored and unanchored paths, but it's too tricky for the moment. */ static bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, const depth &maxWidth) { assert(!g.som); assert(g.max_offset != MAX_OFFSET); assert(minWidth <= maxWidth); assert(maxWidth.is_reachable()); DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset, g.max_offset); if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { return false; } if (g.max_offset < minWidth) { assert(0); return false; } // If the pattern has virtual starts, we probably don't want to touch it. if (hasVirtualStarts(g)) { DEBUG_PRINTF("virtual starts, bailing\n"); return false; } // Similarly, bail if the pattern is vacuous. TODO: this could be done, we // would just need to be a little careful with reports. if (isVacuous(g)) { DEBUG_PRINTF("vacuous, bailing\n"); return false; } u32 min_bound, max_bound; if (maxWidth.is_infinite()) { min_bound = 0; max_bound = g.max_offset - minWidth; } else { min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0; max_bound = g.max_offset - minWidth; } DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); vector<NFAVertex> initials; for (auto v : adjacent_vertices_range(g.startDs, g)) { if (v == g.startDs) { continue; } initials.push_back(v); } if (initials.empty()) { DEBUG_PRINTF("no initial vertices\n"); return false; } // Wire up 'min_offset' mandatory dots from anchored start. 
NFAVertex u = g.start; for (u32 i = 0; i < min_bound; i++) { NFAVertex v = add_vertex(g); g[v].char_reach.setall(); add_edge(u, v, g); u = v; } NFAVertex head = u; // Wire up optional dots for (max_offset - min_offset). for (u32 i = 0; i < max_bound - min_bound; i++) { NFAVertex v = add_vertex(g); g[v].char_reach.setall(); if (head != u) { add_edge(head, v, g); } add_edge(u, v, g); u = v; } // Remove edges from starts and wire both head and u to our initials. for (auto v : initials) { remove_edge(g.startDs, v, g); remove_edge(g.start, v, g); if (head != u) { add_edge(head, v, g); } add_edge(u, v, g); } g.renumberVertices(); g.renumberEdges(); return true; }
/**
 * Works out the sizing fields (stateSize, packedCtrlSize, horizon and, for
 * REPEAT_SPARSE_OPTIMAL_P, the patch-related fields) for a repeat of the
 * given type with bounds [repeatMin, repeatMax].
 *
 * minPeriod must be non-zero when type is REPEAT_SPARSE_OPTIMAL_P. The
 * resulting packedCtrlSize is asserted to fit within a RepeatControl.
 */
RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
                                 const depth &repeatMax, u32 minPeriod)
    : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
      patchSize(0), encodingSize(0), patchesOffset(0) {
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());
    assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);

    switch (type) {
    case REPEAT_FIRST: {
        assert(repeatMin.is_finite());
        // No separate state: everything lives in the control block.
        stateSize = 0;
        horizon = repeatMin;
        packedCtrlSize = calcPackedBytes(horizon + 1);
        break;
    }
    case REPEAT_LAST: {
        assert(repeatMax.is_finite());
        // No separate state: everything lives in the control block.
        stateSize = 0;
        horizon = repeatMax + 1;
        packedCtrlSize = calcPackedBytes(horizon + 1);
        break;
    }
    case REPEAT_RING: {
        assert(repeatMax.is_finite());
        stateSize = mmbit_size(repeatMax + 1);
        horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */

        // The packed control block holds the offset member plus the ring
        // indices: two bytes each, or one byte each when they fit in eight
        // bits.
        const u32 offsetBytes = calcPackedBytes(horizon + 1);
        u32 ringIndexBytes = 4;
        if (repeatMax < depth(254)) {
            ringIndexBytes = 2;
        }
        packedCtrlSize = offsetBytes + ringIndexBytes;
        break;
    }
    case REPEAT_RANGE: {
        assert(repeatMax.is_finite());
        assert(repeatMin < repeatMax);
        stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16);
        horizon = repeatMax * 2 + 1;
        // Offset member, plus a single byte counting the range elements.
        packedCtrlSize = calcPackedBytes(horizon + 1) + 1;
        break;
    }
    case REPEAT_BITMAP: {
        // No separate state: everything lives in the control block.
        stateSize = 0;
        horizon = 0; // unused
        packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8;
        break;
    }
    case REPEAT_SPARSE_OPTIMAL_P: {
        assert(minPeriod);
        assert(repeatMax.is_finite());

        // Called in both branches below; 'table' is read afterwards
        // (presumably populated by this call -- confirm in the helper).
        const u32 recurResult = repeatRecurTable(this, repeatMax, minPeriod);

        u32 patchLen = 0;
        if (static_cast<u32>(repeatMax) < minPeriod) {
            patchLen = repeatMax;
            patchCount = 1;
        } else {
            // Search for the optimal patch size. patchCount is not assigned
            // here, so it is presumably set by the helper through 'this'.
            patchLen = findOptimalPatchSize(this, repeatMax, minPeriod,
                                            recurResult);
            assert(patchCount < 65536);
        }

        DEBUG_PRINTF("repeat[%u %u], period=%u\n",
                     static_cast<u32>(repeatMin), static_cast<u32>(repeatMax),
                     minPeriod);

        const u64a tableMax = table[patchLen];
        encodingSize = calcPackedBytes(tableMax);
        patchSize = patchLen;
        assert(encodingSize <= 64);

        patchesOffset = mmbit_size(patchCount);
        stateSize = patchesOffset + encodingSize * patchCount;
        horizon = (patchLen * patchCount) * 2 + 1;

        u32 ringIndexBytes = 4;
        if (patchCount < depth(254)) {
            ringIndexBytes = 2;
        }
        packedCtrlSize = calcPackedBytes(horizon + 1) + ringIndexBytes;
        break;
    }
    case REPEAT_TRAILER: {
        assert(repeatMax.is_finite());
        assert(repeatMin <= depth(64));
        // No separate state: everything lives in the control block.
        stateSize = 0;
        horizon = repeatMax + 1;

        // Two packed fields; the control size is their combined bit width
        // rounded up to whole bytes.
        packedFieldSizes.resize(2);
        packedFieldSizes[0] = calcPackedBits(horizon + 1);
        packedFieldSizes[1] = repeatMin;
        packedCtrlSize =
            (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
        break;
    }
    }

    DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
                 packedCtrlSize, horizon);

    assert(packedCtrlSize <= sizeof(RepeatControl));
}