/**
 * Returns the maximum width in bytes of an input that will match the given
 * graph. If there is no maximum width, returns infinity.
 *
 * The width is computed separately from the start and startDs vertices. If
 * one of the two results is unreachable the other is returned as-is;
 * otherwise the larger of the two wins.
 */
depth findMaxWidth(const NGHolder &h) {
    depth fromStart = findMaxWidth(h, h.start);
    depth fromStartDs = findMaxWidth(h, h.startDs);
    DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", fromStart.str().c_str(),
                 fromStartDs.str().c_str());

    // An unreachable result carries no information; defer to the other one.
    if (fromStart.is_unreachable()) {
        return fromStartDs;
    }
    if (fromStartDs.is_unreachable()) {
        return fromStart;
    }

    return max(fromStart, fromStartDs);
}
/**
 * Builds the per-character stop table for a castle.
 *
 * Every character outside the castle's reach gets a mask with the low
 * min(max width, MAX_STOP_DEPTH) bits set; characters inside the reach keep a
 * zero entry. The returned vector has N_CHARS entries.
 *
 * \param castle  castle prototype whose reach and max width are consulted.
 * \param som     unused.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth widest = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                 describeClass(castle.reach()).c_str(),
                 widest.str().c_str());

    // Stop characters are exactly those the castle cannot consume.
    const CharReach stoppers = ~castle.reach();

    // Clamp the depth so that the mask fits in the bits available.
    u32 bits = min(widest, depth(MAX_STOP_DEPTH));
    const u8 stop_mask = verify_u8((1U << bits) - 1);

    vector<u8> table(N_CHARS, 0);

    size_t ch = stoppers.find_first();
    while (ch != stoppers.npos) {
        table[ch] |= stop_mask;
        ch = stoppers.find_next(ch);
    }

    return table;
}
/**
 * Make package appropriate size.
 *
 * Sets the cell metrics (widest glyph and font height, each plus padding, and
 * the font descent), then starts from a 1x1 grid and keeps appending a column
 * or a row -- whichever keeps the pixel width and height closest -- until the
 * grid holds at least GLYPH_PACKER_GLYPHS_COUNT cells.
 */
void GlyphPacker::measure()
{
    // Size of one cell.
    package.metrics.width = findMaxWidth() + GLYPH_PACKER_PADDING;
    package.metrics.height = factory->getFontHeight() + GLYPH_PACKER_PADDING;
    package.metrics.descent = factory->getFontDescent();

    // Begin with a single cell.
    package.cols = 1;
    package.rows = 1;
    package.height = package.metrics.height;
    package.width = package.metrics.width;

    // 'cells' tracks rows * cols as the grid grows.
    int cells = 1;
    while (cells < GLYPH_PACKER_GLYPHS_COUNT) {
        const bool narrowerThanTall = package.width < package.height;
        if (!narrowerThanTall) {
            // Add a row: one new cell per existing column.
            package.height += package.metrics.height;
            cells += package.cols;
            ++(package.rows);
        } else {
            // Add a column: one new cell per existing row.
            package.width += package.metrics.width;
            cells += package.rows;
            ++(package.cols);
        }
    }
}
void handleExtendedParams(ReportManager &rm, NGWrapper &g, UNUSED const CompileContext &cc) { if (!hasExtParams(g)) { return; } depth minWidth = findMinWidth(g); depth maxWidth = findMaxWidth(g); bool is_anchored = !has_proper_successor(g.startDs, g) && out_degree(g.start, g); bool has_offset_adj = hasOffsetAdjustments(rm, g); DEBUG_PRINTF("minWidth=%s, maxWidth=%s, anchored=%d, offset_adj=%d\n", minWidth.str().c_str(), maxWidth.str().c_str(), is_anchored, has_offset_adj); DepthMinMax match_depths = findMatchLengths(rm, g); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) { ostringstream oss; oss << "Expression is anchored and cannot satisfy min_offset=" << g.min_offset << " as it can only produce matches of length " << maxWidth << " bytes at most."; throw CompileError(g.expressionIndex, oss.str()); } if (minWidth > g.max_offset) { ostringstream oss; oss << "Expression has max_offset=" << g.max_offset << " but requires " << minWidth << " bytes to match."; throw CompileError(g.expressionIndex, oss.str()); } if (maxWidth.is_finite() && match_depths.max < g.min_length) { ostringstream oss; oss << "Expression has min_length=" << g.min_length << " but can " "only produce matches of length " << match_depths.max << " bytes at most."; throw CompileError(g.expressionIndex, oss.str()); } if (g.min_length && g.min_length <= match_depths.min) { DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", g.min_length); g.min_length = 0; } if (!hasExtParams(g)) { return; } pruneVacuousEdges(g); pruneUnmatchable(g, rm); if (!has_offset_adj) { pruneExtUnreachable(g); } // We may have removed all the edges to accept, in which case this // expression cannot match. 
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { throw CompileError(g.expressionIndex, "Extended parameter " "constraints can not be satisfied for any match from " "this expression."); } // Remove reports on vertices without an edge to accept (which have been // pruned above). clearReports(g); // Recalc. minWidth = findMinWidth(g); maxWidth = findMaxWidth(g); is_anchored = proper_out_degree(g.startDs, g) == 0 && out_degree(g.start, g); has_offset_adj = hasOffsetAdjustments(rm, g); // If the pattern is completely anchored and has a min_length set, this can // be converted to a min_offset. if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) { DEBUG_PRINTF("converting min_length to min_offset=%llu for " "anchored case\n", g.min_length); g.min_offset = g.min_length; g.min_length = 0; } if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) { DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n", g.min_offset); g.min_offset = 0; } if (!hasExtParams(g)) { return; } // If the pattern has a min_length and is of "ratchet" form with one // unbounded repeat, that repeat can become a bounded repeat. // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ if (g.min_length && transformMinLengthToRepeat(rm, g)) { DEBUG_PRINTF("converted min_length to bounded repeat\n"); // recalc minWidth = findMinWidth(g); } // If the pattern is unanchored, has a max_offset and has not asked for // SOM, we can use that knowledge to anchor it which will limit its // lifespan. Note that we can't use this transformation if there's a // min_length, as it's currently handled using "sly SOM". // Note that it is possible to handle graphs that have a combination of // anchored and unanchored paths, but it's too tricky for the moment. 
if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length && !has_offset_adj && isUnanchored(g)) { if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) { DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(), maxWidth.str().c_str()); if (minWidth == maxWidth) { // For a fixed width pattern, we can retire the offsets as they // are implicit in the graph now. g.min_offset = 0; g.max_offset = MAX_OFFSET; } } } //dumpGraph("final.dot", g.g); if (!hasExtParams(g)) { return; } set<NFAVertex> done; updateReportBounds(rm, g, g.accept, done); updateReportBounds(rm, g, g.acceptEod, done); }
/**
 * Returns the maximum width of the given graph, as computed by the
 * filter-taking overload with a SpecialEdgeFilter constructed from the graph
 * and the given top.
 */
depth findMaxWidth(const NGHolder &h, u32 top) {
    const SpecialEdgeFilter top_filter(h, top);
    return findMaxWidth(h, top_filter);
}
/**
 * Returns the maximum width of the given graph, as computed by the
 * filter-taking overload with a SpecialEdgeFilter constructed from the graph
 * alone (no top specified).
 */
depth findMaxWidth(const NGHolder &h) {
    const SpecialEdgeFilter edge_filter(h);
    return findMaxWidth(h, edge_filter);
}
/**
 * Computes an upper bound on the number of times any of the given literals
 * can match inside the infix graph \p h.
 *
 * The graph is cloned and every non-special vertex that could not terminate
 * one of the literals is contracted away; the maximum width of what remains
 * is returned as the bound.
 *
 * \param h     infix graph to examine.
 * \param lits  literals of interest.
 * \return 0 if no vertex could terminate any literal; NO_MATCH_LIMIT if the
 *         graph has more than one top, if any literal is empty, or if the
 *         reduced graph has infinite max width; otherwise the max width of
 *         the reduced graph.
 */
static
u32 findMaxInfixMatches(const NGHolder &h, const set<ue2_literal> &lits) {
    DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
    //dumpGraph("infix.dot", h.g);

    if (!onlyOneTop(h)) {
        // Format string previously ended in "!n" (missing backslash).
        DEBUG_PRINTF("more than one top!\n");
        return NO_MATCH_LIMIT;
    }

    // Indices of vertices that could terminate any of the literals in 'lits'.
    set<u32> terms;

    for (const auto &s : lits) {
        DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
        if (s.empty()) {
            // Likely an anchored case, be conservative here.
            return NO_MATCH_LIMIT;
        }

        for (auto v : vertices_range(h)) {
            if (is_special(v, h)) {
                continue;
            }

            if (couldEndLiteral(s, v, h)) {
                u32 idx = h[v].index;
                DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
                terms.insert(idx);
            }
        }
    }

    if (terms.empty()) {
        DEBUG_PRINTF("literals cannot match inside infix\n");
        return 0;
    }

    // Work on a copy so the caller's graph is left untouched.
    NGHolder g;
    cloneHolder(g, h);
    vector<NFAVertex> dead;

    // The set of all edges in the graph is used for existence checks in
    // contractVertex.
    ue2::unordered_set<pair<NFAVertex, NFAVertex>> all_edges;
    for (const auto &e : edges_range(g)) {
        all_edges.emplace(source(e, g), target(e, g));
    }

    // Contract every ordinary vertex that is not a literal terminator. The
    // vertices themselves are removed afterwards, once iteration is done.
    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || contains(terms, g[v].index)) {
            continue;
        }

        contractVertex(g, v, all_edges);
        dead.push_back(v);
    }

    remove_vertices(dead, g);
    //dumpGraph("relaxed.dot", g.g);

    depth maxWidth = findMaxWidth(g);
    DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
    assert(maxWidth.is_reachable());

    if (maxWidth.is_infinite()) {
        // Cycle detected, so we can likely squeeze an unlimited number of
        // matches into this graph.
        return NO_MATCH_LIMIT;
    }

    assert(terms.size() >= maxWidth);
    return maxWidth;
}