예제 #1
0
/**
 * Returns the maximum width in bytes of an input that will match the given
 * graph. If there is no maximum width, returns infinity.
 *
 * The width is measured once from h.start and once from h.startDs. If one of
 * the two measurements is unreachable the other is returned as-is; otherwise
 * the larger of the two is returned.
 */
depth findMaxWidth(const NGHolder &h) {
    depth fromStart = findMaxWidth(h, h.start);
    depth fromStartDs = findMaxWidth(h, h.startDs);
    DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", fromStart.str().c_str(),
                 fromStartDs.str().c_str());

    // Guard clauses: an unreachable measurement never takes part in max().
    if (fromStart.is_unreachable()) {
        return fromStartDs;
    }
    if (fromStartDs.is_unreachable()) {
        return fromStart;
    }

    return max(fromStart, fromStartDs);
}
예제 #2
0
/**
 * Builds a table of N_CHARS byte masks for the given castle.
 *
 * Every character outside the castle's reach gets a mask with the low d bits
 * set, where d is the castle's maximum width capped at MAX_STOP_DEPTH. All
 * characters inside the reach keep a zero entry.
 *
 * \param castle castle prototype whose reach and maximum width are examined.
 * \param som unused.
 * \return vector of N_CHARS masks, indexed by character value.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth max_width = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                 describeClass(castle.reach()).c_str(),
                 max_width.str().c_str());

    // Stop characters are the complement of the castle's reach.
    const CharReach stop_chars = ~castle.reach();

    // One mask bit per depth level, limited to MAX_STOP_DEPTH levels.
    u32 shift = min(max_width, depth(MAX_STOP_DEPTH));
    const u8 mask = verify_u8((1U << shift) - 1);

    vector<u8> stop(N_CHARS, 0);

    size_t c = stop_chars.find_first();
    while (c != stop_chars.npos) {
        stop[c] |= mask;
        c = stop_chars.find_next(c);
    }

    return stop;
}
예제 #3
0
/**
 * Make package appropriate size.
 *
 * Sets the per-glyph cell metrics (width and height include
 * GLYPH_PACKER_PADDING), starts from a 1x1 grid of cells, and then keeps
 * appending a column or a row until the grid has room for at least
 * GLYPH_PACKER_GLYPHS_COUNT cells. A column is added while the package is
 * narrower than it is tall; otherwise a row is added.
 */
void GlyphPacker::measure() {
    package.metrics.width = findMaxWidth() + GLYPH_PACKER_PADDING;
    package.metrics.height = factory->getFontHeight() + GLYPH_PACKER_PADDING;
    package.metrics.descent = factory->getFontDescent();

    // Begin with a single cell.
    package.rows = 1;
    package.cols = 1;
    package.width = package.metrics.width;
    package.height = package.metrics.height;

    // Number of cells currently available in the grid.
    int capacity = 1;
    while (capacity < GLYPH_PACKER_GLYPHS_COUNT) {
        const bool narrowerThanTall = package.width < package.height;
        if (!narrowerThanTall) {
            // New row: gains one cell per existing column.
            package.height += package.metrics.height;
            capacity += package.cols;
            ++package.rows;
        } else {
            // New column: gains one cell per existing row.
            package.width += package.metrics.width;
            capacity += package.rows;
            ++package.cols;
        }
    }
}
예제 #4
0
/**
 * Validates, simplifies and applies the extended parameters (min_offset,
 * max_offset, min_length) carried by the graph \p g.
 *
 * Returns immediately whenever hasExtParams(g) is false, both on entry and
 * after each simplification step below. Otherwise, in order:
 *  - rejects combinations that the computed widths / match lengths show can
 *    never be satisfied;
 *  - drops a min_length that is already implied by the minimum match length;
 *  - prunes the graph (vacuous edges, unmatchable parts, and, when there are
 *    no offset adjustments, parts unreachable under the parameters) and
 *    clears reports on vertices that lost their edge to accept;
 *  - converts min_length to min_offset for anchored patterns and drops a
 *    min_offset already implied by the minimum width;
 *  - tries to turn min_length into a bounded repeat, and to anchor an
 *    unanchored pattern with a bounded repeat when max_offset is set;
 *  - finally updates report bounds for accept and acceptEod.
 *
 * \param rm report manager, passed through to the helper routines.
 * \param g graph wrapper; both the graph and its extended parameter fields
 *          may be modified.
 * \param cc unused.
 * \throws CompileError if the extended parameters cannot be satisfied by any
 *         match of the expression.
 */
void handleExtendedParams(ReportManager &rm, NGWrapper &g,
                          UNUSED const CompileContext &cc) {
    if (!hasExtParams(g)) {
        return;
    }

    depth minWidth = findMinWidth(g);
    depth maxWidth = findMaxWidth(g);
    // Treated as anchored when startDs has no proper successor and start has
    // at least one out-edge.
    bool is_anchored = !has_proper_successor(g.startDs, g)
                     && out_degree(g.start, g);
    bool has_offset_adj = hasOffsetAdjustments(rm, g);

    DEBUG_PRINTF("minWidth=%s, maxWidth=%s, anchored=%d, offset_adj=%d\n",
                 minWidth.str().c_str(), maxWidth.str().c_str(), is_anchored,
                 has_offset_adj);

    DepthMinMax match_depths = findMatchLengths(rm, g);
    DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());

    // An anchored pattern of bounded width cannot produce a match at or beyond
    // a min_offset larger than that width.
    if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) {
        ostringstream oss;
        oss << "Expression is anchored and cannot satisfy min_offset="
            << g.min_offset << " as it can only produce matches of length "
            << maxWidth << " bytes at most.";
        throw CompileError(g.expressionIndex, oss.str());
    }

    // The shortest possible match already ends after max_offset.
    if (minWidth > g.max_offset) {
        ostringstream oss;
        oss << "Expression has max_offset=" << g.max_offset << " but requires "
             << minWidth << " bytes to match.";
        throw CompileError(g.expressionIndex, oss.str());
    }

    // The longest possible match is still shorter than min_length.
    if (maxWidth.is_finite() && match_depths.max < g.min_length) {
        ostringstream oss;
        oss << "Expression has min_length=" << g.min_length << " but can "
            "only produce matches of length " << match_depths.max <<
            " bytes at most.";
        throw CompileError(g.expressionIndex, oss.str());
    }

    // Every match is already at least min_length long, so drop the constraint.
    if (g.min_length && g.min_length <= match_depths.min) {
        DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
                     g.min_length);
        g.min_length = 0;
    }

    // Re-check: min_length may just have been cleared above.
    if (!hasExtParams(g)) {
        return;
    }

    pruneVacuousEdges(g);
    pruneUnmatchable(g, rm);

    // Reachability pruning is skipped when reports carry offset adjustments.
    if (!has_offset_adj) {
        pruneExtUnreachable(g);
    }

    // We may have removed all the edges to accept, in which case this
    // expression cannot match.
    // NOTE(review): the acceptEod in-degree of 1 is presumably the structural
    // accept -> acceptEod edge that is always present -- confirm.
    if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
        throw CompileError(g.expressionIndex, "Extended parameter "
                "constraints can not be satisfied for any match from "
                "this expression.");
    }

    // Remove reports on vertices without an edge to accept (which have been
    // pruned above).
    clearReports(g);

    // Recalc.
    minWidth = findMinWidth(g);
    maxWidth = findMaxWidth(g);
    is_anchored = proper_out_degree(g.startDs, g) == 0 &&
                  out_degree(g.start, g);
    has_offset_adj = hasOffsetAdjustments(rm, g);

    // If the pattern is completely anchored and has a min_length set, this can
    // be converted to a min_offset.
    if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) {
        DEBUG_PRINTF("converting min_length to min_offset=%llu for "
                     "anchored case\n", g.min_length);
        g.min_offset = g.min_length;
        g.min_length = 0;
    }

    // A min_offset no larger than the minimum width is implied by the graph,
    // provided no report adjusts its offset.
    if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) {
        DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n",
                     g.min_offset);
        g.min_offset = 0;
    }

    // Re-check: min_offset / min_length may have been cleared above.
    if (!hasExtParams(g)) {
        return;
    }

    // If the pattern has a min_length and is of "ratchet" form with one
    // unbounded repeat, that repeat can become a bounded repeat.
    // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
    if (g.min_length && transformMinLengthToRepeat(rm, g)) {
        DEBUG_PRINTF("converted min_length to bounded repeat\n");
        // recalc
        // Only minWidth is refreshed here; maxWidth keeps its earlier value.
        minWidth = findMinWidth(g);
    }

    // If the pattern is unanchored, has a max_offset and has not asked for
    // SOM, we can use that knowledge to anchor it which will limit its
    // lifespan. Note that we can't use this transformation if there's a
    // min_length, as it's currently handled using "sly SOM".

    // Note that it is possible to handle graphs that have a combination of
    // anchored and unanchored paths, but it's too tricky for the moment.

    if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length &&
                !has_offset_adj && isUnanchored(g)) {
        if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) {
            DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(),
                         maxWidth.str().c_str());
            if (minWidth == maxWidth) {
                // For a fixed width pattern, we can retire the offsets as they
                // are implicit in the graph now.
                g.min_offset = 0;
                g.max_offset = MAX_OFFSET;
            }
        }
    }
    //dumpGraph("final.dot", g.g);

    // Re-check: the offsets may have been retired by the anchoring step.
    if (!hasExtParams(g)) {
        return;
    }

    // Apply whatever constraints remain to the reports reachable from both
    // accept vertices; 'done' is shared between the two calls.
    set<NFAVertex> done;
    updateReportBounds(rm, g, g.accept, done);
    updateReportBounds(rm, g, g.acceptEod, done);
}
예제 #5
0
/**
 * Returns the maximum width of \p h as computed by the filter-based
 * findMaxWidth overload, using a SpecialEdgeFilter built from the graph and
 * the given \p top.
 */
depth findMaxWidth(const NGHolder &h, u32 top) {
    const SpecialEdgeFilter topFilter(h, top);
    return findMaxWidth(h, topFilter);
}
예제 #6
0
/**
 * Returns the maximum width of \p h as computed by the filter-based
 * findMaxWidth overload, using a SpecialEdgeFilter built from the graph
 * alone (no top specified).
 */
depth findMaxWidth(const NGHolder &h) {
    const SpecialEdgeFilter edgeFilter(h);
    return findMaxWidth(h, edgeFilter);
}
예제 #7
0
/**
 * Estimates how many of the literals in \p lits can match inside the infix
 * graph \p h.
 *
 * Vertices that could end one of the literals are kept; every other
 * non-special vertex is contracted out of a copy of the graph. The maximum
 * width of the reduced copy is then returned as the limit.
 *
 * \param h infix graph to analyse; it is cloned and not modified.
 * \param lits literals that may match inside the infix.
 * \return 0 if no vertex could end any literal; NO_MATCH_LIMIT if the graph
 *         has more than one top, if any literal is empty, or if the reduced
 *         graph has infinite maximum width; otherwise the maximum width of
 *         the reduced graph.
 */
static
u32 findMaxInfixMatches(const NGHolder &h, const set<ue2_literal> &lits) {
    DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
    //dumpGraph("infix.dot", h.g);

    if (!onlyOneTop(h)) {
        // Fixed: this message previously ended in a literal 'n' instead of a
        // newline escape.
        DEBUG_PRINTF("more than one top!\n");
        return NO_MATCH_LIMIT;
    }

    // Indices of vertices that could terminate any of the literals in 'lits'.
    set<u32> terms;

    for (const auto &s : lits) {
        DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
        if (s.empty()) {
            // Likely an anchored case, be conservative here.
            return NO_MATCH_LIMIT;
        }

        for (auto v : vertices_range(h)) {
            if (is_special(v, h)) {
                continue;
            }

            if (couldEndLiteral(s, v, h)) {
                u32 idx = h[v].index;
                DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
                terms.insert(idx);
            }
        }
    }

    if (terms.empty()) {
        DEBUG_PRINTF("literals cannot match inside infix\n");
        return 0;
    }

    // Work on a copy so that the caller's graph is left untouched.
    NGHolder g;
    cloneHolder(g, h);
    vector<NFAVertex> dead;

    // The set of all edges in the graph is used for existence checks in
    // contractVertex.
    ue2::unordered_set<pair<NFAVertex, NFAVertex>> all_edges;
    for (const auto &e : edges_range(g)) {
        all_edges.emplace(source(e, g), target(e, g));
    }

    // Contract every non-special vertex that cannot end a literal. Vertices
    // in the clone are matched to 'terms' by index. Removal is deferred until
    // after the loop via 'dead'.
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        if (contains(terms, g[v].index)) {
            continue;
        }

        contractVertex(g, v, all_edges);
        dead.push_back(v);
    }

    remove_vertices(dead, g);
    //dumpGraph("relaxed.dot", g.g);

    depth maxWidth = findMaxWidth(g);
    DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
    assert(maxWidth.is_reachable());

    if (maxWidth.is_infinite()) {
        // Cycle detected, so we can likely squeeze an unlimited number of
        // matches into this graph.
        return NO_MATCH_LIMIT;
    }

    // The reduced graph's finite width is expected not to exceed the number
    // of terminating vertices.
    assert(terms.size() >= maxWidth);
    return maxWidth;
}