Ejemplo n.º 1
0
void describeEdge(FILE *f, const u16 *t, u16 i) {
    for (u16 s = 0; s < N_CHARS; s++) {
        if (!t[s]) {
            continue;
        }

        u16 ss;
        for (ss = 0; ss < s; ss++) {
            if (t[s] == t[ss]) {
                break;
            }
        }

        if (ss != s) {
            continue;
        }

        CharReach reach;
        for (ss = s; ss < 256; ss++) {
            if (t[s] == t[ss]) {
                reach.set(ss);
            }
        }

        fprintf(f, "%u -> %u [ label = \"", i, t[s]);

        describeClass(f, reach, 5, CC_OUT_DOT);

        fprintf(f, "\" ];\n");
    }
}
Ejemplo n.º 2
0
/**
 * Run the state set \p states over every element of \p input, in place.
 *
 * For each input element the current states are advanced with step(), the
 * NODE_START_DOTSTAR bit is cleared if \p kill_sds is set, and the result is
 * restricted with filter_by_reach() against that element. Processing stops
 * early as soon as no state remains switched on.
 *
 * \param g         graph being executed
 * \param info      per-state information passed to step()/filter_by_reach()
 * \param input     sequence of input elements to consume
 * \param states    in: initial active states; out: active states after input
 * \param kill_sds  if true, NODE_START_DOTSTAR is cleared after every step
 */
static
void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
                     const inputT &input, dynamic_bitset<> *states,
                     bool kill_sds) {
    dynamic_bitset<> &curr = *states;
    dynamic_bitset<> next(curr.size());
    DEBUG_PRINTF("%zu states in\n", states->count());

    for (const auto &e : input) {
        DEBUG_PRINTF("processing %s\n", describeClass(e).c_str());
        step(g, info, curr, &next);
        if (kill_sds) {
            next.reset(NODE_START_DOTSTAR);
        }
        filter_by_reach(info, &next, e);
        next.swap(curr);

        // none() asks "is no bit set?". empty() would only be true for a
        // zero-length bitset, so it can never detect a dead state set here.
        // NOTE(review): assumes step() derives next purely from curr, so a
        // dead set stays dead -- confirm against step().
        if (curr.none()) {
            DEBUG_PRINTF("went dead\n");
            break;
        }
    }

    // Report the number of active states (count), matching the "states in"
    // message; size() is just the fixed bit capacity.
    DEBUG_PRINTF("%zu states out\n", states->count());
}
Ejemplo n.º 3
0
static
void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
    for (u16 s = 0; s < N_CHARS; s++) {
        if (!t[s]) {
            continue;
        }

        u16 ss;
        for (ss = 0; ss < s; ss++) {
            if (t[s] == t[ss]) {
                break;
            }
        }

        if (ss != s) {
            continue;
        }

        CharReach reach;
        for (ss = s; ss < 256; ss++) {
            if (t[s] == t[ss]) {
                reach.set(ss);
            }
        }

        fprintf(f, "%u -> %u [ ", i, t[s]);
        if (i < m->sheng_end && t[s] < m->sheng_end) {
            fprintf(f, "color = red, fontcolor = red ");
        }
        fprintf(f, "label = \"");
        describeClass(f, reach, 5, CC_OUT_DOT);

        fprintf(f, "\" ];\n");
    }
}
Ejemplo n.º 4
0
/**
 * Render \p trigger as a single string: the describeClass() text of each
 * element, concatenated in order with no separator.
 */
static UNUSED
string dumpTrigger(const vector<CharReach> &trigger) {
    string out;
    for (auto it = trigger.begin(); it != trigger.end(); ++it) {
        out.append(describeClass(*it));
    }
    return out;
}
Ejemplo n.º 5
0
/**
 * Render \p look as a comma-separated list of "{offset: class}" items, in the
 * map's iteration order.
 */
static UNUSED
string dump(const map<s32, CharReach> &look) {
    ostringstream out;
    const char *sep = ""; // nothing before the first item
    for (const auto &entry : look) {
        out << sep;
        out << "{" << entry.first << ": " << describeClass(entry.second)
            << "}";
        sep = ", ";
    }
    return out.str();
}
Ejemplo n.º 6
0
/**
 * Returns true if every entry in \p look overlaps \p flood_cr (this includes
 * the case of an empty \p look). Returns false as soon as one entry is found
 * that does not overlap it.
 */
static
bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
    for (auto it = look.begin(); it != look.end(); ++it) {
        if (overlaps(it->second, flood_cr)) {
            continue;
        }
        // Found an entry that does not overlap the flood reach.
        return false;
    }

    DEBUG_PRINTF("look can't escape flood on %s\n",
                  describeClass(flood_cr).c_str());
    return true;
}
Ejemplo n.º 7
0
/**
 * For each literal attached to vertex \p v for which is_flood() holds, insert
 * the reach built from the literal's first element into \p flood_reach.
 * Empty literals are ignored.
 */
static
void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
                    set<CharReach> &flood_reach) {
    for (u32 lit_id : tbi.g[v].literals) {
        const ue2_literal &lit = tbi.literals.right.at(lit_id).s;

        // Short-circuit keeps is_flood() from ever seeing an empty literal.
        if (lit.empty() || !is_flood(lit)) {
            continue;
        }

        CharReach cr(*lit.begin());
        DEBUG_PRINTF("flood-prone with reach: %s\n",
                      describeClass(cr).c_str());
        flood_reach.insert(cr);
    }
}
Ejemplo n.º 8
0
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_SHUF);

    StdioFile f(base + ".txt", "w");

    const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
    lbrDumpCommon(&ls->common, f);

    CharReach cr = shufti2cr((const u8 *)&ls->mask_lo,
                             (const u8 *)&ls->mask_hi);
    fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n",
            describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Ejemplo n.º 9
0
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_TRUF);

    StdioFile f(base + ".txt", "w");

    const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
    lbrDumpCommon(&lt->common, f);

    CharReach cr = truffle2cr((const u8 *)&lt->mask1,
                              (const u8 *)&lt->mask2);
    fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n",
            describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Ejemplo n.º 10
0
/**
 * Compute a per-character stop mask for graph \p g.
 *
 * For each depth index in [0, MAX_STOP_DEPTH) the reach of every non-special
 * vertex whose (capped) maximum distance exceeds that index is accumulated.
 * The complement of that reach is the stop alphabet for the depth. The result
 * holds one byte per character, with bit i set when the character is a stop
 * character at depth index i.
 *
 * When \p som is SOM_NONE the vertex reach comes from reduced_cr(); otherwise
 * the vertex's own char_reach is used.
 */
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
    const depth max_depth(MAX_STOP_DEPTH);
    const InitDepths depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // reach_at[k]: union of reach for vertices contributing at depth index k.
    vector<CharReach> reach_at(MAX_STOP_DEPTH);

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        CharReach v_cr;
        if (som != SOM_NONE) {
            v_cr = g[v].char_reach;
        } else {
            v_cr = reduced_cr(v, g, no_vertices);
        }

        // A vertex contributes to every depth index below its capped distance.
        const u32 limit = min(max_depth, depths.maxDist(g, v));
        for (u32 k = 0; k < limit; k++) {
            reach_at[k] |= v_cr;
        }
    }

#ifdef DEBUG
    for (u32 k = 0; k < MAX_STOP_DEPTH; k++) {
        DEBUG_PRINTF("depth %u, stop chars: ", k);
        describeClass(stdout, ~reach_at[k], 20, CC_OUT_TEXT);
        printf("\n");
    }
#endif

    vector<u8> stop(N_CHARS, 0);

    for (u32 k = 0; k < MAX_STOP_DEPTH; k++) {
        // Stop characters are those outside the accumulated reach.
        const CharReach stop_chars = ~reach_at[k];
        const u8 bit = 1U << k;

        size_t c = stop_chars.find_first();
        while (c != stop_chars.npos) {
            stop[c] |= bit;
            c = stop_chars.find_next(c);
        }
    }

    return stop;
}
Ejemplo n.º 11
0
void describeAlphabet(FILE *f, const mcclellan *m) {
    map<u8, CharReach> rev;

    for (u16 i = 0; i < N_CHARS; i++) {
        rev[m->remap[i]].clear();
    }

    for (u16 i = 0; i < N_CHARS; i++) {
        rev[m->remap[i]].set(i);
    }

    map<u8, CharReach>::const_iterator it;
    fprintf(f, "\nAlphabet\n");
    for (it = rev.begin(); it != rev.end(); ++it) {
        fprintf(f, "%3hhu: ", it->first);
        describeClass(f, it->second, 10240, CC_OUT_TEXT);
        fprintf(f, "\n");
    }
    fprintf(f, "\n");
}
Ejemplo n.º 12
0
/**
 * Compute a per-character stop mask for \p castle.
 *
 * Every character outside the castle's reach gets the low d bits set, where
 * d is the castle's maximum width capped at MAX_STOP_DEPTH. All other
 * characters get zero. The \p som argument is not used.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth max_width = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                  describeClass(castle.reach()).c_str(),
                  max_width.str().c_str());

    // Characters the castle cannot consume.
    const CharReach escape = ~castle.reach();

    const u32 capped = min(max_width, depth(MAX_STOP_DEPTH));
    const u8 mask = verify_u8((1U << capped) - 1);

    vector<u8> stop(N_CHARS, 0);

    size_t c = escape.find_first();
    while (c != escape.npos) {
        stop[c] |= mask;
        c = escape.find_next(c);
    }

    return stop;
}
Ejemplo n.º 13
0
/**
 * Build an LBR engine for the given repeat.
 *
 * The builders are tried in a fixed order (Dot, Verm, NVerm, Shuf, Truf) and
 * the first non-null result is returned. If every builder fails, the function
 * asserts and returns a null pointer.
 */
static
aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
                                     const depth &repeatMin,
                                     const depth &repeatMax, u32 minPeriod,
                                     bool is_reset, ReportID report) {
    DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
                 repeatMin.str().c_str(), repeatMax.str().c_str(),
                 describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
                 report);
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());

    aligned_unique_ptr<NFA> nfa;

    nfa = buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    // No builder produced an engine.
    assert(0);
    return nullptr;
}
Ejemplo n.º 14
0
/**
 * Extend \p lookaround with entries from \p more_lookaround until it holds
 * MAX_LOOKAROUND_ENTRIES entries or the candidates run out.
 *
 * Candidates at an offset already present in \p lookaround are ignored. The
 * remaining candidates are taken in the order given by LookPriority. On
 * return \p lookaround is sorted by offset, unless it was already full on
 * entry, in which case it is left untouched.
 */
void mergeLookaround(vector<LookEntry> &lookaround,
                     const vector<LookEntry> &more_lookaround) {
    if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
        DEBUG_PRINTF("big enough!\n");
        return;
    }

    // Offsets that already have an entry; these are never merged again.
    ue2::flat_set<s8> taken;
    for (const auto &existing : lookaround) {
        taken.insert(existing.offset);
    }

    map<s32, CharReach> candidates;
    LookPriority cmp(candidates);
    priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
    for (const auto &extra : more_lookaround) {
        if (contains(taken, extra.offset)) {
            continue;
        }
        // Insert into the map before pushing onto the queue.
        candidates.emplace(extra.offset, extra.reach);
        pq.push(extra.offset);
    }

    while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
        const s32 offset = pq.top();
        pq.pop();
        const auto &cr = candidates.at(offset);
        DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
        lookaround.emplace_back(verify_s8(offset), cr);
    }

    // Restore ascending offset order.
    sort(lookaround.begin(), lookaround.end(),
         [](const LookEntry &lhs, const LookEntry &rhs) {
             return lhs.offset < rhs.offset;
         });
}
Ejemplo n.º 15
0
/**
 * Compute an upper bound on a match count for \p castle given the literals in
 * \p lits, taking the largest per-literal count.
 *
 * Returns NO_MATCH_LIMIT when no bound is derived: the castle has more than
 * one repeat, a literal is empty, a literal lies wholly within the repeat's
 * reach while the repeat's max bound is not finite, or the computed count
 * exceeds NO_MATCH_LIMIT.
 */
static
u32 findMaxInfixMatches(const CastleProto &castle,
                        const set<ue2_literal> &lits) {
    DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());

    // Only single-repeat castles are analysed.
    if (castle.repeats.size() > 1) {
        DEBUG_PRINTF("more than one top!\n");
        return NO_MATCH_LIMIT;
    }

    assert(!castle.repeats.empty());
    const PureRepeat &pr = castle.repeats.begin()->second;
    DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
                 describeClass(pr.reach).c_str());

    // Largest per-literal count seen so far.
    size_t max_count = 0;

    for (const auto &s : lits) {
        DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
        if (s.empty()) {
            // Likely an anchored case, be conservative here.
            return NO_MATCH_LIMIT;
        }

        size_t count = 0;

        // Walk the literal backwards to the first element flagged by
        // ReachMismatch (presumably: not in pr.reach -- confirm against
        // ReachMismatch's definition).
        auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));

        if (f == s.rbegin()) {
            // The very last element already mismatches.
            DEBUG_PRINTF("lit can't terminate inside infix\n");
            count = 0;
        } else if (f != s.rend()) {
            // Only a trailing run of the literal got past the mismatch test;
            // its length, capped by a finite max bound, is the count.
            size_t suffix_len = distance(s.rbegin(), f);
            DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
            if (pr.bounds.max.is_finite()) {
                count = min(suffix_len, (size_t)pr.bounds.max);
            } else {
                count = suffix_len;
            }
        } else {
            // No element mismatched: the count is the max bound if finite,
            // otherwise no limit can be given.
            DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
            if (pr.bounds.max.is_finite()) {
                count = pr.bounds.max;
            } else {
                DEBUG_PRINTF("inf bound\n");
                return NO_MATCH_LIMIT;
            }
        }

        DEBUG_PRINTF("count=%zu\n", count);
        max_count = max(max_count, count);
    }

    DEBUG_PRINTF("max_count %zu\n", max_count);

    // Guard the narrowing cast below.
    if (max_count > NO_MATCH_LIMIT) {
        assert(0); // This would be a surprise.
        return NO_MATCH_LIMIT;
    }

    return (u32)max_count;
}
Ejemplo n.º 16
0
/**
 * Shrink \p look towards MAX_LOOKAROUND_ENTRIES entries, removing entries in
 * the order given by LookPriority. Does nothing if \p look is already within
 * the limit.
 *
 * An entry is put back if removing it makes isFloodProne(look, flood_reach)
 * true, so \p look may still exceed the limit on return. Entries of
 * \p flood_reach that \p look already cannot escape are removed from
 * \p flood_reach before the analysis.
 */
static
void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
    if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
        return;
    }

    DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());

    // First, remove floods that we already can't escape; they shouldn't affect
    // the analysis below.
    for (auto it = flood_reach.begin(); it != flood_reach.end();) {
        if (isFloodProne(look, *it)) {
            DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
                         describeClass(*it).c_str());
            // Post-increment advances the iterator before its element is
            // erased.
            flood_reach.erase(it++);
        } else {
            ++it;
        }
    }

    // Queue every offset in look; the comparator is constructed from look
    // itself. NOTE(review): LookPriority presumably reads look during
    // comparisons, which is why pop() precedes erase() below -- confirm.
    LookPriority cmp(look);
    priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
    for (const auto &m : look) {
        pq.push(m.first);
    }

    // Phase 1: drop entries in priority order until within the limit or the
    // queue is exhausted.
    while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
        s32 d = pq.top();
        assert(contains(look, d));
        const CharReach cr(look[d]); // copy
        pq.pop();

        DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
        look.erase(d);

        // If removing this entry would result in us becoming flood_prone on a
        // particular flood_reach case, reinstate it and move on.
        if (isFloodProne(look, flood_reach)) {
            DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
                         describeClass(cr).c_str());
            look.insert(make_pair(d, cr));
        }
    }

    // Phase 2: among the entries still queued, also drop those whose reach
    // holds at least LOOKAROUND_WIDE_REACH characters, with the same
    // flood-prone reinstatement rule.
    while (!pq.empty()) {
        s32 d = pq.top();
        assert(contains(look, d));
        const CharReach cr(look[d]); // copy
        pq.pop();

        // Narrow entries are kept.
        if (cr.count() < LOOKAROUND_WIDE_REACH) {
            continue;
        }

        DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
        look.erase(d);

        // If removing this entry would result in us becoming flood_prone on a
        // particular flood_reach case, reinstate it and move on.
        if (isFloodProne(look, flood_reach)) {
            DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
                         describeClass(cr).c_str());
            look.insert(make_pair(d, cr));
        }
    }

    DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
}
Ejemplo n.º 17
0
static
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
                            hwlm_group_t expected_groups, AccelAux *aux) {
    DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
    u32 min_len = MAX_ACCEL_OFFSET;
    vector<const hwlmLiteral *> filtered_lits;

    filterLits(lits, expected_groups, &filtered_lits, &min_len);
    if (filtered_lits.empty()) {
        return;
    }

    if (findDVerm(filtered_lits, aux)
        || findSVerm(filtered_lits, aux)) {
        return;
    }

    vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
    for (const auto &lit : lits) {
        if (!(lit.groups & expected_groups)) {
            continue;
        }

        for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
            unsigned char c = lit.s[i];
            if (lit.nocase) {
                DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
                DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
                reach[i].set(mytoupper(c));
                reach[i].set(mytolower(c));
            } else {
                DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
                reach[i].set(c);
            }
        }
    }

    u32 min_count = ~0U;
    u32 min_offset = ~0U;
    for (u32 i = 0; i < min_len; i++) {
        size_t count = reach[i].count();
        DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
                     describeClass(reach[i]).c_str(), count);
        if (count < min_count) {
            min_count = (u32)count;
            min_offset = i;
        }
    }
    assert(min_offset <= min_len);

    if (min_count > MAX_SHUFTI_WIDTH) {
        DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
        return;
    }

    const CharReach &cr = reach[min_offset];
    if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
        DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
                     describeClass(cr).c_str(), cr.count(), min_offset);
        aux->shufti.accel_type = ACCEL_SHUFTI;
        aux->shufti.offset = verify_u8(min_offset);
        return;
    }

    DEBUG_PRINTF("fail\n");
}