Example #1
0
static
void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) {
    if (in_degree(v, g) != 1) {
        DEBUG_PRINTF("unexpected pred\n");
        assert(0); /* should be true due to the early stage of this analysis */
        return;
    }

    CharReach &cr = g[v].char_reach;
    if (pred_char == 0xe0) {
        assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
        if (cr == CharReach(0xa0, 0xbf)) {
            cr |= CharReach(0x80, 0x9f);
        }
    } else if (pred_char == 0xf0) {
        assert(cr.isSubsetOf(CharReach(0x90, 0xbf)));
        if (cr == CharReach(0x90, 0xbf)) {
            cr |= CharReach(0x80, 0x8f);
        }
    } else if (pred_char == 0xf4) {
        assert(cr.isSubsetOf(CharReach(0x80, 0x8f)));
        if (cr == CharReach(0x80, 0x8f)) {
            cr |= CharReach(0x90, 0xbf);
        }
    } else {
        assert(0); /* unexpected pred */
    }
}
Example #2
0
static
bool can_die_early(const NGHolder &g, const vector<StateInfo> &info,
                   const dynamic_bitset<> &s,
                   map<dynamic_bitset<>, u32> &visited, u32 age_limit) {
    if (contains(visited, s) && visited[s] >= age_limit) {
        /* we have already (or are in the process) of visiting here with a
         * looser limit. */
        return false;
    }
    visited[s] = age_limit;

    if (s.none()) {
        DEBUG_PRINTF("dead\n");
        return true;
    }

    if (age_limit == 0) {
        return false;
    }

    dynamic_bitset<> all_succ(s.size());
    step(g, info, s, &all_succ);
    all_succ.reset(NODE_START_DOTSTAR);

    for (u32 i = 0; i < N_CHARS; i++) {
        dynamic_bitset<> next = all_succ;
        filter_by_reach(info, &next, CharReach(i));
        if (can_die_early(g, info, next, visited, age_limit - 1)) {
            return true;
        }
    }

    return false;
}
Example #3
0
static
bool expandCyclic(NGHolder &h, NFAVertex v) {
    DEBUG_PRINTF("inspecting %zu\n", h[v].index);
    bool changes = false;

    auto v_preds = preds(v, h);
    auto v_succs = succs(v, h);

    set<NFAVertex> start_siblings;
    set<NFAVertex> end_siblings;

    CharReach &v_cr = h[v].char_reach;

    /* We need to find start vertices which have all of our preds.
     * As we have a self loop, it must be one of our succs. */
    for (auto a : adjacent_vertices_range(v, h)) {
        auto a_preds = preds(a, h);

        if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
            DEBUG_PRINTF("%zu is a start v\n", h[a].index);
            start_siblings.insert(a);
        }
    }

    /* We also need to find full cont vertices which have all our own succs;
     * As we have a self loop, it must be one of our preds. */
    for (auto a : inv_adjacent_vertices_range(v, h)) {
        auto a_succs = succs(a, h);

        if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
            DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index);
            end_siblings.insert(a);
        }
    }

    for (auto s : start_siblings) {
        if (out_degree(s, h) != 1) {
            continue;
        }

        const CharReach &cr = h[s].char_reach;
        if (cr.isSubsetOf(UTF_TWO_START_CR)) {
            if (end_siblings.find(*adjacent_vertices(s, h).first)
                == end_siblings.end()) {
                DEBUG_PRINTF("%zu is odd\n", h[s].index);
                continue;
            }
        } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
            NFAVertex m = *adjacent_vertices(s, h).first;

            if (h[m].char_reach != UTF_CONT_CR
                || out_degree(m, h) != 1) {
                continue;
            }
            if (end_siblings.find(*adjacent_vertices(m, h).first)
                == end_siblings.end()) {
                DEBUG_PRINTF("%zu is odd\n", h[s].index);
                continue;
            }
        } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
            NFAVertex m1 = *adjacent_vertices(s, h).first;

            if (h[m1].char_reach != UTF_CONT_CR
                || out_degree(m1, h) != 1) {
                continue;
            }

            NFAVertex m2 = *adjacent_vertices(m1, h).first;

            if (h[m2].char_reach != UTF_CONT_CR
                || out_degree(m2, h) != 1) {
                continue;
            }

            if (end_siblings.find(*adjacent_vertices(m2, h).first)
                == end_siblings.end()) {
                DEBUG_PRINTF("%zu is odd\n", h[s].index);
                continue;
            }
        } else {
            DEBUG_PRINTF("%zu is bad\n", h[s].index);
          continue;
        }

        v_cr |= cr;
        clear_vertex(s, h);
        changes = true;
    }

    if (changes) {
        v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
        v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require
                         * valid unicode data */
        v_cr.set(0xc1);
        v_cr |= CharReach(0xf5, 0xff);
    }

    return changes;
}
Example #4
0
static
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
                            hwlm_group_t expected_groups, AccelAux *aux) {
    DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
    u32 min_len = MAX_ACCEL_OFFSET;
    vector<const hwlmLiteral *> filtered_lits;

    filterLits(lits, expected_groups, &filtered_lits, &min_len);
    if (filtered_lits.empty()) {
        return;
    }

    if (findDVerm(filtered_lits, aux)
        || findSVerm(filtered_lits, aux)) {
        return;
    }

    vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
    for (const auto &lit : lits) {
        if (!(lit.groups & expected_groups)) {
            continue;
        }

        for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
            unsigned char c = lit.s[i];
            if (lit.nocase) {
                DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
                DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
                reach[i].set(mytoupper(c));
                reach[i].set(mytolower(c));
            } else {
                DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
                reach[i].set(c);
            }
        }
    }

    u32 min_count = ~0U;
    u32 min_offset = ~0U;
    for (u32 i = 0; i < min_len; i++) {
        size_t count = reach[i].count();
        DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
                     describeClass(reach[i]).c_str(), count);
        if (count < min_count) {
            min_count = (u32)count;
            min_offset = i;
        }
    }
    assert(min_offset <= min_len);

    if (min_count > MAX_SHUFTI_WIDTH) {
        DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
        return;
    }

    const CharReach &cr = reach[min_offset];
    if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
        DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
                     describeClass(cr).c_str(), cr.count(), min_offset);
        aux->shufti.accel_type = ACCEL_SHUFTI;
        aux->shufti.offset = verify_u8(min_offset);
        return;
    }

    DEBUG_PRINTF("fail\n");
}