Beispiel #1
0
/**
 * \brief Compute the "stop alphabet" of graph \p g.
 *
 * The reach of every non-special vertex whose maximum depth is at least
 * MAX_STOP_DEPTH is unioned together, and the complement of that union is
 * returned. Seeing one of the returned characters therefore means the NFA is
 * either dead or has not progressed beyond MAX_STOP_DEPTH characters from its
 * start states.
 *
 * \param g   Graph to analyse.
 * \param som When SOM_NONE, each vertex contributes reduced_cr(); for any
 *            other value the vertex's own char_reach is used unmodified.
 * \return The set of characters absent from all sufficiently deep vertices.
 */
CharReach findStopAlphabet(const NGHolder &g, som_type som) {
    const depth depth_threshold(MAX_STOP_DEPTH);
    const InitDepths init_depths(g);
    // Empty map handed to reduced_cr() below.
    const map<NFAVertex, BoundedRepeatSummary> no_repeats;

    // Union of the reach of all vertices at or beyond the threshold.
    CharReach deep_reach;

    for (auto v : vertices_range(g)) {
        // Special vertices never contribute.
        if (is_special(v, g)) {
            continue;
        }

        // Neither do vertices that are shallower than the threshold.
        if (!(init_depths.maxDist(g, v) >= depth_threshold)) {
            continue;
        }

        if (som != SOM_NONE) {
            deep_reach |= g[v].char_reach;
        } else {
            deep_reach |= reduced_cr(v, g, no_repeats);
        }
    }

    // Everything that is NOT reachable at depth is a stop character.
    deep_reach.flip();

    return deep_reach;
}
Beispiel #2
0
// Checks CharReach::flip() in both its whole-set form (no argument) and its
// single-position form (one argument).
TEST(ng_charreach, flip) {
    CharReach reach;

    // A freshly constructed set is expected to be empty.
    ASSERT_EQ(0U, reach.count());
    ASSERT_TRUE(reach.none());

    // Flipping the empty set should yield the full set...
    reach.flip();
    ASSERT_EQ(reach.size(), reach.count());
    ASSERT_TRUE(reach.all());

    // ...and flipping again should bring us back to empty.
    reach.flip();
    ASSERT_EQ(0U, reach.count());
    ASSERT_TRUE(reach.none());

    // Flipping a single position turns on exactly that one member.
    reach.flip(25);
    ASSERT_FALSE(reach.none());
    ASSERT_FALSE(reach.all());
    ASSERT_EQ(1U, reach.count());

    // The complement of a one-member set has every member but one.
    reach.flip();
    ASSERT_EQ(reach.size() - 1, reach.count());
}
Beispiel #3
0
// Checks CharReach::isAlpha() and CharReach::to_string() on a set that starts
// with a mix of letters and non-letters and is then pared down to letters.
TEST(ng_charreach, string) {
    CharReach reach;

    // Populate with three letters plus four non-letter values.
    reach.set(1);
    reach.set(2);
    reach.set('a');
    reach.set('Z');
    reach.set('m');
    reach.set('~');
    reach.set(210);
    ASSERT_FALSE(reach.isAlpha());

    // Toggle the non-letter members back off, leaving only 'Z', 'a', 'm'.
    reach.flip(1);
    reach.flip(2);
    reach.flip('~');
    reach.flip(210);
    ASSERT_TRUE(reach.isAlpha());

    // The remaining members are expected in ascending character order.
    ASSERT_EQ("Zam", reach.to_string());
}
/**
 * \brief Add a predefined character class to this component's reach.
 *
 * \param c        The predefined class to add; translated through
 *                 translateForUcpMode() first when mode.ucp is set.
 * \param negative If true, the complement of the class is added instead.
 *
 * \throws LocatedParseError if a range is currently pending (in_cand_range),
 *         since a predefined class cannot be the endpoint of a range.
 */
void AsciiComponentClass::add(PredefinedClass c, bool negative) {
    // A predefined class cannot close off a pending range.
    if (in_cand_range) {
        throw LocatedParseError("Invalid range in character class");
    }
    DEBUG_PRINTF("getting %u %s\n", static_cast<u32>(c), negative ? "^" : "");

    if (mode.ucp) {
        c = translateForUcpMode(c, mode);
    }

    // Note: getPredefinedCharReach takes care of caselessness for us.
    CharReach class_reach = getPredefinedCharReach(c, mode);
    if (negative) {
        class_reach.flip();
    }
    cr |= class_reach;

    // Reset range-tracking state now that the class has been merged in.
    range_start = INVALID_UNICODE;
    in_cand_range = false;
}