Ejemplo n.º 1
0
/**
 * Returns the index of the first byte in the literal that differs from the
 * byte immediately after it.
 *
 * When nocase is set and the byte at the current index is alphabetic, the two
 * bytes are compared after upper-casing; otherwise they are compared exactly.
 * If no adjacent pair differs, the index of the last pair examined (len - 2)
 * is returned. Literals shorter than two bytes yield 0.
 */
static
size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
    for (size_t pos = 0; pos + 1 < len; ++pos) {
        const char cur = lit[pos];
        const char next = lit[pos + 1];

        bool differs;
        if (nocase && ourisalpha(cur)) {
            differs = (mytoupper(cur) != mytoupper(next));
        } else {
            differs = (cur != next);
        }

        if (differs) {
            return pos;
        }
    }

    // Every adjacent pair matched: report the last pair's index, or zero when
    // there was no pair to look at.
    return len < 2 ? 0 : len - 2;
}
Ejemplo n.º 2
0
/**
 * Checks that the tail of string s agrees with the supplied mask/compare
 * bytes.
 *
 * s, msk and cmp are walked backwards in lockstep, starting from their last
 * elements, until either s or msk is exhausted. At each step the character c
 * must satisfy (c & m) == v, where m and v are the current msk and cmp bytes.
 * When nocase is set and c is alphabetic, CASE_BIT is cleared from both m and
 * v before the comparison.
 *
 * cmp must be at least as long as the portion of msk that is visited; this is
 * asserted before each cmp byte is read.
 *
 * Returns true if every visited position is consistent, false at the first
 * mismatch.
 */
bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
                      const vector<u8> &cmp) {
    string::const_reverse_iterator si = s.rbegin();
    vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin();

    for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) {
        // The loop condition only bounds s and msk, so make sure cmp still has
        // a byte available *before* it is dereferenced.
        assert(ci != cmp.rend());

        u8 c = *si, m = *mi, v = *ci;
        if (nocase && ourisalpha(c)) {
            // Case-insensitive alphabetic character: ignore the case bit.
            m &= ~CASE_BIT;
            v &= ~CASE_BIT;
        }

        if ((c & m) != v) {
            DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m);
            DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin());
            return false;
        }
    }

    return true;
}
Ejemplo n.º 3
0
/**
 * Returns the index of the first character in lit.s that differs from the
 * character immediately after it.
 *
 * For a nocase literal, a pair whose first character is alphabetic is
 * compared after upper-casing both characters; every other pair is compared
 * exactly. If no adjacent pair differs, the index of the last pair examined
 * (length - 2) is returned. Strings shorter than two characters yield 0.
 */
static
size_t findNoodFragOffset(const hwlmLiteral &lit) {
    const auto &str = lit.s;
    const size_t n = str.length();

    // True when the characters at pos and pos + 1 are considered different.
    const auto pairDiffers = [&](size_t pos) -> bool {
        const char cur = str[pos];
        const char next = str[pos + 1];
        if (lit.nocase && ourisalpha(cur)) {
            return mytoupper(cur) != mytoupper(next);
        }
        return cur != next;
    };

    for (size_t pos = 0; pos + 1 < n; ++pos) {
        if (pairDiffers(pos)) {
            return pos;
        }
    }

    // No differing pair found: fall back to the last pair's index (or zero if
    // the string had no pairs at all).
    return n < 2 ? 0 : n - 2;
}
Ejemplo n.º 4
0
/*
 * Builds the flood-control table for a set of literals.
 *
 * For each literal the code takes its final character c and walks backwards
 * from the end of the literal string and of its msk/cmp bytes to find how far
 * the literal keeps matching c. The outcome is recorded against c in a
 * per-character FDRFlood entry through updateFloodSuffix() or addFlood().
 * Entries that compare equal under FloodComparator are then stored only once.
 *
 * The returned buffer (allocated with aligned_zmalloc) holds N_CHARS u32
 * indices, one per character value, followed by the distinct FDRFlood
 * structures those indices refer to. The second element of the pair is the
 * buffer size in bytes, as rounded by ROUNDUP_16.
 *
 * lits - literals to analyse.
 * eng  - supplies the default flood suffix length.
 *
 * NOTE(review): lit.s[litSize - 1] assumes every literal string is non-empty;
 * confirm that callers guarantee this.
 */
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
                                        const EngineDescription &eng) {
    vector<FDRFlood> tmpFlood(N_CHARS);
    u32 default_suffix = eng.getDefaultFloodSuffixLength();

    // zero everything to avoid spurious distinctions in the compares
    memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));

    // every character starts out with the engine's default suffix length
    for (u32 c = 0; c < N_CHARS; c++) {
        tmpFlood[c].suffix = default_suffix;
    }

    for (const auto &lit : lits) {
        DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
                     lit.nocase ? " (nocase)" : "");
        u32 litSize = verify_u32(lit.s.size());
        u32 maskSize = (u32)lit.msk.size();
        // c is the literal's final character; case-insensitivity is only
        // relevant when that character is alphabetic
        u8 c = lit.s[litSize - 1];
        bool nocase = ourisalpha(c) ? lit.nocase : false;

        // if the last mask byte covers CASE_BIT, the matching cmp byte pins
        // the case of c: fold c to that case and handle it as case-sensitive
        if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
            c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
            nocase = false;
        }

        u32 iEnd = MAX(litSize, maskSize);
        u32 upSuffix = iEnd; // upSuffix is used as an upper case suffix length
                             // for case-less, or as a suffix length for case-sensitive;
        u32 loSuffix = iEnd; // loSuffix used only for case-less as a lower case suffix
                             // length;

        // i is the distance back from the end of the literal / mask
        for (u32 i = 0; i < iEnd; i++) {
            if (i < litSize) {
                if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
                    DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
                                                i, c, lit.s[litSize - i - 1]);
                    upSuffix = MIN(upSuffix, i);
                    loSuffix = MIN(loSuffix, i); // makes sense only for case-less
                    break;
                }
            }
            if (i < maskSize) {
                u8 m = lit.msk[maskSize - i - 1];
                u8 cm = lit.cmp[maskSize - i - 1] & m;
                if(nocase) {
                    // test the upper- and lower-case forms of c against the
                    // mask separately; each keeps its own suffix length
                    if ((mytoupper(c) & m) != cm) {
                        DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                                                            i, mytoupper(c), cm);
                        upSuffix = MIN(upSuffix, i);
                    }
                    if ((mytolower(c) & m) != cm) {
                        DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                                                            i, mytolower(c), cm);
                        loSuffix = MIN(loSuffix, i);
                    }
                    // stop only once both case forms have hit a mismatch
                    if (loSuffix != iEnd && upSuffix != iEnd) {
                        break;
                    }
                } else if ((c & m) != cm) {
                    DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
                    upSuffix = MIN(upSuffix, i);
                    break;
                }
            }
        }
        // a suffix below iEnd means a mismatch was found at that distance, so
        // only the suffix is updated; otherwise the whole literal/mask matched
        // c and the literal is passed to addFlood
        if(upSuffix != iEnd) {
            updateFloodSuffix(tmpFlood, nocase ? mytoupper(c) : c, upSuffix);
        } else {
            addFlood(tmpFlood, nocase ? mytoupper(c) : c, lit, upSuffix);
        }
        // case-less literals also record a result for the lower-case form
        if (nocase) {
            if(loSuffix != iEnd) {
                updateFloodSuffix(tmpFlood, mytolower(c), loSuffix);
            } else {
                addFlood(tmpFlood, mytolower(c), lit, loSuffix);
            }
        }
    }

#ifdef DEBUG
    // dump every character entry that has at least one id recorded
    for (u32 i = 0; i < N_CHARS; i++) {
        FDRFlood &fl = tmpFlood[i];
        if (!fl.idCount) {
            continue;
        }

        printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
               "%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
        for (u32 j = 0; j < fl.idCount; j++) {
            printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
                   fl.groups[j], fl.len[j]);
        }
    }
#endif

    // group the characters whose FDRFlood entries compare equal under
    // FloodComparator, so that each distinct structure is written once
    map<FDRFlood, CharReach, FloodComparator> flood2chars;
    for (u32 i = 0; i < N_CHARS; i++) {
        FDRFlood fl = tmpFlood[i];
        flood2chars[fl].set(i);
    }

    // buffer layout: N_CHARS u32 indices, then the distinct FDRFlood structs
    u32 nDistinctFloods = flood2chars.size();
    size_t floodHeaderSize = sizeof(u32) * N_CHARS;
    size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
    size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *floodHeader = (u32 *)buf;
    FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize);

    // write each distinct structure and point every character that shares it
    // at the structure's index
    u32 currentFloodIndex = 0;
    for (const auto &m : flood2chars) {
        const FDRFlood &fl = m.first;
        const CharReach &cr = m.second;
        layoutFlood[currentFloodIndex] = fl;
        for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
            floodHeader[c] = currentFloodIndex;
        }
        currentFloodIndex++;
    }

    DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
                 floodHeaderSize, floodStructSize, totalSize);

    return make_pair((u8 *)buf, totalSize);
}
Ejemplo n.º 5
0
static
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
    const hwlmLiteral &first = *lits.front();

    struct candidate {
        candidate(void)
            : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
        candidate(const hwlmLiteral &base, u32 offset)
            : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
              b5insens(false), valid(true) {}
        char c1;
        char c2;
        u32 max_offset;
        bool b5insens;
        bool valid;

        bool operator>(const candidate &other) const {
            if (!valid) {
                return false;
            }

            if (!other.valid) {
                return true;
            }

            if (other.cdiffers() && !cdiffers()) {
                return false;
            }

            if (!other.cdiffers() && cdiffers()) {
                return true;
            }

            if (!other.b5insens && b5insens) {
                return false;
            }

            if (other.b5insens && !b5insens) {
                return true;
            }

            if (max_offset > other.max_offset) {
                return false;
            }

            return true;
        }

        bool cdiffers(void) const {
            if (!b5insens) {
                return c1 != c2;
            }
            return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
        }
    };

    candidate best;

    for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
        candidate curr(first, i);

        /* check to see if this pair appears in each string */
        for (const auto &lit_ptr : lits) {
            const hwlmLiteral &lit = *lit_ptr;
            if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
                curr.b5insens = true; /* no choice but to be case insensitive */
            }

            bool found = false;
            bool found_nc = false;
            for (u32 j = 0;
                 !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
                found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
                found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
                    && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);

                if (curr.b5insens) {
                    found = found_nc;
                }
            }

            if (!curr.b5insens && !found && found_nc) {
                curr.b5insens = true;
                found = true;
            }

            if (!found) {
                goto next_candidate;
            }
        }

        /* check to find the max offset where this appears */
        for (const auto &lit_ptr : lits) {
            const hwlmLiteral &lit = *lit_ptr;
            for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
                 j++) {
                bool found = false;
                if (curr.b5insens) {
                    found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
                     && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
                } else {
                    found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
                }

                if (found) {
                    curr.max_offset = MAX(curr.max_offset, j);
                    break;
                }
            }
        }

        if (curr > best) {
            best = curr;
        }

    next_candidate:;
    }

    if (!best.valid) {
        return false;
    }

    aux->dverm.offset = verify_u8(best.max_offset);

    if (!best.b5insens) {
        aux->dverm.accel_type = ACCEL_DVERM;
        aux->dverm.c1 = best.c1;
        aux->dverm.c2 = best.c2;
        DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
    } else {
        aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
        aux->dverm.c1 = best.c1 & CASE_CLEAR;
        aux->dverm.c2 = best.c2 & CASE_CLEAR;
        DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
    }
    return true;
}