Exemplo n.º 1
0
size_t buffer_find_nonzero_offset(const void *buf, size_t len)
{
    const VECTYPE *p = buf;
    const VECTYPE zero = (VECTYPE){0};
    size_t i;

    assert(can_use_buffer_find_nonzero_offset(buf, len));

    if (!len) {
        return 0;
    }

    for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
        if (!ALL_EQ(p[i], zero)) {
            return i * sizeof(VECTYPE);
        }
    }

    for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
         i < len / sizeof(VECTYPE);
         i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
        VECTYPE tmp0 = p[i + 0] | p[i + 1];
        VECTYPE tmp1 = p[i + 2] | p[i + 3];
        VECTYPE tmp2 = p[i + 4] | p[i + 5];
        VECTYPE tmp3 = p[i + 6] | p[i + 7];
        VECTYPE tmp01 = tmp0 | tmp1;
        VECTYPE tmp23 = tmp2 | tmp3;
        if (!ALL_EQ(tmp01 | tmp23, zero)) {
            break;
        }
    }

    return i * sizeof(VECTYPE);
}
Exemplo n.º 2
0
/*
 * Checks if a buffer is all zeroes
 *
 * Attention! The len must be a multiple of 4 * sizeof(long) due to
 * restriction of optimizations in this function.
 */
bool buffer_is_zero(const void *buf, size_t len)
{
    /*
     * Use long as the biggest available internal data type that fits into the
     * CPU register and unroll the loop to smooth out the effect of memory
     * latency.
     */

    size_t i;
    long d0, d1, d2, d3;
    const long * const data = buf;

    /* use vector optimized zero check if possible */
    if (can_use_buffer_find_nonzero_offset(buf, len)) {
        return buffer_find_nonzero_offset(buf, len) == len;
    }

    assert(len % (4 * sizeof(long)) == 0);
    len /= sizeof(long);

    for (i = 0; i < len; i += 4) {
        d0 = data[i + 0];
        d1 = data[i + 1];
        d2 = data[i + 2];
        d3 = data[i + 3];

        if (d0 || d1 || d2 || d3) {
            return false;
        }
    }

    return true;
}
Exemplo n.º 3
0
size_t buffer_find_nonzero_offset(const void *buf, size_t len)
{
    const VECTYPE *p = buf;
    const VECTYPE zero = (VECTYPE){0};
    size_t i;

    assert(can_use_buffer_find_nonzero_offset(buf, len));

    if (!len) {
        return 0;
    }

    for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
        if (!ALL_EQ(p[i], zero)) {
            return i * sizeof(VECTYPE);
        }
    }

    for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
         i < len / sizeof(VECTYPE);
         i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
        VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]);
        VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]);
        VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]);
        VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]);
        VECTYPE tmp01 = VEC_OR(tmp0, tmp1);
        VECTYPE tmp23 = VEC_OR(tmp2, tmp3);
        if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) {
            break;
        }
    }

    return i * sizeof(VECTYPE);
}