/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned
	 * non-negative) and non-zero on failure (decompression function
	 * returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
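/*
 * For context, a minimal sketch of the compress-side framing that
 * lz4_decompress() expects: the compressed length is stored big-endian in
 * the first 4 bytes of the destination so the decompressor can ignore any
 * trailing padding. This is illustrative only; it assumes an LZ4_compress()
 * routine that returns the compressed length (0 on failure) and is not
 * necessarily the exact counterpart in this codebase.
 */
/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	/* Compress into the space after the 4-byte size header. */
	bufsiz = LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal incompressible data by returning the source length. */
	if (bufsiz == 0)
		return (s_len);

	/* Encode the compressed buffer size, big-endian, at the start. */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}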
/*
 * The algorithm to calculate RSS Toeplitz hash is essentially as follows:
 * - Regard a Toeplitz key and an input as bit strings, with the
 *   most significant bit of the first byte being the first bit
 * - Let's have a 32-bit window sliding over the Toeplitz key bit by bit
 * - Let the initial value of the hash be zero
 * - Then for every bit in the input that is set to 1, XOR the value of the
 *   window at a given bit position into the resulting hash
 *
 * First we note that since XOR is commutative and associative, the
 * resulting hash is just an XOR of sub-hashes for every input bit:
 *	H = H_0 XOR H_1 XOR ... XOR H_n				(1)
 * Then we note that every H_i depends only on the value of i and the
 * value of the i'th bit of the input, but not on any preceding or
 * following input bits.
 * Then we note that (1) also holds for any bit sequences,
 * e.g. for bytes of input:
 *	H = H_0_7 XOR H_8_15 XOR ... XOR H_(n-7)_n		(2)
 * and every
 *	H_i_j = H_i XOR H_(i+1) XOR ... XOR H_j.		(3)
 *
 * It naturally follows that H_i_(i+7) depends only on the value of the byte
 * and the position of the byte in the input.
 * Therefore we may pre-calculate the value of each byte sub-hash H_i_(i+7)
 * for each possible byte value and each possible byte input position, and
 * then just assemble the hash of the packet byte-by-byte instead of
 * bit-by-bit.
 *
 * The amount of memory required for such a cache is not prohibitive:
 * - we have at most 36 bytes of input, each holding 256 possible values
 * - the hash is 32 bits wide
 * - hence, we need only 36 * 256 * 4 = 36kBytes of cache.
 *
 * The performance gain, at least on synthetic benchmarks, is significant:
 * a cache lookup is about 15 times faster than a direct hash calculation.
 */
const uint32_t *
toeplitz_cache_init(const uint8_t *key)
{
	uint32_t *cache = kmem_alloc(SFXGE_TOEPLITZ_CACHE_SIZE *
	    sizeof (uint32_t), KM_SLEEP);
	unsigned i;

	for (i = 0; i < SFXGE_TOEPLITZ_IN_MAX; i++, key++) {
		uint32_t key_bits[NBBY] = { 0 };
		unsigned j;
		unsigned mask;
		unsigned byte;

		/*
		 * Build the key windows for bit offsets 0..7 within the
		 * i'th input byte: start from the 32-bit big-endian window
		 * at offset 0, then shift in the next key bit for each of
		 * the remaining offsets.
		 */
#if defined(BE_IN32)
		key_bits[0] = BE_IN32(key);
#else
		key_bits[0] = BE_32(*(uint32_t *)key);
#endif
		for (j = 1, mask = 1 << (NBBY - 1); j < NBBY;
		    j++, mask >>= 1) {
			key_bits[j] = key_bits[j - 1] << 1;
			if ((key[sizeof (uint32_t)] & mask) != 0)
				key_bits[j] |= 1;
		}

		/* Pre-compute the sub-hash for every possible byte value. */
		for (byte = 0; byte <= UINT8_MAX; byte++) {
			uint32_t res = 0;

			for (j = 0, mask = 1 << (NBBY - 1); j < NBBY;
			    j++, mask >>= 1) {
				if (byte & mask)
					res ^= key_bits[j];
			}
			cache[i * (UINT8_MAX + 1) + byte] = res;
		}
	}
	return (cache);
}
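/*
 * A hypothetical lookup-side sketch showing how the cache is meant to be
 * consumed: per (2) above, the hash of an input is assembled by XORing
 * the pre-computed per-byte sub-hashes. The function name and signature
 * are illustrative; only the cache layout (one 256-entry row per input
 * byte position) is taken from toeplitz_cache_init() above.
 */
static uint32_t
toeplitz_hash(const uint32_t *cache, const uint8_t *input, unsigned len)
{
	uint32_t hash = 0;
	unsigned pos;

	/* XOR the sub-hash H_pos_(pos+7) for each input byte; see (2). */
	for (pos = 0; pos < len; pos++)
		hash ^= cache[pos * (UINT8_MAX + 1) + input[pos]];

	return (hash);
}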