/*
 * Round-trip self-test for the SIMD bit-packing routines.
 *
 * For gap = 1, 3, 9, ..., 387420489 the input array is filled with
 * k * gap (reduced modulo 2^32) and every block of SIMDBlockSize integers
 * is packed and unpacked twice: once with the plain routines and once with
 * the differential ("d1") routines. Each unpacked block is compared with
 * the input it came from.
 *
 * Returns 0 on success, -1 if a work buffer cannot be allocated, -2 if the
 * plain round trip differs from the input, -3 if the differential round
 * trip differs. All buffers are released on every path.
 */
int test() {
    const int N = 5000 * SIMDBlockSize;
    int status = 0;
    uint32_t gap;
    /* packed output: as many bytes as one uncompressed block */
    __m128i *buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
    uint32_t *datain = malloc((size_t)N * sizeof(uint32_t));
    uint32_t *backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));

    if (buffer == NULL || datain == NULL || backbuffer == NULL) {
        printf("memory allocation failed\n");
        status = -1;
        goto cleanup;
    }

    for (gap = 1; gap <= 387420489; gap *= 3) {
        int k;
        /* Last value of the previous block; the first block is coded
           relative to 0. Reset for every gap because the data changes. */
        uint32_t offset = 0;

        printf(" gap = %u \n", (unsigned)gap);

        /* Unsigned arithmetic: the product wraps modulo 2^32 instead of
           overflowing a signed int. */
        for (k = 0; k < N; ++k)
            datain[k] = (uint32_t)k * gap;

        for (k = 0; k * SIMDBlockSize < N; ++k) {
            uint32_t *block = datain + k * SIMDBlockSize;
            uint32_t b, b1;
            int j;

            /*
             * First part works for general arrays (sorted or unsorted).
             */
            /* we compute the bit width */
            b = maxbits(block);
            /* we read SIMDBlockSize integers at "block" and write
               b 128-bit vectors at "buffer" */
            simdpackwithoutmask(block, buffer, b);
            /* we read back b 128-bit vectors at "buffer" and write
               SIMDBlockSize integers at "backbuffer" */
            simdunpack(buffer, backbuffer, b);
            for (j = 0; j < SIMDBlockSize; ++j) {
                if (backbuffer[j] != block[j]) {
                    printf("bug in simdpack\n");
                    status = -2;
                    goto cleanup;
                }
            }

            /*
             * Next part uses differential coding, intended for sorted data.
             */
            /* we compute the bit width of the differences */
            b1 = simdmaxbitsd1(offset, block);
            /* we read SIMDBlockSize integers at "block" and write
               b1 128-bit vectors at "buffer" */
            simdpackwithoutmaskd1(offset, block, buffer, b1);
            /* we read back b1 128-bit vectors at "buffer" and write
               SIMDBlockSize integers at "backbuffer" */
            simdunpackd1(offset, buffer, backbuffer, b1);
            for (j = 0; j < SIMDBlockSize; ++j) {
                if (backbuffer[j] != block[j]) {
                    printf("bug in simdpack d1\n");
                    status = -3;
                    goto cleanup;
                }
            }

            /* the next block is coded relative to the end of this one */
            offset = block[SIMDBlockSize - 1];
        }
    }

    printf("Code looks good.\n");

cleanup:
    free(buffer);
    free(datain);
    free(backbuffer);
    return status;
}
/* Used below in more_sophisticated_demo ... */
/*
 * Compresses "length" integers from "datain" into "buffer", one block of
 * SIMDBlockSize integers at a time. Each block is stored as a single byte
 * holding its bit width (as reported by maxbits) followed by that many
 * 128-bit vectors of packed data.
 *
 * Only complete blocks are compressed; if "length" is not a multiple of
 * SIMDBlockSize a message is printed and the trailing integers are left out.
 *
 * Returns the number of bytes written to "buffer".
 */
size_t varying_bit_width_compress(uint32_t * datain, size_t length, uint8_t * buffer) {
    uint8_t * const start = buffer;
    size_t blocks_left = length / SIMDBlockSize;

    if (length % SIMDBlockSize != 0) {
        printf("Data length should be a multiple of %i \n",SIMDBlockSize);
    }

    while (blocks_left > 0) {
        const uint32_t width = maxbits(datain);

        /* one-byte header: the bit width used for this block */
        buffer[0] = width;
        simdpackwithoutmask(datain, (__m128i *)(buffer + 1), width);

        /* step over the header and the "width" packed vectors */
        buffer += 1 + width * sizeof(__m128i);
        datain += SIMDBlockSize;
        --blocks_left;
    }

    return buffer - start;
}
//---------------- FastPFor functions --------------------- unsigned char *simdpackwn(uint32_t *in, uint32_t n, uint32_t b, uint32_t *out) {//checkifdivisibleby(n, 128); const uint32_t * const initout(out); //while(needPaddingTo128Bits(out)) *out++ = 123456; uint32_t *in_; for(in_ = in + n; in + 128 <= in_; in += 128, out += 4 * b) simdpackwithoutmask(in, (__m128i *)out, b); return (unsigned char *)out; }