예제 #1
0
/**
 * Round-trip test for the SIMD bit-packing routines.
 *
 * For every gap in 1, 3, 9, ..., 3^18 the input array is filled with
 * multiples of the gap (modulo 2^32). Each block of SIMDBlockSize integers
 * is then packed and unpacked twice:
 *   1. with simdpackwithoutmask / simdunpack, and
 *   2. with simdpackwithoutmaskd1 / simdunpackd1 (differential coding),
 * and the unpacked block is compared with the input.
 *
 * Returns 0 on success, -1 if a buffer could not be allocated, -2 if the
 * plain round trip differs from the input, -3 if the differential round
 * trip differs. All buffers are released on every path.
 */
int test() {
    const int N = 5000 * SIMDBlockSize;
    int rc = 0;
    uint32_t gap;
    /* SIMDBlockSize * sizeof(uint32_t) bytes are allocated for the packed block */
    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
    uint32_t * datain = malloc(N * sizeof(uint32_t));
    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));

    if (buffer == NULL || datain == NULL || backbuffer == NULL) {
        printf("memory allocation failed\n");
        rc = -1;
        goto cleanup;
    }

    for (gap = 1; gap <= 387420489; gap *= 3) {
        int k;
        printf(" gap = %u \n", (unsigned) gap);
        /* unsigned arithmetic: the product wraps modulo 2^32 instead of
           overflowing a signed int */
        for (k = 0; k < N; ++k)
            datain[k] = (uint32_t) k * gap;
        for (k = 0; k * SIMDBlockSize < N; ++k) {
            /*
               First part works for general arrays (sorted or unsorted)
            */
            int j;
            uint32_t * block = datain + k * SIMDBlockSize;
            /* we compute the bit width */
            const uint32_t b = maxbits(block);
            /* we read 128 integers at "block" and
               write b 128-bit vectors at "buffer" */
            simdpackwithoutmask(block, buffer, b);
            /* we read back b 128-bit vectors at "buffer" and write 128 integers at backbuffer */
            simdunpack(buffer, backbuffer, b);/* uncompressed */
            for (j = 0; j < SIMDBlockSize; ++j) {
                if (backbuffer[j] != block[j]) {
                    printf("bug in simdpack\n");
                    rc = -2;
                    goto cleanup;
                }
            }

            {
                /*
                 next part assumes that the data is sorted (uses differential coding);
                 every block is coded relative to an initial offset of 0
                */
                const uint32_t offset = 0;
                /* we compute the bit width */
                const uint32_t b1 = simdmaxbitsd1(offset, block);
                /* we read 128 integers at "block" and
                   write b1 128-bit vectors at "buffer" */
                simdpackwithoutmaskd1(offset, block, buffer, b1);
                /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
                simdunpackd1(offset, buffer, backbuffer, b1);
                for (j = 0; j < SIMDBlockSize; ++j) {
                    if (backbuffer[j] != block[j]) {
                        printf("bug in simdpack d1\n");
                        rc = -3;
                        goto cleanup;
                    }
                }
            }
        }
    }
    printf("Code looks good.\n");

cleanup:
    /* single exit point so that failures do not leak the buffers */
    free(buffer);
    free(datain);
    free(backbuffer);
    return rc;
}
예제 #2
0
파일: example.c 프로젝트: rla3rd/simdcomp
/*
 * Another illustration: a small decoding benchmark.
 *
 * For each gap in 1, 3, ..., 243 a sorted array of N integers is built with
 * random increments in [0, gap] and compressed with compress(). The function
 * then times (1) block-by-block differential decoding with simdunpackd1 and
 * (2) a plain memcpy of the uncompressed blocks as a baseline, and prints
 * both speeds. Nothing is returned; if a buffer cannot be allocated a
 * message is printed and the function returns early.
 */
void simple_demo() {
  size_t REPEAT = 10, gap;
  size_t N = 1000 * SIMDBlockSize;/* SIMDBlockSize is 128 */
  uint32_t * datain = malloc(N * sizeof(uint32_t));
  size_t compsize;
  clock_t start, end;
  /* output buffer: N * sizeof(uint32_t) bytes plus one byte per block
     (the decoder below reads a one-byte bit width in front of each block) */
  uint8_t * buffer = malloc(N * sizeof(uint32_t) + N / SIMDBlockSize);
  uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));

  if (datain == NULL || buffer == NULL || backbuffer == NULL) {
      printf("memory allocation failed\n");
      free(buffer);
      free(datain);
      free(backbuffer);
      return;
  }

  printf("== simple demo\n");
  for (gap = 1; gap <= 243; gap *= 3) {
      size_t k, repeat;
      uint32_t bogus = 0; /* accumulates decoded values so the work is observable */
      double numberofseconds;

      printf("\n");
      printf(" gap = %lu \n", (unsigned long) gap);
      datain[0] = 0;
      for (k = 1; k < N; ++k)
          datain[k] = datain[k-1] + ( rand() % (gap + 1) );
      compsize = compress(datain,N,buffer);
      printf("compression ratio = %f \n",  (N * sizeof(uint32_t))/ (compsize * 1.0 ));

      /* (1) differential decoding */
      start = clock();
      for(repeat = 0; repeat < REPEAT; ++repeat) {
          uint8_t * decbuffer = buffer;
          /* every pass restarts from the beginning of the stream, so the
             running offset must restart too
             (assumes compress() codes the first block relative to 0 -- TODO confirm) */
          uint32_t offset = 0;
          for (k = 0; k * SIMDBlockSize < N; ++k) {
              /* each block: one byte holding the bit width b, then b 128-bit vectors */
              uint8_t b = *decbuffer++;
              simdunpackd1(offset, (__m128i *) decbuffer, backbuffer, b);
              /* do something here with backbuffer */
              bogus += backbuffer[3];
              decbuffer += b * sizeof(__m128i);
              /* the last decoded value is the base for the next block */
              offset = backbuffer[SIMDBlockSize - 1];
          }
      }
      end = clock();
      numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
      printf("decoding speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));

      /* (2) memcpy baseline: copy the same N integers, one block at a time,
         from the uncompressed input */
      start = clock();
      for(repeat = 0; repeat < REPEAT; ++repeat) {
          for (k = 0; k * SIMDBlockSize < N; ++k) {
              memcpy(backbuffer,datain+k*SIMDBlockSize,SIMDBlockSize*sizeof(uint32_t));
              bogus += backbuffer[3] - backbuffer[100];
          }
      }
      end = clock();
      numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
      printf("memcpy speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));
      /* %i expects an int; convert explicitly */
      printf("ignore me %i \n",(int) bogus);
      printf("All tests are in CPU cache. Avoid out-of-cache decoding in applications.\n");
  }
  free(buffer);
  free(datain);
  free(backbuffer);
}
예제 #3
0
/**
 * Exercises simdsearchd1 and simdsearchwithlengthd1 on delta-encoded blocks.
 *
 * For every bit width b in 0..32 a non-decreasing block of 128 values is
 * built, packed with simdpackwithoutmaskd1 and checked to round-trip through
 * simdunpackd1. The block is then searched for each stored value, for each
 * value minus one and for each value plus one. Every search asserts that
 * both search functions agree on the position, that the position is the
 * first element not smaller than the key, and that `result` holds the
 * element found there.
 *
 * Returns 0; failures are reported through assert().
 */
int test_simdpackedsearch_advanced() {
    uint32_t buffer[128];
    uint32_t backbuffer[128];
    /* NOTE(review): out is accessed through __m128i pointers below; this
       presumably relies on the pack/search routines tolerating unaligned
       addresses -- confirm */
    uint32_t out[128];
    uint32_t result = 0;
    uint32_t b, i;
    uint32_t init = 0;
    __m128i initial = _mm_set1_epi32(init);


    /* this test creates delta encoded buffers with different bits, then
     * performs lower bound searches for each key */
    for (b = 0; b <= 32; b++) {
        uint32_t prev = init;
        /* initialize the buffer with values that fit in b bits */
        for (i = 0; i < 128; i++) {
            buffer[i] =  ((uint32_t)(1431655765 * i + 0xFFFFFFFF)) ;
            /* unsigned shift: a plain int 1 shifted by 31 would overflow */
            if(b < 32) buffer[i] %= ((uint32_t) 1 << b);
        }

        qsort(buffer,128, sizeof(uint32_t), uint32_cmp);

        /* turn the sorted values into running sums (may wrap modulo 2^32) */
        for (i = 0; i < 128; i++) {
           buffer[i] = buffer[i] + prev;
           prev = buffer[i];
        }
        /* clamp so the sequence stays non-decreasing even after a wrap */
        for (i = 1; i < 128; i++) {
            if(buffer[i] < buffer[i-1] )
                buffer[i] = buffer[i-1];
        }
        assert(simdmaxbitsd1(init, buffer)<=b);
        for (i = 0; i < 128; i++) {
            out[i] = 0; /* memset would do too */
        }

        /* delta-encode to 'b' bits */
        simdpackwithoutmaskd1(init, buffer, (__m128i *)out, b);
        simdunpackd1(init,  (__m128i *)out, backbuffer, b);

        for (i = 0; i < 128; i++) {
            assert(buffer[i] == backbuffer[i]);
        }

        printf("advanced simdsearchd1: %u bits\n", (unsigned) b);

        /* keys that are present in the block */
        for (i = 0; i < 128; i++) {
            int pos, pos_with_length;
            initial = _mm_set1_epi32(init);
            pos = simdsearchd1(&initial, (__m128i *)out, b,
                    buffer[i], &result);
            /* called outside assert() so that it still runs under NDEBUG */
            pos_with_length = simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
                    buffer[i], &result);
            assert(pos == pos_with_length);
            assert(buffer[pos] == buffer[i]);
            if(pos > 0)
                assert(buffer[pos - 1] < buffer[i]);
            assert(result == buffer[i]);
        }
        /* keys just below each stored value */
        for (i = 0; i < 128; i++) {
            int pos, pos_with_length;
            if(buffer[i] == 0) continue; /* buffer[i] - 1 would wrap */
            initial = _mm_set1_epi32(init);
            pos = simdsearchd1(&initial, (__m128i *)out, b,
                    buffer[i] - 1, &result);
            pos_with_length = simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
                    buffer[i] - 1, &result);
            assert(pos == pos_with_length);
            assert(buffer[pos] >= buffer[i]  - 1);
            if(pos > 0)
                assert(buffer[pos - 1] < buffer[i]  - 1);
            assert(result == buffer[pos]);
        }
        /* keys just above each stored value */
        for (i = 0; i < 128; i++) {
            int pos, pos_with_length;
            if (buffer[i] + 1 == 0)
                continue; /* buffer[i] + 1 would wrap */
            initial = _mm_set1_epi32(init);
            pos = simdsearchd1(&initial, (__m128i *) out, b,
                    buffer[i] + 1, &result);
            pos_with_length = simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
                    buffer[i] + 1, &result);
            assert(pos == pos_with_length);
            if(pos == 128) {
                /* not found: the key is larger than every element */
                assert(buffer[i] == buffer[127]);
            } else {
                assert(buffer[pos] >= buffer[i] + 1);
                if (pos > 0)
                    assert(buffer[pos - 1] < buffer[i] + 1);
                assert(result == buffer[pos]);
            }
        }
    }
    printf("advanced simdsearchd1: ok\n");
    return 0;
}
예제 #4
0
파일: ext.c 프로젝트: bowlofstew/TurboPFor
/*
 * Unpacks every complete 128-integer block of an n-integer sequence with
 * simdunpackd1, using bit width b for all blocks. Each block consumes
 * 4 * b 32-bit words of `in` and produces 128 integers in `out`; a trailing
 * partial block (n % 128 integers) is not decoded.
 *
 * Returns the input position just past the last block consumed.
 *
 * NOTE(review): `start` is passed unchanged to every block rather than being
 * advanced to the last decoded value; this must match how the data was
 * packed -- confirm against the corresponding pack routine.
 */
unsigned char *simdunpackn1(uint32_t *in, uint32_t n, uint32_t b, uint32_t start, uint32_t *out) {
  uint32_t remaining;
  /* count-based loop: never forms a pointer beyond out + n */
  for (remaining = n; remaining >= 128; remaining -= 128) {
    simdunpackd1(start, (__m128i *)in, out, b);
    out += 128;
    in += 4 * b;
  }
  return (unsigned char *)in;
}