/*
 * Benchmark driver for the despace variants.
 *
 * Usage: prog [alignoffset]
 *   alignoffset  optional non-negative byte offset added to both working
 *                buffers so the kernels can be timed on misaligned input.
 *
 * Two buffers of N + alignoffset bytes are allocated; the benchmarks work on
 * the N bytes starting at `alignoffset` inside each one. A memcpy of N bytes
 * is timed first, then each despace implementation in turn.
 *
 * NOTE(review): BEST_TIME / BEST_TIME_NOCHECK_NOPRE are defined elsewhere;
 * the BEST_TIME arguments here appear to be (expression, expected result,
 * pre-run setup, repeat count, size) -- confirm against the macro definition.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on a negative offset or failed
 * allocation.
 */
int main(int argc, char **argv) {
    const int N = 1024 * 32;
    int alignoffset = 0;
    if (argc > 1) {
        alignoffset = atoi(argv[1]);
        printf("alignment offset = %d \n", alignoffset);
    }
    /* A negative offset would shrink the allocation and move the working
     * pointers before the start of the buffers. */
    if (alignoffset < 0) {
        fprintf(stderr, "alignment offset must be non-negative\n");
        return EXIT_FAILURE;
    }

    /* Computed in size_t so a large offset cannot overflow int arithmetic. */
    const size_t alloc_size = (size_t)N + (size_t)alignoffset;
    char *origbuffer = malloc(alloc_size);
    char *origtmpbuffer = malloc(alloc_size);
    if (origbuffer == NULL || origtmpbuffer == NULL) {
        fprintf(stderr, "failed to allocate benchmark buffers\n");
        free(origbuffer);
        free(origtmpbuffer);
        return EXIT_FAILURE;
    }
    char *buffer = origbuffer + alignoffset;
    char *tmpbuffer = origtmpbuffer + alignoffset;

    /* Largest power of two dividing the address of the working buffer. */
    printf("pointer alignment = %d bytes \n",
           1 << __builtin_ctzll((uintptr_t)(const void *)(buffer)));

    int repeat = 100;
    size_t howmanywhite = fillwithtext(buffer, N);

    BEST_TIME_NOCHECK_NOPRE(memcpy(tmpbuffer, buffer, N), repeat, N);
    printf("\n");

    /* The buffer is refilled through the setup argument of each BEST_TIME,
     * which also refreshes howmanywhite. */
    BEST_TIME(despace(buffer, N), N - howmanywhite,
              howmanywhite = fillwithtext(buffer, N), repeat, N);
    BEST_TIME(neon_despace(buffer, N), N - howmanywhite,
              howmanywhite = fillwithtext(buffer, N), repeat, N);
    BEST_TIME(neon_despace_branchless(buffer, N), N - howmanywhite,
              howmanywhite = fillwithtext(buffer, N), repeat, N);
    BEST_TIME(neontbl_despace(buffer, N), N - howmanywhite,
              howmanywhite = fillwithtext(buffer, N), repeat, N);

    free(origbuffer);
    free(origtmpbuffer);
    return EXIT_SUCCESS;
}
/*
 * Times several shuffle implementations on an array of `size` 32-bit values.
 *
 * A reference copy of the initial array is kept in `pristinecopy`. Before the
 * first benchmark and after each one, sortAndCompare(testvalues,
 * pristinecopy, size) is called; a non-zero result stops the demo.
 *
 * Both arrays are released on every exit path (the early exits previously
 * leaked them).
 *
 * NOTE(review): BEST_TIME is defined elsewhere; its arguments here appear to
 * be (expression, pre-run setup, repeat count, size) -- confirm.
 */
void demo(int size) {
    printf("Shuffling arrays of size %d \n", size);
    printf("Time reported in number of cycles per array element.\n");
    printf("Tests assume that array is in cache as much as possible.\n");
    int repeat = 500;

    uint32_t *testvalues = create_random_array(size);
    uint32_t *pristinecopy = malloc((size_t)size * sizeof(uint32_t));
    if (testvalues == NULL || pristinecopy == NULL) {
        fprintf(stderr, "demo: allocation failed for size %d\n", size);
        goto cleanup;
    }
    memcpy(pristinecopy, testvalues, sizeof(uint32_t) * size);

    if (sortAndCompare(testvalues, pristinecopy, size) != 0) {
        goto cleanup;
    }
    BEST_TIME(shuffle_pcg(testvalues, size),
              array_cache_prefetch(testvalues, size), repeat, size);
    if (sortAndCompare(testvalues, pristinecopy, size) != 0) {
        goto cleanup;
    }
    BEST_TIME(shuffle_pcg_go(testvalues, size),
              array_cache_prefetch(testvalues, size), repeat, size);
    if (sortAndCompare(testvalues, pristinecopy, size) != 0) {
        goto cleanup;
    }
    BEST_TIME(shuffle_pcg_java(testvalues, size),
              array_cache_prefetch(testvalues, size), repeat, size);
    if (sortAndCompare(testvalues, pristinecopy, size) != 0) {
        goto cleanup;
    }
    BEST_TIME(shuffle_pcg_divisionless(testvalues, size),
              array_cache_prefetch(testvalues, size), repeat, size);
    if (sortAndCompare(testvalues, pristinecopy, size) != 0) {
        goto cleanup;
    }
    BEST_TIME(shuffle_pcg_divisionless_with_slight_bias(testvalues, size),
              array_cache_prefetch(testvalues, size), repeat, size);
    if (sortAndCompare(testvalues, pristinecopy, size) != 0) {
        goto cleanup;
    }

    /* Trailing newline is only printed when every check passed. */
    printf("\n");

cleanup:
    free(testvalues);
    free(pristinecopy);
}
/*
 * Benchmarks sumofmod23 against fastsumofmod23, then compares mod23 and
 * fastmod23 on every non-zero 64-bit input.
 *
 * The expected value passed to both benchmarks is computed inline with the
 * recurrence  sum += (k + sum) % 23  for k in [0, maxval).
 *
 * The verification loop starts at 1 and stops only when x wraps around to 0,
 * i.e. after 2^64 - 1 iterations. Each mismatch is printed in hexadecimal.
 *
 * NOTE(review): BEST_TIME is defined elsewhere; its arguments here appear to
 * be (expression, expected result, repeat count, size) -- confirm.
 */
int main() {
    uint64_t sumofmods = 0;
    const uint64_t maxval = 1000000;
    for (uint64_t k = 0; k < maxval; ++k) {
        sumofmods += (k + sumofmods) % 23;
    }

    const int repeat = 5;
    BEST_TIME(sumofmod23(maxval), sumofmods, repeat, maxval);
    BEST_TIME(fastsumofmod23(maxval), sumofmods, repeat, maxval);

    for (uint64_t x = 1; x != 0; x++) {
        if (mod23(x) != fastmod23(x)) {
            /* x is 64 bits wide: "%x" expects unsigned int, so print through
             * unsigned long long to match the argument type. */
            printf("%llx\n", (unsigned long long)x);
        }
    }
    return 0;
}
/*
 * Times the scalar and vector shift routines on an array of N = 1000
 * 32-bit values with a shift amount of 3.
 *
 * randominit(array, N) is called once up front and again through the setup
 * argument of every BEST_TIME invocation.
 *
 * NOTE(review): BEST_TIME is defined elsewhere; its arguments here appear to
 * be (expression, expected result, pre-run setup, repeat count, size, flag)
 * -- confirm the meaning of the trailing `true`.
 */
void array_shifting() {
    printf("[array shifting]\n");
    int repeat = 5;
    size_t N = 1000;
    uint32_t *array = malloc(N * sizeof(uint32_t));
    if (array == NULL) {
        /* Do not hand a null pointer to randominit or the shift routines. */
        fprintf(stderr, "array_shifting: allocation failed\n");
        return;
    }
    randominit(array, N);
    const int shiftamount = 3;

    BEST_TIME(scalarshift(array, N, shiftamount), 0,
              randominit(array, N), repeat, N, true);
    BEST_TIME(vectorshift(array, N, shiftamount), 0,
              randominit(array, N), repeat, N, true);
    BEST_TIME(vectorshift_unrolled(array, N, shiftamount), 0,
              randominit(array, N), repeat, N, true);
    BEST_TIME(varvectorshift(array, N, shiftamount), 0,
              randominit(array, N), repeat, N, true);
    BEST_TIME(varvectorshift_unrolled(array, N, shiftamount), 0,
              randominit(array, N), repeat, N, true);
    BEST_TIME(variablevectorshift(array, N, shiftamount), 0,
              randominit(array, N), repeat, N, true);
    BEST_TIME(variablevectorshift_unrolled(array, N, shiftamount), 0,
              randominit(array, N), repeat, N, true);

    free(array);
}
/*
 * Times modsum and fastsum (and, when compiled with __AVX2__, vectorsum and
 * maskedvectorsum) on an array `z` of N values from rand() and 500 access
 * values, also from rand().
 *
 * Each routine is called once to obtain the expected value handed to
 * BEST_TIME. vectorsum is only timed when N is a multiple of 4 and
 * maskedvectorsum only when N is a multiple of 8.
 *
 * NOTE(review): BEST_TIME is defined elsewhere; its arguments here appear to
 * be (expression, expected result, repeat count, size) -- confirm.
 */
void demo(uint32_t N) {
    /* N is unsigned 32-bit; print it through unsigned long so the format
     * specifier matches the argument type. */
    printf("N = %lu\n", (unsigned long)N);

    uint32_t *z = malloc(N * sizeof(uint32_t));
    uint32_t nmbr = 500;
    uint32_t *accesses = malloc(nmbr * sizeof(uint32_t));
    /* malloc(0) may legitimately return NULL, so only treat a null `z` as a
     * failure when bytes were actually requested. */
    if ((z == NULL && N > 0) || accesses == NULL) {
        fprintf(stderr, "demo: allocation failed\n");
        free(z);
        free(accesses);
        return;
    }

    for (uint32_t i = 0; i < N; ++i) {
        z[i] = rand(); // some rand. number
    }
    for (uint32_t i = 0; i < nmbr; ++i) {
        accesses[i] = rand(); // some rand. number
    }

    uint32_t expected1 = modsum(z, N, accesses, nmbr);
    uint32_t expected2 = fastsum(z, N, accesses, nmbr);
    BEST_TIME(modsum(z, N, accesses, nmbr), expected1, 1000, nmbr);
    BEST_TIME(fastsum(z, N, accesses, nmbr), expected2, 1000, nmbr);
#ifdef __AVX2__
    uint32_t expected3 = vectorsum(z, N, accesses, nmbr);
    /* Braces keep the whole macro expansion under the condition. */
    if (N % 4 == 0) {
        BEST_TIME(vectorsum(z, N, accesses, nmbr), expected3, 1000, nmbr);
    }
    uint32_t expected4 = maskedvectorsum(z, N, accesses, nmbr);
    if (N % 8 == 0) {
        BEST_TIME(maskedvectorsum(z, N, accesses, nmbr), expected4, 1000, nmbr);
    }
#endif

    free(z);
    free(accesses);
}
/*
 * Times is_ascii and the UTF-8 validation routines on a buffer of N bytes.
 *
 * populate(data, N) is passed as the setup argument of every BEST_TIME
 * invocation, and every routine is expected to return `true`.
 *
 * NOTE(review): BEST_TIME is defined elsewhere; its arguments here appear to
 * be (expression, expected result, pre-run setup, repeat count, size, flag)
 * -- confirm the meaning of the trailing `true`.
 */
void demo(size_t N) {
    printf("string size = %zu \n", N);
    char *data = (char *)malloc(N);
    /* malloc(0) may legitimately return NULL, so only treat a null pointer
     * as a failure when bytes were actually requested. */
    if (data == NULL && N > 0) {
        fprintf(stderr, "demo: allocation of %zu bytes failed\n", N);
        return;
    }
    bool expected = true; // it is all ascii?
    int repeat = 5;
    printf("We are feeding ascii so it is always going to be ok.\n");

    BEST_TIME(is_ascii(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(validate_utf8(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(validate_utf8_branchless(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(validate_utf8_double(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(shiftless_validate_utf8(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(shiftless_validate_utf8_branchless(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(shiftless_validate_utf8_double(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(validate_utf8_sse_nocheating(data, N), expected,
              populate(data, N), repeat, N, true);
    BEST_TIME(validate_utf8_sse(data, N), expected,
              populate(data, N), repeat, N, true);

    free(data);
}
int main() { int repeat = 500; int size = TESTSIZE; tellmeall(); printf("array container benchmarks\n"); array_container_t* B = array_container_create(); BEST_TIME(add_test(B), 0, repeat, size); int answer = contains_test(B); size = 1 << 16; BEST_TIME(contains_test(B), answer, repeat, size); size = (1 << 16) / 3; BEST_TIME(remove_test(B), 0, repeat, size); array_container_free(B); for (int howmany = 32; howmany <= (1 << 16); howmany *= 8) { array_container_t* Bt = array_container_create(); for (int j = 0; j < howmany; ++j) { array_container_add(Bt, (uint16_t)pcg32_random()); } size_t nbrtestvalues = 1024; uint16_t* testvalues = malloc(nbrtestvalues * sizeof(uint16_t)); printf("\n number of values in container = %d\n", Bt->cardinality); int card = array_container_cardinality(Bt); uint32_t* out = malloc(sizeof(uint32_t) * (unsigned long)card); BEST_TIME(array_container_to_uint32_array(out, Bt, 1234), card, repeat, card); free(out); BEST_TIME_PRE_ARRAY(Bt, array_container_contains, array_cache_prefetch, testvalues, nbrtestvalues); BEST_TIME_PRE_ARRAY(Bt, array_container_contains, array_cache_flush, testvalues, nbrtestvalues); free(testvalues); array_container_free(Bt); } printf("\n"); array_container_t* B1 = array_container_create(); for (int x = 0; x < 1 << 16; x += 3) { array_container_add(B1, (uint16_t)x); } array_container_t* B2 = array_container_create(); for (int x = 0; x < 1 << 16; x += 5) { array_container_add(B2, (uint16_t)x); } int32_t inputsize = B1->cardinality + B2->cardinality; array_container_t* BO = array_container_create(); printf("\nUnion and intersections...\n"); printf("\nNote:\n"); printf( "union times are expressed in cycles per number of input elements " "(both arrays)\n"); printf( "intersection times are expressed in cycles per number of output " "elements\n\n"); printf("==intersection and union test 1 \n"); printf("input 1 cardinality = %d, input 2 cardinality = %d \n", B1->cardinality, B2->cardinality); answer = union_test(B1, B2, BO); 
printf("union cardinality = %d \n", answer); printf("B1 card = %d B2 card = %d \n", B1->cardinality, B2->cardinality); BEST_TIME(union_test(B1, B2, BO), answer, repeat, inputsize); answer = intersection_test(B1, B2, BO); printf("intersection cardinality = %d \n", answer); BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); printf("==intersection and union test 2 \n"); array_container_clear(B1); array_container_clear(B2); for (int x = 0; x < 1 << 16; x += 16) { array_container_add(B1, (uint16_t)x); } for (int x = 1; x < 1 << 16; x += x) { array_container_add(B2, (uint16_t)x); } printf("input 1 cardinality = %d, input 2 cardinality = %d \n", B1->cardinality, B2->cardinality); answer = union_test(B1, B2, BO); printf("union cardinality = %d \n", answer); printf("B1 card = %d B2 card = %d \n", B1->cardinality, B2->cardinality); BEST_TIME(union_test(B1, B2, BO), answer, repeat, inputsize); answer = intersection_test(B1, B2, BO); printf("intersection cardinality = %d \n", answer); BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); array_container_free(B1); array_container_free(B2); array_container_free(BO); return 0; }
int main() { int repeat = 500; int size = (1 << 16) / 3; tellmeall(); printf("bitset container benchmarks\n"); bitset_container_t* B = bitset_container_create(); BEST_TIME(set_test(B), 0, repeat, size); int answer = get_test(B); size = 1 << 16; BEST_TIME(get_test(B), answer, repeat, size); BEST_TIME(bitset_container_cardinality(B), answer, repeat, 1); BEST_TIME(bitset_container_compute_cardinality(B), answer, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); size = (1 << 16) / 3; BEST_TIME(unset_test(B), 0, repeat, size); bitset_container_free(B); for (int howmany = 4096; howmany <= (1 << 16); howmany *= 2) { bitset_container_t* Bt = bitset_container_create(); while (bitset_container_cardinality(Bt) < howmany) { bitset_container_set(Bt, (uint16_t)pcg32_random()); } size_t nbrtestvalues = 1024; uint16_t* testvalues = malloc(nbrtestvalues * sizeof(uint16_t)); printf("\n number of values in container = %d\n", bitset_container_cardinality(Bt)); int card = bitset_container_cardinality(Bt); uint32_t* out = malloc(sizeof(uint32_t) * (unsigned)card + 32); BEST_TIME(bitset_container_to_uint32_array(out, Bt, 1234), card, repeat, card); free(out); BEST_TIME_PRE_ARRAY(Bt, bitset_container_get, bitset_cache_prefetch, testvalues, nbrtestvalues); BEST_TIME_PRE_ARRAY(Bt, bitset_container_get, bitset_cache_flush, testvalues, nbrtestvalues); free(testvalues); bitset_container_free(Bt); } printf("\n"); bitset_container_t* B1 = bitset_container_create(); for (int x = 0; x < 1 << 16; x += 3) { bitset_container_set(B1, (uint16_t)x); } bitset_container_t* B2 = bitset_container_create(); for (int x = 0; x < 1 << 16; x += 5) { bitset_container_set(B2, (uint16_t)x); } bitset_container_t* BO = bitset_container_create(); BEST_TIME(bitset_container_or_nocard(B1, B2, BO), -1, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); answer = bitset_container_compute_cardinality(BO); BEST_TIME(bitset_container_or(B1, B2, BO), answer, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); BEST_TIME(bitset_container_cardinality(BO), 
answer, repeat, 1); BEST_TIME(bitset_container_compute_cardinality(BO), answer, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); BEST_TIME(bitset_container_and_nocard(B1, B2, BO), -1, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); answer = bitset_container_compute_cardinality(BO); BEST_TIME(bitset_container_and(B1, B2, BO), answer, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); BEST_TIME(bitset_container_cardinality(BO), answer, repeat, 1); BEST_TIME(bitset_container_compute_cardinality(BO), answer, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); // next we are going to benchmark conversion from bitset to array (an // important step) bitset_container_clear(B1); for (int k = 0; k < 4096; ++k) { bitset_container_set(B1, (uint16_t)ranged_random(1 << 16)); } answer = get_cardinality_through_conversion_to_array(B1); BEST_TIME(get_cardinality_through_conversion_to_array(B1), answer, repeat, BITSET_CONTAINER_SIZE_IN_WORDS); bitset_container_free(BO); bitset_container_free(B1); bitset_container_free(B2); return 0; }