void* test(void* thread) { size_t num_retry_cas1 = 0, num_retry_cas2 = 0, num_retry_cas3 = 0 , num_retry_cas4 = 0, num_retry_cas5 = 0; thread_data_t* td = (thread_data_t*) thread; uint8_t ID = td->id; phys_id = the_cores[ID % (NUMBER_OF_SOCKETS * CORES_PER_SOCKET)]; set_cpu(phys_id); ssmem_allocator_t* alloc = (ssmem_allocator_t*) memalign(CACHE_LINE_SIZE, sizeof(ssmem_allocator_t)); assert(alloc != NULL); ssmem_alloc_init(alloc, SSMEM_DEFAULT_MEM_SIZE, ID); ssmem_gc_thread_init(alloc, ID); PF_INIT(3, SSPFD_NUM_ENTRIES, ID); #if defined(COMPUTE_LATENCY) volatile ticks my_putting_succ = 0; volatile ticks my_putting_fail = 0; volatile ticks my_getting_succ = 0; volatile ticks my_getting_fail = 0; volatile ticks my_removing_succ = 0; volatile ticks my_removing_fail = 0; #endif uint64_t my_putting_count = 0; uint64_t my_getting_count = 0; uint64_t my_removing_count = 0; uint64_t my_putting_count_succ = 0; uint64_t my_getting_count_succ = 0; uint64_t my_removing_count_succ = 0; #if defined(COMPUTE_LATENCY) && PFD_TYPE == 0 volatile ticks start_acq, end_acq; volatile ticks correction = getticks_correction_calc(); #endif seeds = seed_rand(); MEM_BARRIER; barrier_cross(&barrier); barrier_cross(&barrier_global); size_t obj_size_bytes = obj_size * sizeof(size_t); volatile size_t* dat = (size_t*) malloc(obj_size_bytes); assert(dat != NULL); size_t* obj = NULL; while (stop == 0) { size_t rand = (my_random(&(seeds[0]), &(seeds[1]), &(seeds[2]))); size_t k = (rand & 1) + 2; rand &= 1023; /* search baby! */ int i; for (i = 0; i < KEY_BUCKT; i++) { volatile uintptr_t v = val[i]; if (snap->map[i] == MAP_VALID && key[i] == k) { if (val[i] == v) { if (GET_VAL(v) != k) { printf("[%02d] :get: key != val for %zu\n", ID, k); } break; } } } if (rand > 513) { my_putting_count++; if (obj != NULL) { ssmem_free(alloc, (void*) obj); } obj = ssmem_alloc(alloc, 8); *obj = k; int empty_index = -2; clht_snapshot_t s; retry: s.snapshot = snap->snapshot; int i; for (i = 0; i < KEY_BUCKT; i++) { volatile uintptr_t v = val[i]; if (snap->map[i] == MAP_VALID && key[i] == k) { if (val[i] == v) { if (empty_index > 0) { snap->map[empty_index] = MAP_INVLD; } goto end; } } } clht_snapshot_all_t s1; if (empty_index < 0) { empty_index = snap_get_empty_index(s.snapshot); if (empty_index < 0) { num_retry_cas1++; goto end; } s1 = snap_set_map(s.snapshot, empty_index, MAP_INSRT); if (CAS_U64(&snap->snapshot, s.snapshot, s1) != s.snapshot) { empty_index = -2; num_retry_cas2++; goto retry; } val[empty_index] = (uintptr_t) obj; key[empty_index] = k; } else { s1 = snap_set_map(s.snapshot, empty_index, MAP_INSRT); } clht_snapshot_all_t s2 = snap_set_map_and_inc_version(s1, empty_index, MAP_VALID); if (CAS_U64(&snap->snapshot, s1, s2) != s1) { num_retry_cas3++; /* key[empty_index] = 0; */ /* val[empty_index] = 0; */ goto retry; } obj = NULL; my_putting_count_succ++; end: ; } else { my_removing_count++; clht_snapshot_t s; retry_rem: s.snapshot = snap->snapshot; volatile uintptr_t v; int i, removed = 0; for (i = 0; i < KEY_BUCKT && !removed; i++) { if (key[i] == k && s.map[i] == MAP_VALID) { v = val[i]; clht_snapshot_all_t s1 = snap_set_map(s.snapshot, i, MAP_INVLD); if (CAS_U64(&snap->snapshot, s.snapshot, s1) == s.snapshot) { /* snap->map[i] = MAP_INVLD; */ removed = 1; } else { num_retry_cas4++; goto retry_rem; } } } if (removed) { ssmem_free(alloc, (void*) v); my_removing_count_succ++; } } } free((void*) dat); #if defined(DEBUG) if (put_num_restarts | put_num_failed_expand | put_num_failed_on_new) { /* printf("put_num_restarts = %3u / put_num_failed_expand = %3u / put_num_failed_on_new = %3u \n", */ /* put_num_restarts, put_num_failed_expand, put_num_failed_on_new); */ } #endif if (ID < 2) { printf("#retry-stats-thread-%d: #cas1: %-8zu / #cas2: %-8zu /" "#cas3: %-8zu / #cas4: %-8zu / #cas5: %-8zu\n", ID, num_retry_cas1, num_retry_cas2, num_retry_cas3, num_retry_cas4, num_retry_cas5); } /* printf("gets: %-10llu / succ: %llu\n", num_get, num_get_succ); */ /* printf("rems: %-10llu / succ: %llu\n", num_rem, num_rem_succ); */ barrier_cross(&barrier); #if defined(COMPUTE_LATENCY) putting_succ[ID] += my_putting_succ; putting_fail[ID] += my_putting_fail; getting_succ[ID] += my_getting_succ; getting_fail[ID] += my_getting_fail; removing_succ[ID] += my_removing_succ; removing_fail[ID] += my_removing_fail; #endif putting_count[ID] += my_putting_count; getting_count[ID] += my_getting_count; removing_count[ID]+= my_removing_count; putting_count_succ[ID] += my_putting_count_succ; getting_count_succ[ID] += my_getting_count_succ; removing_count_succ[ID]+= my_removing_count_succ; #if (PFD_TYPE == 1) && defined(COMPUTE_LATENCY) if (ID == 0) { printf("get ----------------------------------------------------\n"); SSPFDPN(0, SSPFD_NUM_ENTRIES, print_vals_num); printf("put ----------------------------------------------------\n"); SSPFDPN(1, SSPFD_NUM_ENTRIES, print_vals_num); printf("rem ----------------------------------------------------\n"); SSPFDPN(2, SSPFD_NUM_ENTRIES, print_vals_num); } #endif /* SSPFDTERM(); */ pthread_exit(NULL); }
int main(int argc, char* const argv[]) { #ifndef NO_SET_CPU set_cpu(the_cores[0]); #endif #ifdef PRINT_OUTPUT fprintf(stderr, "The size of the data being tested: %lu\n",sizeof(data_type)); fprintf(stderr, "Number of entries per cache line: %lu\n",CACHE_LINE_SIZE / sizeof(data_t)); #endif struct option long_options[] = { // These options don't set a flag {"help", no_argument, NULL, 'h'}, {"entries", required_argument, NULL, 'e'}, {"duration", required_argument, NULL, 'd'}, {"pause", required_argument, NULL, 'p'}, {"num-threads", required_argument, NULL, 'n'}, {"benchmark", required_argument, NULL, 'b'}, {NULL, 0, NULL, 0} }; correction = getticks_correction_calc(); int i, c; thread_data_t *data; pthread_t *threads; pthread_attr_t attr; barrier_t barrier; struct timeval start, end; struct timespec timeout; num_entries = DEFAULT_NUM_ENTRIES; num_threads = DEFAULT_NUM_THREADS; duration = DEFAULT_DURATION; benchmark = DEFAULT_BENCHMARK; op_pause = DEFAULT_PAUSE; sigset_t block_set; while(1) { i = 0; c = getopt_long(argc, argv, "he:d:p:n:b:", long_options, &i); if(c == -1) break; if(c == 0 && long_options[i].flag == 0) c = long_options[i].val; switch(c) { case 0: /* Flag is automatically set */ break; case 'h': printf("lock stress test\n" "\n" "Usage:\n" " atomic_bench [options...]\n" "\n" "Options:\n" " -h, --help\n" " Print this message\n" " -e, --entires <int>\n" " Number of entries in the test (default=" XSTR(DEFAULT_NUM_LOCKS) ")\n" " -d, --duration <int>\n" " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" " -p, --pause <int>\n" " Pause between consecutive atomic operations in cycles (default=" XSTR(DEFAULT_DURATION) ")\n" " -n, --num-threads <int>\n" " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" " -b, --benchmark <int>\n" " benchmark to perform (0=throughput in atomic operation call, 1=throughput in successful atomic ops, 2=atomic op latency, default=" XSTR(DEFAULT_BENCHMARK) ")\n" ); exit(0); case 'e': num_entries = atoi(optarg); break; case 'd': duration = atoi(optarg); break; case 'n': num_threads = atoi(optarg); break; case 'p': op_pause = atoi(optarg); break; case 'b': benchmark = atoi(optarg); break; case '?': printf("Use -h or --help for help\n"); exit(0); default: exit(1); } } op_pause=op_pause/NOP_DURATION; num_entries = pow2roundup(num_entries); assert(duration >= 0); assert(num_entries >= 1); assert(num_threads > 0); #ifdef PRINT_OUTPUT printf("Number of entries : %d\n", num_entries); printf("Duration : %d\n", duration); printf("Number of threads : %d\n", num_threads); printf("Type sizes : int=%d/long=%d/ptr=%d\n", (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *)); #endif timeout.tv_sec = duration / 1000; timeout.tv_nsec = (duration % 1000) * 1000000; the_data = (data_t*)malloc(num_entries * sizeof(data_t)); for (i = 0; i < num_entries; i++) { the_data[i].data=0; } if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) { perror("malloc"); exit(1); } if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { perror("malloc"); exit(1); } stop = 0; /* Access set from all threads */ barrier_init(&barrier, num_threads + 1); pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for (i = 0; i < num_threads; i++) { data[i].id = i; data[i].num_operations = 0; data[i].total_time=0; data[i].num_measured=0; data[i].barrier = &barrier; } void *(*test_function)(void*); switch(benchmark) { case 0: test_function = test_throughput; break; case 1: test_function = test_success; break; case 2: test_function = test_latency; break; default: fprintf(stderr, "benchmark not correctly specified\n"); exit(1); } for (i=0;i<num_threads; i++) { #ifdef PRINT_OUTPUT printf("Creating thread %d\n", i); #endif if (pthread_create(&threads[i], &attr, test_function, (void *)(&data[i])) != 0) { fprintf(stderr, "Error creating thread\n"); exit(1); } } pthread_attr_destroy(&attr); /* Catch some signals */ if (signal(SIGHUP, catcher) == SIG_ERR || signal(SIGINT, catcher) == SIG_ERR || signal(SIGTERM, catcher) == SIG_ERR) { perror("signal"); exit(1); } /* Start threads */ barrier_cross(&barrier); #ifdef PRINT_OUTPUT printf("STARTING...\n"); #endif gettimeofday(&start, NULL); if (duration > 0) { nanosleep(&timeout, NULL); } else { sigemptyset(&block_set); sigsuspend(&block_set); } stop = 1; gettimeofday(&end, NULL); #ifdef PRINT_OUTPUT printf("STOPPING...\n"); #endif /* Wait for thread completion */ for (i = 0; i < num_threads; i++) { if (pthread_join(threads[i], NULL) != 0) { fprintf(stderr, "Error waiting for thread completion\n"); exit(1); } } duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); unsigned long operations = 0; unsigned long total_measurements = 0; ticks total_ticks = 0; for (i = 0; i < num_threads; i++) { #ifdef PRINT_OUTPUT printf("Thread %d\n", i); printf(" #operations : %lu\n", data[i].num_operations); #endif operations += data[i].num_operations; if (benchmark==2) { total_ticks += data[i].total_time; total_measurements += data[i].num_measured; } } printf("Duration : %d (ms)\n", duration); printf("#operations : %lu (%f / s)\n", operations, operations * 1000.0 / duration); if (benchmark==2) { printf("average latency : %lu\n", total_ticks / total_measurements); } free((data_t*) the_data); free(threads); free(data); return 0; }