void libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_cleanup( struct lfds710_list_aso_state *logical_processor_set, enum libbenchmark_topology_numa_mode numa_mode, struct libbenchmark_results_state *rs, struct libbenchmark_threadset_state *tsets ) { struct lfds710_list_asu_element *lasue = NULL; struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_overall_benchmark_state *obs; struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state *ptbs; struct libbenchmark_threadset_per_thread_state *pts; LFDS710_PAL_ASSERT( logical_processor_set != NULL ); // TRD : numa_mode can be any value in its range LFDS710_PAL_ASSERT( rs != NULL ); LFDS710_PAL_ASSERT( tsets != NULL ); while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) ) { pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue ); ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts ); libbenchmark_results_put_result( rs, LIBBENCHMARK_DATASTRUCTURE_ID_QUEUE_UMM, LIBBENCHMARK_BENCHMARK_ID_ENQUEUE_UMM1_THEN_DEQUEUE_UMM1, LIBBENCHMARK_LOCK_ID_LIBLFDS700_LOCKFREE, numa_mode, logical_processor_set, LIBBENCHMARK_TOPOLOGY_NODE_GET_LOGICAL_PROCESSOR_NUMBER( *pts->tns_lp ), LIBBENCHMARK_TOPOLOGY_NODE_GET_WINDOWS_GROUP_NUMBER( *pts->tns_lp ), ptbs->operation_count ); } obs = tsets->users_threadset_state; lfds700_queue_cleanup( obs->qs, NULL ); return; }
/* Queue test : concurrent enqueuing.

   An empty queue is created (using a stack-allocated dummy element) and
   thread_simple_enqueuer is started once for every entry in
   list_of_logical_processors.  Each thread is handed the queue, its thread
   number and a per-thread element budget of

     ( memory_in_megabytes * ONE_MEGABYTE_IN_BYTES ) / ( sizeof(struct test_element) * number_logical_processors )

   The thread function is defined elsewhere; it is expected to populate
   (ts+n)->te_array, which this function frees afterwards, and to enqueue
   elements whose user data carries a thread number and a per-thread counter
   starting at zero.

   Once the threads have finished, the queue is validated for holding exactly
   number_elements * number_logical_processors elements and is then drained,
   checking that for each thread number the counters appear as 0, 1, 2, ...
   with none missing and none repeated.

   list_of_logical_processors  logical processors to run on; must not be NULL
   memory_in_megabytes         memory budget the element count is derived from

   NOTE(review): an empty list_of_logical_processors would make the
   number_elements computation divide by zero - callers appear to always
   supply at least one logical processor; confirm.
*/
void test_lfds700_queue_enqueuing( struct lfds700_list_asu_state *list_of_logical_processors, lfds700_pal_uint_t memory_in_megabytes )
{
  enum lfds700_misc_validity
    dvs = LFDS700_MISC_VALIDITY_VALID;

  lfds700_pal_uint_t
    *per_thread_counters,
    loop,
    number_elements,
    number_logical_processors;

  struct lfds700_list_asu_element
    *lasue;

  struct lfds700_misc_prng_state
    ps;

  struct lfds700_queue_element
    dummy_qe,
    *qe;

  struct lfds700_queue_state
    qs;

  struct lfds700_misc_validation_info
    vi;

  struct test_pal_logical_processor
    *lp;

  struct util_thread_starter_state
    *tts;

  struct test_element
    *te;

  struct test_state
    *ts;

  test_pal_thread_state_t
    *thread_handles;

  assert( list_of_logical_processors != NULL );
  // TRD : memory_in_megabytes can be any value in its range

  internal_display_test_name( "Enqueuing" );

  lfds700_list_asu_query( list_of_logical_processors, LFDS700_LIST_ASU_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void **) &number_logical_processors );

  lfds700_misc_prng_init( &ps );

  /* split the memory budget evenly between the threads */
  number_elements = ( memory_in_megabytes * ONE_MEGABYTE_IN_BYTES ) / ( sizeof(struct test_element) * number_logical_processors );

  lfds700_queue_init_valid_on_current_logical_core( &qs, &dummy_qe, &ps, NULL );

  ts = util_malloc_wrapper( sizeof(struct test_state) * number_logical_processors );

  for( loop = 0 ; loop < number_logical_processors ; loop++ )
  {
    (ts+loop)->qs = &qs;
    (ts+loop)->thread_number = loop;
    (ts+loop)->number_elements = number_elements;
  }

  thread_handles = util_malloc_wrapper( sizeof(test_pal_thread_state_t) * number_logical_processors );

  util_thread_starter_new( &tts, number_logical_processors );

  /* publish the queue and per-thread states before any worker thread starts */
  LFDS700_MISC_BARRIER_STORE;

  lfds700_misc_force_store();

  loop = 0;
  lasue = NULL;

  while( LFDS700_LIST_ASU_GET_START_AND_THEN_NEXT(*list_of_logical_processors, lasue) )
  {
    lp = LFDS700_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
    util_thread_starter_start( tts, &thread_handles[loop], loop, lp, thread_simple_enqueuer, ts+loop );
    loop++;
  }

  util_thread_starter_run( tts );

  for( loop = 0 ; loop < number_logical_processors ; loop++ )
    test_pal_thread_wait( thread_handles[loop] );

  util_thread_starter_delete( tts );

  free( thread_handles );

  LFDS700_MISC_BARRIER_LOAD;

  /* first validate the queue as a whole, then dequeue;
     element numbers are expected to increment on a per-thread basis
  */

  vi.min_elements = vi.max_elements = number_elements * number_logical_processors;

  lfds700_queue_query( &qs, LFDS700_QUEUE_QUERY_SINGLETHREADED_VALIDATE, &vi, &dvs );

  /* per_thread_counters[n] holds the next counter value expected from thread n */
  per_thread_counters = util_malloc_wrapper( sizeof(lfds700_pal_uint_t) * number_logical_processors );

  for( loop = 0 ; loop < number_logical_processors ; loop++ )
    *(per_thread_counters+loop) = 0;

  while( dvs == LFDS700_MISC_VALIDITY_VALID and lfds700_queue_dequeue(&qs, &qe, &ps) )
  {
    te = LFDS700_QUEUE_GET_VALUE_FROM_ELEMENT( *qe );

    /* a thread number outside the array would index out of bounds below, so stop immediately */
    if( te->thread_number >= number_logical_processors )
    {
      dvs = LFDS700_MISC_VALIDITY_INVALID_TEST_DATA;
      break;
    }

    /* a counter ahead of the expected value means something was skipped */
    if( te->counter > per_thread_counters[te->thread_number] )
      dvs = LFDS700_MISC_VALIDITY_INVALID_MISSING_ELEMENTS;

    /* a counter behind the expected value means something was seen twice */
    if( te->counter < per_thread_counters[te->thread_number] )
      dvs = LFDS700_MISC_VALIDITY_INVALID_ADDITIONAL_ELEMENTS;

    if( te->counter == per_thread_counters[te->thread_number] )
      per_thread_counters[te->thread_number]++;
  }

  free( per_thread_counters );

  /* clean up the queue while the element storage it still points at is
     alive, and only then release that storage (same order as the rapid
     enqueuing and dequeuing test)
  */
  lfds700_queue_cleanup( &qs, NULL );

  for( loop = 0 ; loop < number_logical_processors ; loop++ )
    util_aligned_free( (ts+loop)->te_array );

  free( ts );

  internal_display_test_result( 1, "queue", dvs );

  return;
}
void test_lfds700_queue_enqueuing_with_malloc_and_dequeuing_with_free( struct lfds700_list_asu_state *list_of_logical_processors ) { enum lfds700_misc_validity dvs = LFDS700_MISC_VALIDITY_VALID; lfds700_pal_uint_t loop, number_logical_processors; struct lfds700_list_asu_element *lasue; struct lfds700_misc_prng_state ps; struct lfds700_queue_element *qe; struct lfds700_queue_state qs; struct lfds700_misc_validation_info vi; struct test_pal_logical_processor *lp; struct util_thread_starter_state *tts; struct test_state *ts; test_pal_thread_state_t *thread_handles; assert( list_of_logical_processors != NULL ); // TRD : qt can be any value in its range /* TRD : one thread per logical core each thread loops for ten seconds mallocs and enqueues 1k elements, then dequeues and frees 1k elements */ internal_display_test_name( "Enqueuing with malloc dequeuing with free (%d seconds)", TEST_DURATION_IN_SECONDS ); lfds700_list_asu_query( list_of_logical_processors, LFDS700_LIST_ASU_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void **) &number_logical_processors ); lfds700_misc_prng_init( &ps ); qe = util_aligned_malloc( sizeof(struct lfds700_queue_element), LFDS700_PAL_ATOMIC_ISOLATION_IN_BYTES ); lfds700_queue_init_valid_on_current_logical_core( &qs, qe, &ps, NULL ); ts = util_malloc_wrapper( sizeof(struct test_state) * number_logical_processors ); for( loop = 0 ; loop < number_logical_processors ; loop++ ) (ts+loop)->qs = &qs; thread_handles = util_malloc_wrapper( sizeof(test_pal_thread_state_t) * number_logical_processors ); util_thread_starter_new( &tts, number_logical_processors ); LFDS700_MISC_BARRIER_STORE; lfds700_misc_force_store(); loop = 0; lasue = NULL; while( LFDS700_LIST_ASU_GET_START_AND_THEN_NEXT(*list_of_logical_processors, lasue) ) { lp = LFDS700_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue ); util_thread_starter_start( tts, &thread_handles[loop], loop, lp, thread_enqueuer_with_malloc_and_dequeuer_with_free, ts+loop ); loop++; } util_thread_starter_run( tts ); 
for( loop = 0 ; loop < number_logical_processors ; loop++ ) test_pal_thread_wait( thread_handles[loop] ); util_thread_starter_delete( tts ); free( thread_handles ); LFDS700_MISC_BARRIER_LOAD; vi.min_elements = vi.max_elements = 0; lfds700_queue_query( &qs, LFDS700_QUEUE_QUERY_SINGLETHREADED_VALIDATE, &vi, &dvs ); free( ts ); lfds700_queue_cleanup( &qs, queue_element_cleanup_callback ); internal_display_test_result( 1, "queue", dvs ); return; }
/* Queue test : rapid enqueuing and dequeuing.

   One queue is built over a single array of test elements.  The array size
   is taken from the memory budget but capped at
   ( 10000 * number_logical_processors ) + 1 so that elements are re-used
   heavily; the last array slot serves as the queue's dummy element and all
   other slots are enqueued up front.

   thread_rapid_enqueuer_and_dequeuer (defined elsewhere) is then started
   once for every entry in list_of_logical_processors.  Per the original
   author's note each thread simply loops dequeuing and enqueuing, stamping
   the user data with its thread number and an incrementing counter.

   Verification : the queue must still hold every non-dummy element, and
   while draining it the counters seen for each thread number must increase
   by exactly one from the first value seen for that thread.

   Elements are initialised with thread_number set to their array index, so
   an element no worker thread ever touched is likely to be reported as
   invalid test data; the test assumes each thread iterates at least once.

   list_of_logical_processors  logical processors to run on; must not be NULL
   memory_in_megabytes         memory budget the element count is derived from

   NOTE(review): with memory_in_megabytes == 0 the element count is 0 and
   "count - 1" wraps around - confirm callers never pass 0.
*/
void test_lfds700_queue_rapid_enqueuing_and_dequeuing( struct lfds700_list_asu_state *list_of_logical_processors, lfds700_pal_uint_t memory_in_megabytes )
{
  enum lfds700_misc_validity
    validity = LFDS700_MISC_VALIDITY_VALID;

  lfds700_pal_uint_t
    index,
    element_cap,
    total_elements,
    queued_elements,
    processor_count,
    *next_expected;

  struct lfds700_list_asu_element
    *list_element;

  struct lfds700_misc_prng_state
    prng;

  struct lfds700_queue_element
    *dequeued;

  struct lfds700_misc_validation_info
    expected;

  struct lfds700_queue_state
    queue;

  struct test_pal_logical_processor
    *processor;

  struct util_thread_starter_state
    *starter;

  struct test_element
    *elements,
    *te;

  struct test_state
    *thread_states;

  test_pal_thread_state_t
    *handles;

  assert( list_of_logical_processors != NULL );

  internal_display_test_name( "Rapid enqueuing and dequeuing (%d seconds)", TEST_DURATION_IN_SECONDS );

  lfds700_list_asu_query( list_of_logical_processors, LFDS700_LIST_ASU_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void **) &processor_count );

  lfds700_misc_prng_init( &prng );

  /* total_elements counts the dummy element, queued_elements does not */
  total_elements = ( memory_in_megabytes * ONE_MEGABYTE_IN_BYTES ) / sizeof(struct test_element);

  element_cap = (10000 * processor_count) + 1;

  if( total_elements > element_cap )
    total_elements = element_cap;

  queued_elements = total_elements - 1;

  expected.min_elements = queued_elements;
  expected.max_elements = queued_elements;

  elements = util_aligned_malloc( sizeof(struct test_element) * total_elements, LFDS700_PAL_ATOMIC_ISOLATION_IN_BYTES );

  /* the final array slot is the dummy element */
  lfds700_queue_init_valid_on_current_logical_core( &queue, &(elements+queued_elements)->qe, &prng, NULL );

  /* each element's user data is a pointer back to the element itself */
  for( index = 0 ; index < queued_elements ; index++ )
  {
    te = elements + index;

    te->thread_number = index;
    te->counter = 0;
    LFDS700_QUEUE_SET_VALUE_IN_ELEMENT( te->qe, te );
    lfds700_queue_enqueue( &queue, &te->qe, &prng );
  }

  thread_states = util_malloc_wrapper( sizeof(struct test_state) * processor_count );

  for( index = 0 ; index < processor_count ; index++ )
  {
    thread_states[index].qs = &queue;
    thread_states[index].thread_number = index;
    thread_states[index].counter = 0;
  }

  handles = util_malloc_wrapper( sizeof(test_pal_thread_state_t) * processor_count );

  util_thread_starter_new( &starter, processor_count );

  /* make the populated queue visible before any worker thread is created */
  LFDS700_MISC_BARRIER_STORE;

  lfds700_misc_force_store();

  /* a NULL list_element makes the iteration macro begin at the list start */
  for( index = 0, list_element = NULL ; LFDS700_LIST_ASU_GET_START_AND_THEN_NEXT(*list_of_logical_processors, list_element) ; index++ )
  {
    processor = LFDS700_LIST_ASU_GET_VALUE_FROM_ELEMENT( *list_element );
    util_thread_starter_start( starter, &handles[index], index, processor, thread_rapid_enqueuer_and_dequeuer, &thread_states[index] );
  }

  util_thread_starter_run( starter );

  for( index = 0 ; index < processor_count ; index++ )
    test_pal_thread_wait( handles[index] );

  util_thread_starter_delete( starter );

  free( handles );

  LFDS700_MISC_BARRIER_LOAD;

  lfds700_queue_query( &queue, LFDS700_QUEUE_QUERY_SINGLETHREADED_VALIDATE, &expected, &validity );

  /* next_expected[n] is the counter value the next element from thread n must carry;
     zero doubles as "nothing seen yet from this thread"
  */
  next_expected = util_malloc_wrapper( sizeof(lfds700_pal_uint_t) * processor_count );

  for( index = 0 ; index < processor_count ; index++ )
    next_expected[index] = 0;

  while( validity == LFDS700_MISC_VALIDITY_VALID and lfds700_queue_dequeue(&queue, &dequeued, &prng) )
  {
    lfds700_pal_uint_t
      *slot;

    te = LFDS700_QUEUE_GET_VALUE_FROM_ELEMENT( *dequeued );

    /* reject out-of-range thread numbers before they are used as an index */
    if( te->thread_number >= processor_count )
    {
      validity = LFDS700_MISC_VALIDITY_INVALID_TEST_DATA;
      break;
    }

    slot = &next_expected[te->thread_number];

    /* adopt the first counter seen for a thread as its starting point */
    if( *slot == 0 )
      *slot = te->counter;

    if( te->counter > *slot )
      validity = LFDS700_MISC_VALIDITY_INVALID_MISSING_ELEMENTS;
    else if( te->counter < *slot )
      validity = LFDS700_MISC_VALIDITY_INVALID_ADDITIONAL_ELEMENTS;
    else if( te->counter == *slot )
      (*slot)++;
  }

  free( next_expected );

  lfds700_queue_cleanup( &queue, NULL );

  util_aligned_free( elements );

  free( thread_states );

  internal_display_test_result( 1, "queue", validity );

  return;
}
/* Queue test : concurrent dequeuing.

   A queue is built over a single array of test elements sized from the
   memory budget.  The last array slot is the queue's dummy element; every
   other slot is enqueued by this (single) thread with its array index as
   user data, so the user data forms an incrementing sequence.

   thread_simple_dequeuer (defined elsewhere) is then started once for every
   entry in list_of_logical_processors.  Per the original author's note each
   thread busy-works dequeuing and checks that every value it dequeues is
   greater than the one it dequeued before, raising its error_flag otherwise.
   There is no CAS+GC vs DWCAS variation here: the aim is to stress the
   queue, so dequeuing is left to run as fast as possible.

   The test passes when the queue validates as empty afterwards and no
   thread has raised its error flag.

   list_of_logical_processors  logical processors to run on; must not be NULL
   memory_in_megabytes         memory budget the element count is derived
                               from; a budget too small for even one element
                               yields a queue holding only the dummy element
*/
void test_lfds700_queue_dequeuing( struct lfds700_list_asu_state *list_of_logical_processors, lfds700_pal_uint_t memory_in_megabytes )
{
  enum lfds700_misc_validity
    dvs = LFDS700_MISC_VALIDITY_VALID;

  lfds700_pal_uint_t
    loop,
    number_elements_with_dummy_element,
    number_elements_without_dummy_element,
    number_logical_processors;

  struct lfds700_list_asu_element
    *lasue;

  struct lfds700_misc_prng_state
    ps;

  struct lfds700_queue_state
    qs;

  struct lfds700_misc_validation_info
    vi = { 0, 0 };

  struct test_pal_logical_processor
    *lp;

  struct util_thread_starter_state
    *tts;

  struct test_element
    *te_array;

  struct test_state
    *ts;

  test_pal_thread_state_t
    *thread_handles;

  assert( list_of_logical_processors != NULL );
  // TRD : memory_in_megabytes can be any value in its range

  internal_display_test_name( "Dequeuing" );

  lfds700_list_asu_query( list_of_logical_processors, LFDS700_LIST_ASU_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void **) &number_logical_processors );

  lfds700_misc_prng_init( &ps );

  number_elements_with_dummy_element = ( memory_in_megabytes * ONE_MEGABYTE_IN_BYTES ) / sizeof(struct test_element);

  /* the queue always needs its dummy element; without this floor a zero
     budget would make the subtraction below wrap around, producing an
     out-of-bounds dummy pointer and a runaway enqueue loop
  */
  if( number_elements_with_dummy_element < 1 )
    number_elements_with_dummy_element = 1;

  number_elements_without_dummy_element = number_elements_with_dummy_element - 1;

  te_array = util_aligned_malloc( sizeof(struct test_element) * number_elements_with_dummy_element, LFDS700_PAL_ATOMIC_ISOLATION_IN_BYTES );

  /* the final array slot is the dummy element */
  lfds700_queue_init_valid_on_current_logical_core( &qs, &(te_array + number_elements_without_dummy_element)->qe, &ps, NULL );

  /* user data is simply the element's position in the enqueue order */
  for( loop = 0 ; loop < number_elements_without_dummy_element ; loop++ )
  {
    LFDS700_QUEUE_SET_VALUE_IN_ELEMENT( (te_array+loop)->qe, loop );
    lfds700_queue_enqueue( &qs, &(te_array+loop)->qe, &ps );
  }

  ts = util_malloc_wrapper( sizeof(struct test_state) * number_logical_processors );

  for( loop = 0 ; loop < number_logical_processors ; loop++ )
  {
    (ts+loop)->qs = &qs;
    (ts+loop)->error_flag = LOWERED;
  }

  thread_handles = util_malloc_wrapper( sizeof(test_pal_thread_state_t) * number_logical_processors );

  util_thread_starter_new( &tts, number_logical_processors );

  /* publish the populated queue before any worker thread starts */
  LFDS700_MISC_BARRIER_STORE;

  lfds700_misc_force_store();

  loop = 0;
  lasue = NULL;

  while( LFDS700_LIST_ASU_GET_START_AND_THEN_NEXT(*list_of_logical_processors, lasue) )
  {
    lp = LFDS700_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
    util_thread_starter_start( tts, &thread_handles[loop], loop, lp, thread_simple_dequeuer, ts+loop );
    loop++;
  }

  util_thread_starter_run( tts );

  for( loop = 0 ; loop < number_logical_processors ; loop++ )
    test_pal_thread_wait( thread_handles[loop] );

  util_thread_starter_delete( tts );

  free( thread_handles );

  LFDS700_MISC_BARRIER_LOAD;

  // TRD : check queue is empty
  lfds700_queue_query( &qs, LFDS700_QUEUE_QUERY_SINGLETHREADED_VALIDATE, &vi, &dvs );

  // TRD : check for raised error flags
  for( loop = 0 ; loop < number_logical_processors ; loop++ )
    if( (ts+loop)->error_flag == RAISED )
      dvs = LFDS700_MISC_VALIDITY_INVALID_TEST_DATA;

  free( ts );

  /* clean up the queue while the element array it still points at is alive,
     and only then release the array (same order as the rapid enqueuing and
     dequeuing test)
  */
  lfds700_queue_cleanup( &qs, NULL );

  util_aligned_free( te_array );

  internal_display_test_result( 1, "queue", dvs );

  return;
}