void abstraction_test_cas ( void) { unsigned int loop, cpu_count; thread_state_t *thread_handles; struct abstraction_test_cas_state *atcs; LFDS611_ALIGN (LFDS611_ALIGN_SINGLE_POINTER) volatile lfds611_atom_t shared_counter; lfds611_atom_t local_total = 0; // TRD : number_logical_processors can be any value in its range /* * TRD : here we test lfds611_abstraction_cas * * we run one thread per CPU * we use lfds611_abstraction_cas() to increment a shared counter * every time a thread successfully increments the counter, * it increments a thread local counter * the threads run for ten seconds * after the threads finish, we total the local counters * they should equal the shared counter */ internal_display_test_name ("Atomic CAS"); cpu_count = abstraction_cpu_count (); shared_counter = 0; LFDS611_BARRIER_STORE; atcs = malloc (sizeof (struct abstraction_test_cas_state) * cpu_count); for (loop = 0; loop < cpu_count; loop++) { (atcs + loop)->shared_counter = &shared_counter; (atcs + loop)->local_counter = 0; } thread_handles = malloc (sizeof (thread_state_t) * cpu_count); for (loop = 0; loop < cpu_count; loop++) abstraction_thread_start (&thread_handles[loop], loop, abstraction_test_internal_thread_cas, atcs + loop); for (loop = 0; loop < cpu_count; loop++) abstraction_thread_wait (thread_handles[loop]); free (thread_handles); // TRD : results for (loop = 0; loop < cpu_count; loop++) local_total += (atcs + loop)->local_counter; if (local_total == shared_counter) puts ("passed"); if (local_total != shared_counter) puts ("failed"); // TRD : cleanup free (atcs); return; }
void test_slist_new_delete_get( void ) { unsigned int loop, cpu_count; struct lfds611_slist_state *ss; struct lfds611_slist_element *se = NULL; struct slist_test_state *sts; thread_state_t *thread_handles; size_t total_create_count = 0, total_delete_count = 0, element_count = 0; enum lfds611_data_structure_validity dvs = LFDS611_VALIDITY_VALID; /* TRD : two threads per CPU first simply alternates between new_head() and new_next() (next on element created by head) second calls get_next, if NULL, then calls get_head, and deletes the element both threads keep count of created and deleted validate is to reconcile created, deleted and remaining in list */ internal_display_test_name( "New head/next, delete and get next" ); cpu_count = abstraction_cpu_count(); lfds611_slist_new( &ss, NULL, NULL ); sts = malloc( sizeof(struct slist_test_state) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) { (sts+loop)->ss = ss; (sts+loop)->create_count = 0; (sts+loop)->delete_count = 0; } thread_handles = malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, slist_test_internal_thread_new_delete_get_new_head_and_next, sts+loop ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, slist_test_internal_thread_new_delete_get_delete_and_get, sts+loop+cpu_count ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); // TRD : now validate for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) { total_create_count += (sts+loop)->create_count; total_delete_count += (sts+loop)->delete_count; } while( NULL != lfds611_slist_get_head_and_then_next(ss, &se) ) element_count++; if( total_create_count - total_delete_count - element_count != 0 ) dvs = LFDS611_VALIDITY_INVALID_TEST_DATA; free( sts ); lfds611_slist_delete( ss ); internal_display_test_result( 1, "slist", dvs ); return; }
void test_stack( void ) { unsigned int loop, cpu_count; struct stack_state *ss; thread_state_t *thread_handles; /* TRD : there are 5 tests 1. single reader thread per CPU - stack always empty 2. single writer thread per CPU - stack always full 3. one reader and one writer thread per CPU - stack balanced 4. one reader and two writer threads per CPU - stack grows 5. two reader and one writer thread per CPU - stack tends to empty */ cpu_count = abstraction_cpu_count(); printf( "\n" "Stack Test\n" "==========\n" ); // TRD : 1. single reader thread per CPU printf( "\n" "1. single reader thread per CPU\n" "===============================\n" ); stack_new( &ss, 10000 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 1 ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, stack_internal_thread_reader, ss ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); stack_delete( ss, NULL, NULL ); free( thread_handles ); // TRD : 2. single writer thread per CPU printf( "\n" "2. single writer thread per CPU\n" "===============================\n" ); stack_new( &ss, 10000 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 1 ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, stack_internal_thread_writer, ss ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); stack_delete( ss, NULL, NULL ); free( thread_handles ); // TRD : 3. one reader and one writer thread per CPU printf( "\n" "3. one reader and one writer thread per CPU\n" "===========================================\n" ); stack_new( &ss, 10000 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, stack_internal_thread_reader, ss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, stack_internal_thread_writer, ss ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); stack_delete( ss, NULL, NULL ); free( thread_handles ); // TRD : 4. one reader and two writer threads per CPU printf( "\n" "4. one reader and two writer threads per CPU\n" "============================================\n" ); stack_new( &ss, 10000 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 3 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, stack_internal_thread_reader, ss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, stack_internal_thread_writer, ss ); abstraction_thread_start( &thread_handles[loop+cpu_count*2], loop, stack_internal_thread_writer, ss ); } for( loop = 0 ; loop < cpu_count * 3 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); stack_delete( ss, NULL, NULL ); free( thread_handles ); // TRD : 5. two reader and one writer thread per CPU printf( "\n" "5. two reader and one writer thread per CPU\n" "===========================================\n" ); stack_new( &ss, 10000 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 3 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, stack_internal_thread_reader, ss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, stack_internal_thread_reader, ss ); abstraction_thread_start( &thread_handles[loop+cpu_count*2], loop, stack_internal_thread_writer, ss ); } for( loop = 0 ; loop < cpu_count * 3 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); stack_delete( ss, NULL, NULL ); free( thread_handles ); return; }
void test_slist_get_set_user_data( void ) { unsigned int loop, cpu_count; struct lfds611_slist_state *ss; struct lfds611_slist_element *se = NULL; struct slist_test_state *sts; thread_state_t *thread_handles; lfds611_atom_t thread_and_count, thread, count, *per_thread_counters, *per_thread_drop_flags; enum lfds611_data_structure_validity dvs = LFDS611_VALIDITY_VALID; /* TRD : create a list of (cpu_count*10) elements, user data 0 one thread per CPU each thread loops, setting user_data to ((thread_number << (sizeof(lfds611_atom_t)*8-8)) | count) validation is to scan list, count on a per thread basis should go down only once */ internal_display_test_name( "Get and set user data" ); cpu_count = abstraction_cpu_count(); lfds611_slist_new( &ss, NULL, NULL ); for( loop = 0 ; loop < cpu_count * 10 ; loop++ ) lfds611_slist_new_head( ss, NULL ); sts = malloc( sizeof(struct slist_test_state) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (sts+loop)->ss = ss; (sts+loop)->thread_and_count = (lfds611_atom_t) loop << (sizeof(lfds611_atom_t)*8-8); } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, slist_test_internal_thread_get_set_user_data, sts+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); // now validate per_thread_counters = malloc( sizeof(lfds611_atom_t) * cpu_count ); per_thread_drop_flags = malloc( sizeof(lfds611_atom_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { *(per_thread_counters+loop) = 0; *(per_thread_drop_flags+loop) = 0; } while( dvs == LFDS611_VALIDITY_VALID and NULL != lfds611_slist_get_head_and_then_next(ss, &se) ) { lfds611_slist_get_user_data_from_element( se, (void **) &thread_and_count ); thread = thread_and_count >> (sizeof(lfds611_atom_t)*8-8); count = (thread_and_count << 8) >> 8; if( thread >= cpu_count ) { dvs = LFDS611_VALIDITY_INVALID_TEST_DATA; break; } if( per_thread_counters[thread] == 0 ) { per_thread_counters[thread] = count; continue; } per_thread_counters[thread]++; if( count < per_thread_counters[thread] and per_thread_drop_flags[thread] == 1 ) { dvs = LFDS611_VALIDITY_INVALID_ADDITIONAL_ELEMENTS; break; } if( count < per_thread_counters[thread] and per_thread_drop_flags[thread] == 0 ) { per_thread_drop_flags[thread] = 1; per_thread_counters[thread] = count; continue; } if( count < per_thread_counters[thread] ) dvs = LFDS611_VALIDITY_INVALID_ADDITIONAL_ELEMENTS; if( count >= per_thread_counters[thread] ) per_thread_counters[thread] = count; } free( per_thread_drop_flags ); free( per_thread_counters ); free( sts ); lfds611_slist_delete( ss ); internal_display_test_result( 1, "slist", dvs ); return; }
void queue_test_rapid_enqueuing_and_dequeuing( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; struct lfds611_queue_state *qs; struct queue_test_rapid_enqueuing_and_dequeuing_state *qtreds; struct lfds611_validation_info vi = { 50000, 50000 }; lfds611_atom_t user_data, thread, count, *per_thread_counters; enum lfds611_data_structure_validity dvs[2]; internal_display_test_name( "Rapid enqueuing and dequeuing (10 seconds)" ); cpu_count = abstraction_cpu_count(); lfds611_queue_new( &qs, 100000 ); for( loop = 0 ; loop < 50000 ; loop++ ) lfds611_queue_enqueue( qs, NULL ); qtreds = malloc( sizeof(struct queue_test_rapid_enqueuing_and_dequeuing_state) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (qtreds+loop)->qs = qs; (qtreds+loop)->counter = (lfds611_atom_t) loop << (sizeof(lfds611_atom_t)*8-8); } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, queue_test_internal_thread_rapid_enqueuer_and_dequeuer, qtreds+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); lfds611_queue_query( qs, LFDS611_QUEUE_QUERY_VALIDATE, (void *) &vi, (void *) dvs ); // TRD : now check results per_thread_counters = malloc( sizeof(lfds611_atom_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) *(per_thread_counters+loop) = 0; while( dvs[0] == LFDS611_VALIDITY_VALID and dvs[1] == LFDS611_VALIDITY_VALID and lfds611_queue_dequeue(qs, (void *) &user_data) ) { thread = user_data >> (sizeof(lfds611_atom_t)*8-8); count = (user_data << 8) >> 8; if( thread >= cpu_count ) { dvs[0] = LFDS611_VALIDITY_INVALID_TEST_DATA; break; } if( per_thread_counters[thread] == 0 ) per_thread_counters[thread] = count; if( count < per_thread_counters[thread] ) dvs[0] = LFDS611_VALIDITY_INVALID_ADDITIONAL_ELEMENTS; if( count >= per_thread_counters[thread] ) per_thread_counters[thread] = count+1; } free( per_thread_counters ); free( qtreds ); lfds611_queue_delete( qs, NULL, NULL ); internal_display_test_result( 2, "queue", dvs[0], "queue freelist", dvs[1] ); return; }
void abstraction_test_dcas( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; struct abstraction_test_dcas_state *atds; LFDS601_ALIGN(LFDS601_ALIGN_DOUBLE_POINTER) volatile lfds601_atom_t shared_counter[2] = { 0, 0 }; lfds601_atom_t local_total = 0; /* TRD : here we test lfds601_abstraction_dcas we run one thread per CPU we use lfds601_abstraction_dcas() to increment a shared counter every time a thread successfully increments the counter, it increments a thread local counter the threads run for ten seconds after the threads finish, we total the local counters they should equal the shared counter */ internal_display_test_name( "Atomic DCAS" ); cpu_count = abstraction_cpu_count(); atds = malloc( sizeof(struct abstraction_test_dcas_state) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (atds+loop)->shared_counter = shared_counter; (atds+loop)->local_counter = 0; } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, abstraction_test_internal_thread_dcas, atds+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); // TRD : results for( loop = 0 ; loop < cpu_count ; loop++ ) local_total += (atds+loop)->local_counter; if( local_total == shared_counter[0] ) puts( "passed" ); if( local_total != shared_counter[0] ) puts( "failed" ); // TRD : cleanup free( atds ); return; }
void queue_test_enqueuing( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; struct lfds611_queue_state *qs; struct queue_test_enqueuing_state *qtes; lfds611_atom_t user_data, thread, count, *per_thread_counters; struct lfds611_validation_info vi = { 1000000, 1000000 }; enum lfds611_data_structure_validity dvs[2]; /* TRD : create an empty queue with 1,000,000 elements in its freelist then run one thread per CPU where each thread busy-works, enqueuing elements (until there are no more elements) each element's void pointer of user data is (thread number | element number) where element_number is a thread-local counter starting at 0 where the thread_number occupies the top byte when we're done, we check that all the elements are present and increment on a per-thread basis */ internal_display_test_name( "Enqueuing" ); cpu_count = abstraction_cpu_count(); lfds611_queue_new( &qs, 1000000 ); qtes = malloc( sizeof(struct queue_test_enqueuing_state) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (qtes+loop)->qs = qs; (qtes+loop)->counter = (lfds611_atom_t) loop << (sizeof(lfds611_atom_t)*8-8); } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, queue_test_internal_thread_simple_enqueuer, qtes+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); free( qtes ); /* TRD : first, validate the queue then dequeue we expect to find element numbers increment on a per thread basis */ lfds611_queue_query( qs, LFDS611_QUEUE_QUERY_VALIDATE, &vi, dvs ); per_thread_counters = malloc( sizeof(lfds611_atom_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) *(per_thread_counters+loop) = 0; while( dvs[0] == LFDS611_VALIDITY_VALID and dvs[1] == LFDS611_VALIDITY_VALID and lfds611_queue_dequeue(qs, (void *) &user_data) ) { thread = user_data >> (sizeof(lfds611_atom_t)*8-8); count = (user_data << 8) >> 8; if( thread >= cpu_count ) { dvs[0] = LFDS611_VALIDITY_INVALID_TEST_DATA; break; } if( count < per_thread_counters[thread] ) dvs[0] = LFDS611_VALIDITY_INVALID_ADDITIONAL_ELEMENTS; if( count > per_thread_counters[thread] ) dvs[0] = LFDS611_VALIDITY_INVALID_MISSING_ELEMENTS; if( count == per_thread_counters[thread] ) per_thread_counters[thread]++; } free( per_thread_counters ); lfds611_queue_delete( qs, NULL, NULL ); internal_display_test_result( 2, "queue", dvs[0], "queue freelist", dvs[1] ); return; }
void freelist_test_internal_popping( void ) { unsigned int loop, cpu_count, count; thread_state_t *thread_handles; enum data_structure_validity dvs = VALIDITY_VALID; struct freelist_state *fs; struct freelist_element *fe; struct freelist_test_popping_state *ftps; unsigned int *found_count; /* TRD : we create a freelist with 1,000,000 elements the creation function runs in a single thread and creates and pushes those elements onto the freelist each element contains a void pointer which is its element number we then run one thread per CPU where each thread loops, popping as quickly as possible each popped element is pushed onto a thread-local freelist the threads run till the source freelist is empty we then check the thread-local freelists we should find we have every element then tidy up */ internal_display_test_name( "Popping" ); cpu_count = abstraction_cpu_count(); freelist_new( &fs, 1000000, freelist_test_internal_popping_init, NULL ); ftps = malloc( sizeof(struct freelist_test_popping_state) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (ftps+loop)->fs = fs; freelist_new( &(ftps+loop)->fs_thread_local, 0, NULL, NULL ); } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, freelist_test_internal_thread_popping, ftps+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); // TRD : now we check the thread-local freelists found_count = malloc( sizeof(unsigned int) * 1000000 ); for( loop = 0 ; loop < 1000000 ; loop++ ) *(found_count+loop) = 0; for( loop = 0 ; loop < cpu_count ; loop++ ) { while( freelist_pop((ftps+loop)->fs_thread_local, &fe) ) { freelist_get_user_data_from_element( fe, (void **) &count ); (*(found_count+count))++; freelist_push( fs, fe ); } } for( loop = 0 ; loop < 1000000 and dvs == VALIDITY_VALID ; loop++ ) { if( *(found_count+loop) == 0 ) dvs = VALIDITY_INVALID_MISSING_ELEMENTS; if( *(found_count+loop) > 1 ) dvs = VALIDITY_INVALID_ADDITIONAL_ELEMENTS; } // TRD : cleanup free( found_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) freelist_delete( (ftps+loop)->fs_thread_local, NULL, NULL ); freelist_delete( fs, NULL, NULL ); // TRD : print the test result internal_display_test_result( 1, "freelist", dvs ); return; }
void test_lfds600_slist( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; struct lfds600_slist_thread_start_state stss; /* TRD : 1. one head writer per CPU 2. make one element, then one after writer per CPU 3. make a list, then one list traverser per CPU 4. one head writer and one list traverser per CPU 5. make one element, then one after writer and one list traverser per CPU 6. make a list, then one 100% deleter-traverser per CPU 7. make a list, then one 25% deleter-traverser per CPU 8. one head writer and one 100% deleter-traverse per CPU 9. one head writer and one 25% deleter-traverse per CPU 10. make one element, then one after writer and one 100% deleter-traverser per CPU 11. make one element, then one after writer and one 25% deleter-traverser per CPU 12. one head writer, one after writer, one traverser and one 25% deleter-traverser per CPU */ cpu_count = abstraction_cpu_count(); printf( "\n" "SList Test\n" "==========\n" ); // TRD : 1. one head writer per CPU printf( "\n" "1. one head writer per CPU\n" "==========================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = NULL; stss.duration = 1; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 1 ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_head_writer, &stss ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 2. make one element, then one after writer per CPU printf( "\n" "2. make one element, then one after writer per CPU\n" "==================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = lfds600_slist_new_head( stss.ss, (void *) NULL ); stss.duration = 1; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 1 ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_after_writer, &stss ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 3. make a list, then one list traverser per CPU printf( "\n" "3. make a list, then one list traverser per CPU\n" "===============================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = NULL; stss.duration = 10; // TRD : small list so we get collisions for( loop = 0 ; loop < 10 ; loop++ ) lfds600_slist_new_head( stss.ss, (void *) 0 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 1 ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_traverser, &stss ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 4. one head writer and one list traverser per CPU printf( "\n" "4. one head writer and one list traverser per CPU\n" "=================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = NULL; stss.duration = 1; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ )\ { abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_head_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, lfds600_slist_internal_thread_traverser, &stss ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 5. make one element, then one after writer and one list traverser per CPU printf( "\n" "5. make one element, then one after writer and one list traverser per CPU\n" "=========================================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = lfds600_slist_new_head( stss.ss, (void *) NULL ); stss.duration = 1; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ )\ { abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_after_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, lfds600_slist_internal_thread_traverser, &stss ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 6. make a list, then one 100% deleter-traverser per CPU printf( "\n" "6. make a list, then one 100%% deleter-traverser per CPU\n" "=======================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = NULL; stss.duration = 1; for( loop = 0 ; loop < 10000 ; loop++ ) lfds600_slist_new_head( stss.ss, (void *) 0 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 1 ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_deleter_traverser, &stss ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 7. make a list, then one 25% deleter-traverser per CPU printf( "\n" "7. make a list, then one 25%% deleter-traverser per CPU\n" "======================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 4; stss.se = NULL; stss.duration = 1; for( loop = 0 ; loop < 10000 ; loop++ ) lfds600_slist_new_head( stss.ss, (void *) 0 ); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 1 ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_deleter_traverser, &stss ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 8. one head writer and one 100% deleter-traverse per CPU printf( "\n" "8. one head writer and one 100%% deleter-traverse per CPU\n" "========================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = NULL; stss.duration = 10; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_head_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, lfds600_slist_internal_thread_deleter_traverser, &stss ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 9. one head writer and one 25% deleter-traverse per CPU printf( "\n" "9. one head writer and one 25%% deleter-traverse per CPU\n" "=======================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 4; stss.se = NULL; stss.duration = 1; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_head_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, lfds600_slist_internal_thread_deleter_traverser, &stss ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 10. make one element, then one after writer and one 100% deleter-traverser per CPU printf( "\n" "10. make one element, then one after writer and one 100%% deleter-traverser per CPU\n" "==================================================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 1; stss.se = lfds600_slist_new_head( stss.ss, (void *) NULL ); stss.duration = 10; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_after_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, lfds600_slist_internal_thread_deleter_traverser, &stss ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 11. make one element, then one after writer and one 25% deleter-traverser per CPU printf( "\n" "11. make one element, then one after writer and one 25%% deleter-traverser per CPU\n" "=================================================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 4; stss.se = lfds600_slist_new_head( stss.ss, (void *) NULL ); stss.duration = 1; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_after_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, lfds600_slist_internal_thread_deleter_traverser, &stss ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); // TRD : 12. one head writer, one after writer, one traverser and one 25% deleter-traverser per CPU printf( "\n" "12. one head writer, one after writer, one traverser and one 25%% deleter-traverser per CPU\n" "==========================================================================================\n" ); lfds600_slist_new( &stss.ss, NULL, NULL ); stss.iteration_modulo = 4; stss.se = lfds600_slist_new_head( stss.ss, (void *) NULL ); stss.duration = 1; thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count * 4 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, lfds600_slist_internal_thread_head_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, lfds600_slist_internal_thread_after_writer, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count*2], loop, lfds600_slist_internal_thread_traverser, &stss ); abstraction_thread_start( &thread_handles[loop+cpu_count*3], loop, lfds600_slist_internal_thread_deleter_traverser, &stss ); } for( loop = 0 ; loop < cpu_count * 4 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); lfds600_slist_delete( stss.ss ); free( thread_handles ); return; }
void queue_test_dequeuing( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; struct lfds611_queue_state *qs; struct queue_test_dequeuing_state *qtds; struct lfds611_validation_info vi = { 0, 0 }; enum lfds611_data_structure_validity dvs[2]; /* TRD : create a queue with 1,000,000 elements use a single thread to enqueue every element each elements user data is an incrementing counter then run one thread per CPU where each busy-works dequeuing when an element is dequeued, we check (on a per-thread basis) the value deqeued is greater than the element previously dequeued */ internal_display_test_name( "Dequeuing" ); cpu_count = abstraction_cpu_count(); lfds611_queue_new( &qs, 1000000 ); for( loop = 0 ; loop < 1000000 ; loop++ ) lfds611_queue_enqueue( qs, (void *) (lfds611_atom_t) loop ); qtds = malloc( sizeof(struct queue_test_dequeuing_state) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (qtds+loop)->qs = qs; (qtds+loop)->error_flag = LOWERED; } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, queue_test_internal_thread_simple_dequeuer, qtds+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); // TRD : check queue is empty lfds611_queue_query( qs, LFDS611_QUEUE_QUERY_VALIDATE, (void *) &vi, (void *) dvs ); // TRD : check for raised error flags for( loop = 0 ; loop < cpu_count ; loop++ ) if( (qtds+loop)->error_flag == RAISED ) dvs[0] = LFDS611_VALIDITY_INVALID_TEST_DATA; free( qtds ); lfds611_queue_delete( qs, NULL, NULL ); internal_display_test_result( 2, "queue", dvs[0], "queue freelist", dvs[1] ); return; }
void benchmark_ringbuffer( void ) { unsigned int loop, thread_count, cpu_count; struct ringbuffer_state *rs; struct ringbuffer_benchmark *rb; thread_state_t *thread_handles; atom_t total_operations_for_full_test_for_all_cpus, total_operations_for_full_test_for_all_cpus_for_one_cpu = 0; double mean_operations_per_second_per_cpu, difference_per_second_per_cpu, total_difference_per_second_per_cpu, std_dev_per_second_per_cpu, scalability; /* TRD : here we benchmark the ringbuffer the benchmark is to have a single ringbuffer where a worker thread busy-works writing and then reading */ cpu_count = abstraction_cpu_count(); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count ); rb = (struct ringbuffer_benchmark *) malloc( sizeof(struct ringbuffer_benchmark) * cpu_count ); // TRD : print the benchmark ID and CSV header printf( "\n" "Release %d Ringbuffer Benchmark #1\n" "CPUs,total ops,mean ops/sec per CPU,standard deviation,scalability\n", LIBLFDS_RELEASE_NUMBER ); // TRD : we run CPU count times for scalability for( thread_count = 1 ; thread_count <= cpu_count ; thread_count++ ) { // TRD : initialisation ringbuffer_new( &rs, 1000, NULL, NULL ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (rb+loop)->rs = rs; (rb+loop)->operation_count = 0; } // TRD : main test for( loop = 0 ; loop < thread_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, benchmark_ringbuffer_thread_write_and_read, rb+loop ); for( loop = 0 ; loop < thread_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); // TRD : post test math total_operations_for_full_test_for_all_cpus = 0; total_difference_per_second_per_cpu = 0; for( loop = 0 ; loop < thread_count ; loop++ ) total_operations_for_full_test_for_all_cpus += (rb+loop)->operation_count; mean_operations_per_second_per_cpu = ((double) total_operations_for_full_test_for_all_cpus / (double) thread_count) / (double) 10; if( thread_count == 1 ) total_operations_for_full_test_for_all_cpus_for_one_cpu = total_operations_for_full_test_for_all_cpus; for( loop = 0 ; loop < thread_count ; loop++ ) { difference_per_second_per_cpu = ((double) (rb+loop)->operation_count / (double) 10) - mean_operations_per_second_per_cpu; total_difference_per_second_per_cpu += difference_per_second_per_cpu * difference_per_second_per_cpu; } std_dev_per_second_per_cpu = sqrt( (double) total_difference_per_second_per_cpu ); scalability = (double) total_operations_for_full_test_for_all_cpus / (double) (total_operations_for_full_test_for_all_cpus_for_one_cpu * thread_count); printf( "%u,%u,%.0f,%.0f,%0.2f\n", thread_count, (unsigned int) total_operations_for_full_test_for_all_cpus, mean_operations_per_second_per_cpu, std_dev_per_second_per_cpu, scalability ); // TRD : cleanup ringbuffer_delete( rs, NULL, NULL ); } free( rb ); free( thread_handles ); return; }
void freelist_test_internal_rapid_popping_and_pushing( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; struct freelist_state *fs; struct validation_info vi; enum data_structure_validity dvs; /* TRD : in these tests there is a fundamental antagonism between how much checking/memory clean up that we do and the likelyhood of collisions between threads in their lock-free operations the lock-free operations are very quick; if we do anything much at all between operations, we greatly reduce the chance of threads colliding so we have some tests which do enough checking/clean up that they can tell the freelist is valid and don't leak memory and here, this test now is one of those which does minimal checking - in fact, the nature of the test is that you can't do any real checking - but goes very quickly what we do is create a small freelist and then run one thread per CPU, where each thread simply pops and then immediately pushes the test runs for ten seconds after the test is done, the only check we do is to traverse the freelist, checking for loops and ensuring the number of elements is correct */ internal_display_test_name( "Rapid popping and pushing (10 seconds)" ); cpu_count = abstraction_cpu_count(); freelist_new( &fs, cpu_count, NULL, NULL ); thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, freelist_test_internal_thread_rapid_popping_and_pushing, fs ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); vi.min_elements = cpu_count; vi.max_elements = cpu_count; freelist_query( fs, FREELIST_QUERY_VALIDATE, (void *) &vi, (void *) &dvs ); freelist_delete( fs, NULL, NULL ); // TRD : print the test result internal_display_test_result( 1, "freelist", dvs ); return; }
void freelist_test_internal_popping_and_pushing( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; enum data_structure_validity dvs; struct freelist_state *fs; struct freelist_test_popping_and_pushing_state *pps; struct validation_info vi; /* TRD : we have two threads per CPU the threads loop for ten seconds the first thread pushes 100000 elements then pops 100000 elements the second thread pops 100000 elements then pushes 100000 elements all pushes and pops go onto the single main freelist after time is up, all threads push what they have remaining onto the main freelist we then validate the main freelist */ internal_display_test_name( "Popping and pushing (10 seconds)" ); cpu_count = abstraction_cpu_count(); freelist_new( &fs, 100000 * cpu_count, NULL, NULL ); pps = malloc( sizeof(struct freelist_test_popping_and_pushing_state) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (pps+loop)->fs = fs; freelist_new( &(pps+loop)->local_fs, 0, NULL, NULL ); (pps+loop+cpu_count)->fs = fs; freelist_new( &(pps+loop+cpu_count)->local_fs, 100000, NULL, NULL ); } thread_handles = malloc( sizeof(thread_state_t) * cpu_count * 2 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { abstraction_thread_start( &thread_handles[loop], loop, freelist_test_internal_thread_popping_and_pushing_start_popping, pps+loop ); abstraction_thread_start( &thread_handles[loop+cpu_count], loop, freelist_test_internal_thread_popping_and_pushing_start_pushing, pps+loop+cpu_count ); } for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); for( loop = 0 ; loop < cpu_count * 2 ; loop++ ) freelist_delete( (pps+loop)->local_fs, NULL, NULL ); free( pps ); vi.min_elements = vi.max_elements = 100000 * cpu_count * 2; freelist_query( fs, FREELIST_QUERY_VALIDATE, (void *) &vi, (void *) &dvs ); freelist_delete( fs, NULL, NULL ); // TRD : print the test result internal_display_test_result( 1, "freelist", dvs ); return; }
void abstraction_test_increment ( void) { unsigned int loop, cpu_count; thread_state_t *thread_handles; LFDS611_ALIGN (LFDS611_ALIGN_SINGLE_POINTER) volatile lfds611_atom_t shared_counter, atomic_shared_counter; /* * TRD : here we test lfds611_abstraction_increment * * first, we run one thread per CPU where each thread increments * a shared counter 10,000,000 times - however, this first test * does NOT use atomic increment; it uses "++" * * second, we repeat the exercise, but this time using * lfds611_abstraction_increment() * * if the final value in the first test is less than (10,000,000*cpu_count) * then the system is sensitive to non-atomic increments; this means if * our atomic version of the test passes, we can have some degree of confidence * that it works * * if the final value in the first test is in fact correct, then we can't know * that our atomic version has changed anything * * and of course if the final value in the atomic test is wrong, we know things * are broken */ internal_display_test_name ("Atomic increment"); cpu_count = abstraction_cpu_count (); shared_counter = 0; atomic_shared_counter = 0; LFDS611_BARRIER_STORE; thread_handles = malloc (sizeof (thread_state_t) * cpu_count); // TRD : non-atomic for (loop = 0; loop < cpu_count; loop++) abstraction_thread_start (&thread_handles[loop], loop, abstraction_test_internal_thread_increment, (void *)&shared_counter); for (loop = 0; loop < cpu_count; loop++) abstraction_thread_wait (thread_handles[loop]); // TRD : atomic for (loop = 0; loop < cpu_count; loop++) abstraction_thread_start (&thread_handles[loop], loop, abstraction_test_internal_thread_atomic_increment, (void *)&atomic_shared_counter); for (loop = 0; loop < cpu_count; loop++) abstraction_thread_wait (thread_handles[loop]); free (thread_handles); // TRD : results if (shared_counter < (10000000 * cpu_count) and atomic_shared_counter == (10000000 * cpu_count)) puts ("passed"); if (shared_counter == (10000000 * cpu_count) and atomic_shared_counter == (10000000 * cpu_count)) puts ("indeterminate"); if (atomic_shared_counter < (10000000 * cpu_count)) puts ("failed"); return; }
void queue_test_enqueuing_and_dequeuing( void ) { unsigned int loop, subloop, cpu_count; thread_state_t *thread_handles; struct lfds611_queue_state *qs; struct queue_test_enqueuing_and_dequeuing_state *qteds; struct lfds611_validation_info vi = { 0, 0 }; enum lfds611_data_structure_validity dvs[2]; internal_display_test_name( "Enqueuing and dequeuing (10 seconds)" ); cpu_count = abstraction_cpu_count(); lfds611_queue_new( &qs, cpu_count ); qteds = malloc( sizeof(struct queue_test_enqueuing_and_dequeuing_state) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (qteds+loop)->qs = qs; (qteds+loop)->thread_number = loop; (qteds+loop)->counter = (lfds611_atom_t) loop << (sizeof(lfds611_atom_t)*8-8); (qteds+loop)->cpu_count = cpu_count; (qteds+loop)->error_flag = LOWERED; (qteds+loop)->per_thread_counters = malloc( sizeof(lfds611_atom_t) * cpu_count ); for( subloop = 0 ; subloop < cpu_count ; subloop++ ) *((qteds+loop)->per_thread_counters+subloop) = 0; } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, queue_test_internal_thread_enqueuer_and_dequeuer, qteds+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); lfds611_queue_query( qs, LFDS611_QUEUE_QUERY_VALIDATE, (void *) &vi, (void *) dvs ); for( loop = 0 ; loop < cpu_count ; loop++ ) if( (qteds+loop)->error_flag == RAISED ) dvs[0] = LFDS611_VALIDITY_INVALID_TEST_DATA; for( loop = 0 ; loop < cpu_count ; loop++ ) free( (qteds+loop)->per_thread_counters ); free( qteds ); lfds611_queue_delete( qs, NULL, NULL ); internal_display_test_result( 2, "queue", dvs[0], "queue freelist", dvs[1] ); return; }
void benchmark_lfds610_stack( void ) { unsigned int loop, thread_count, cpu_count; struct lfds610_stack_state *ss; struct lfds610_stack_benchmark *sb; thread_state_t *thread_handles; lfds610_atom_t total_operations_for_full_test_for_all_cpus, total_operations_for_full_test_for_all_cpus_for_one_cpu = 0; double mean_operations_per_second_per_cpu, difference_per_second_per_cpu, total_difference_per_second_per_cpu, std_dev_per_second_per_cpu, scalability; /* TRD : here we benchmark the stack the benchmark is to have a single stack where a worker thread busy-works pushing then popping */ cpu_count = abstraction_cpu_count(); thread_handles = (thread_state_t *) malloc( sizeof(thread_state_t) * cpu_count ); sb = (struct lfds610_stack_benchmark *) malloc( sizeof(struct lfds610_stack_benchmark) * cpu_count ); // TRD : print the benchmark ID and CSV header printf( "\n" "Release %s Stack Benchmark #1\n" "CPUs,total ops,mean ops/sec per CPU,standard deviation,scalability\n", LFDS610_RELEASE_NUMBER_STRING ); // TRD : we run CPU count times for scalability for( thread_count = 1 ; thread_count <= cpu_count ; thread_count++ ) { // TRD : initialisation lfds610_stack_new( &ss, 1000 ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (sb+loop)->ss = ss; (sb+loop)->operation_count = 0; } // TRD : main test for( loop = 0 ; loop < thread_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, benchmark_lfds610_stack_thread_push_and_pop, sb+loop ); for( loop = 0 ; loop < thread_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); // TRD : post test math total_operations_for_full_test_for_all_cpus = 0; total_difference_per_second_per_cpu = 0; for( loop = 0 ; loop < thread_count ; loop++ ) total_operations_for_full_test_for_all_cpus += (sb+loop)->operation_count; mean_operations_per_second_per_cpu = ((double) total_operations_for_full_test_for_all_cpus / (double) thread_count) / (double) 10; if( thread_count == 1 ) total_operations_for_full_test_for_all_cpus_for_one_cpu = total_operations_for_full_test_for_all_cpus; for( loop = 0 ; loop < thread_count ; loop++ ) { difference_per_second_per_cpu = ((double) (sb+loop)->operation_count / (double) 10) - mean_operations_per_second_per_cpu; total_difference_per_second_per_cpu += difference_per_second_per_cpu * difference_per_second_per_cpu; } std_dev_per_second_per_cpu = sqrt( (double) total_difference_per_second_per_cpu ); scalability = (double) total_operations_for_full_test_for_all_cpus / (double) (total_operations_for_full_test_for_all_cpus_for_one_cpu * thread_count); printf( "%u,%u,%.0f,%.0f,%0.2f\n", thread_count, (unsigned int) total_operations_for_full_test_for_all_cpus, mean_operations_per_second_per_cpu, std_dev_per_second_per_cpu, scalability ); // TRD : cleanup lfds610_stack_delete( ss, NULL, NULL ); } free( sb ); free( thread_handles ); return; }
void freelist_test_internal_pushing( void ) { unsigned int loop, cpu_count; thread_state_t *thread_handles; enum data_structure_validity dvs; struct freelist_test_pushing_state *ftps; struct freelist_element *fe; struct freelist_state *fs, *cleanup_fs; struct freelist_test_counter_and_thread_number *cnt, *counter_and_number_trackers; struct validation_info vi = { 1000000, 1000000 }; /* TRD : we create an empty freelist, which we will push to we then create one freelist per CPU, where this freelist contains 1,000,000/cpu_count number of elements and each element is an incrementing counter and unique ID (from 0 to number of CPUs) we then start one thread per CPU, where each thread is given one of the populated freelists and pops from that to push to the empty freelist the reason for this is to achieve memory pre-allocation which allows the pushing threads to run at maximum speed the threads end when their freelists are empty we then fully pop the now populated main freelist (onto a second freelist, so we can cleanly free all memory), checking that the counts increment on a per unique ID basis and that the number of elements we pop equals 1,000,000 (since each element has an incrementing counter which is unique on a per unique ID basis, we can know we didn't lose any elements) */ internal_display_test_name( "Pushing" ); cpu_count = abstraction_cpu_count(); ftps = malloc( sizeof(struct freelist_test_pushing_state) * cpu_count ); freelist_new( &fs, 0, NULL, NULL ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (ftps+loop)->thread_number = (atom_t) loop; freelist_new( &(ftps+loop)->source_fs, 1000000 / cpu_count, freelist_test_internal_pushing_init, (void *) (atom_t) loop ); (ftps+loop)->fs = fs; } thread_handles = malloc( sizeof(thread_state_t) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_start( &thread_handles[loop], loop, freelist_test_internal_thread_pushing, ftps+loop ); for( loop = 0 ; loop < cpu_count ; loop++ ) abstraction_thread_wait( thread_handles[loop] ); free( thread_handles ); // TRD : now fully pop and verify the main freelist freelist_new( &cleanup_fs, 0, NULL, NULL ); counter_and_number_trackers = malloc( sizeof(struct freelist_test_counter_and_thread_number) * cpu_count ); for( loop = 0 ; loop < cpu_count ; loop++ ) { (counter_and_number_trackers+loop)->counter = (1000000 / cpu_count) * loop; (counter_and_number_trackers+loop)->thread_number = (atom_t) loop; } freelist_query( fs, FREELIST_QUERY_VALIDATE, &vi, (void *) &dvs ); while( dvs == VALIDITY_VALID and freelist_pop(fs, &fe) ) { static int count = 0; freelist_get_user_data_from_element( fe, (void **) &cnt ); if( cnt->counter != (counter_and_number_trackers+cnt->thread_number)->counter++ ) dvs = VALIDITY_INVALID_MISSING_ELEMENTS; freelist_push( cleanup_fs, fe ); count++; } // TRD : clean up free( counter_and_number_trackers ); for( loop = 0 ; loop < cpu_count ; loop++ ) freelist_delete( (ftps+loop)->source_fs, NULL, NULL ); free( ftps ); freelist_delete( cleanup_fs, freelist_test_internal_pushing_delete, NULL ); freelist_delete( fs, NULL, NULL ); // TRD : print the test result internal_display_test_result( 1, "freelist", dvs ); return; }