/*
  Multi-threaded driver for the in-place matrix product involving A and B.

  The rows of A are split into one contiguous slice per job, and every
  slice is handed to matrix_inplace_matmul_mt__() through the thread
  pool. The number of jobs equals the max_running value reported by the
  pool. The first (rows % jobs) slices get one extra row so that all
  rows are covered; when A has fewer rows than there are jobs the
  trailing slices are empty (row_size == 0), but are still submitted.

  Each job receives an arg_pack with, in this order:

     0: int        row_offset  - first row of the slice
     1: int        row_size    - number of rows in the slice
     2: ptr        A
     3: const ptr  B

  The pool is restarted before the jobs are queued and joined before
  the arg_packs are released, so the worker arguments remain valid
  until thread_pool_join() has returned.

  A          : matrix whose rows are partitioned; passed to the workers
               as a non-const pointer.
  B          : matrix passed to the workers as a const pointer.
  thread_pool: pool used to run the jobs; restarted and joined here.
*/
void matrix_inplace_matmul_mt2(matrix_type * A, const matrix_type * B , thread_pool_type * thread_pool){
  const int max_running = thread_pool_get_max_running( thread_pool );
  /*
    Dividing the row count by a max_running of zero would be undefined
    behaviour, so fall back to one job covering every row in that case.
    NOTE(review): presumably a pool with max_running == 0 runs its jobs
    serially in the calling thread - confirm against thread_pool_add_job().
  */
  const int num_jobs = (max_running > 0) ? max_running : 1;
  arg_pack_type ** arglist = util_malloc( num_jobs * sizeof * arglist );
  int it;

  thread_pool_restart( thread_pool );
  {
    const int total_rows = matrix_get_rows( A );
    const int base_rows  = total_rows / num_jobs;
    const int extra_rows = total_rows % num_jobs;   /* Number of slices which get one additional row. */
    int row_offset = 0;

    for (it = 0; it < num_jobs; it++) {
      const int row_size = (it < extra_rows) ? base_rows + 1 : base_rows;

      arglist[it] = arg_pack_alloc();
      arg_pack_append_int(arglist[it] , row_offset );
      arg_pack_append_int(arglist[it] , row_size );
      arg_pack_append_ptr(arglist[it] , A );
      arg_pack_append_const_ptr(arglist[it] , B );

      thread_pool_add_job( thread_pool , matrix_inplace_matmul_mt__ , arglist[it]);
      row_offset += row_size;
    }
  }
  thread_pool_join( thread_pool );

  /* All jobs have completed - the argument packs can be discarded. */
  for (it = 0; it < num_jobs; it++)
    arg_pack_free( arglist[it] );
  free( arglist );
}
/*
  Allocates and initializes a new thread pool instance.

  max_running: stored in the pool, and also used as the number of
               zero-initialized job slots which are allocated.
  start_queue: when true thread_pool_restart() is called on the new
               pool before it is returned.

  The pool is created with accepting_jobs == false and an empty (NULL)
  queue; the queue is then given an initial size of 32 entries through
  thread_pool_resize_queue(). The queue lock is initialized with default
  attributes before the queue is resized.

  Returns the newly allocated pool; the caller owns it.
*/
thread_pool_type * thread_pool_alloc(int max_running , bool start_queue) {
  const int initial_queue_size = 32;
  thread_pool_type * tp = util_malloc( sizeof *tp );

  tp->max_running    = max_running;
  tp->job_slots      = util_calloc( max_running , sizeof * tp->job_slots );
  tp->accepting_jobs = false;
  tp->queue          = NULL;   /* Must be NULL before the first resize. */

  pthread_rwlock_init( &tp->queue_lock , NULL);
  thread_pool_resize_queue( tp , initial_queue_size );

  if (start_queue) {
    thread_pool_restart( tp );
  }

  return tp;
}