示例#1
0
void bli_l3_thread_decorator
     (
       l3int_t     func,
       obj_t*      alpha,
       obj_t*      a,
       obj_t*      b,
       obj_t*      beta,
       obj_t*      c,
       cntx_t*     cntx,
       cntl_t*     cntl
     )
{
	// Sequential variant of the level-3 thread decorator: the operation
	// passed in as func is invoked exactly once, by a single thread whose
	// id is zero.
	const dim_t num_threads = 1;
	const dim_t thread_id   = 0;

	// Filled in below: the control tree actually handed to func, and the
	// root thrinfo_t node for this (only) thread.
	cntl_t*    local_cntl;
	thrinfo_t* root_info;

	// The root thrinfo_t structures need a global communicator; allocate it.
	thrcomm_t* global_comm = bli_thrcomm_create( num_threads );

	// If needed, build a default control tree for the operation. The tree
	// to use comes back through local_cntl.
	bli_l3_cntl_create_if( a, b, c, cntx, cntl, &local_cntl );

	// Build the root node of this thread's thrinfo_t structure.
	bli_l3_thrinfo_create_root( thread_id, global_comm, cntx, local_cntl, &root_info );

	// Run the level-3 operation itself.
	func( alpha, a, b, beta, c, cntx, local_cntl, root_info );

	// Release the control tree if it was created locally above.
	bli_l3_cntl_free_if( a, b, c, cntx, cntl, local_cntl, root_info );

	// Release this thread's thrinfo_t structure.
	bli_l3_thrinfo_free( root_info );

	// Do not free global_comm here: the communicator's chief thread has
	// already freed it inside the bli_l3_thrinfo_free() call just above.
}
示例#2
0
void bli_l3_thread_decorator
     (
       l3int_t     func,
       opid_t      family,
       obj_t*      alpha,
       obj_t*      a,
       obj_t*      b,
       obj_t*      beta,
       obj_t*      c,
       cntx_t*     cntx,
       rntm_t*     rntm,
       cntl_t*     cntl
     )
{
	// Mixed-domain hack for bli_gemm_front(): when A and B need a
	// non-standard pack schema, it is transmitted through the schema field
	// of the obj_t's instead of through extra function parameters. Grab
	// both values first, then immediately restore the fields to the value
	// expected for unpacked objects.
	pack_t a_schema = bli_obj_pack_schema( a );
	pack_t b_schema = bli_obj_pack_schema( b );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );

	// Sequential execution: exactly one thread.
	const dim_t num_threads = 1;

	// NOTE: bli_init() has already initialized the sba.

	// Borrow an array_t from the small block allocator (sba). The checkout
	// takes an internal lock so that only one application thread touches
	// the sba at a time, and bli_sba_checkout_array() automatically resizes
	// the array_t when necessary.
	array_t* restrict sba_array = bli_sba_checkout_array( num_threads );

	// Embed thread 0's pool_t* into the rntm. This happens up-front only so
	// that the global communicator can be created below.
	bli_sba_rntm_set_pool( 0, sba_array, rntm );

	// Fill in the rntm's packing block allocator field.
	bli_membrk_rntm_set_membrk( rntm );

	// The root thrinfo_t structures need a global communicator; allocate it.
	thrcomm_t* restrict global_comm = bli_thrcomm_create( rntm, num_threads );

	{
		// NOTE: No additional copy of the rntm_t is made here, because one
		// of the high-level oapi functions already copied it.
		rntm_t* restrict local_rntm = rntm;

		const dim_t thread_id = 0;

		// Filled in below: the control tree handed to func, and the root
		// thrinfo_t node for this (only) thread.
		cntl_t*    local_cntl;
		thrinfo_t* root_info;

		// NOTE: A per-thread call to bli_sba_rntm_set_pool() (which would
		// look up this thread's pool_t* in the array_t, allocating and
		// initializing it if it were NULL, and store it in the rntm_t's
		// sba_pool field) is deliberately omitted here: with a single
		// thread it would only repeat the call already made above.

		// NOTE: In contrast to the _openmp.c and _pthreads.c variants, A, B,
		// and C are not aliased here because bli_*_front() already aliased
		// them. Aliasing may be added here in the future so that all three
		// implementations (_single.c, _openmp.c, _pthreads.c) consistently
		// provide local aliases, at which point aliasing elsewhere could be
		// eliminated.

		// If needed, build a default control tree for the operation. The
		// tree to use comes back through local_cntl.
		bli_l3_cntl_create_if( family, a_schema, b_schema,
		                       a, b, c, local_rntm, cntl, &local_cntl );

		// Build the root node of this thread's thrinfo_t structure.
		bli_l3_thrinfo_create_root( thread_id, global_comm, local_rntm,
		                            local_cntl, &root_info );

		// Run the level-3 operation itself.
		func( alpha, a, b, beta, c, cntx, local_rntm, local_cntl, root_info );

		// Release this thread's local control tree.
		bli_l3_cntl_free( local_rntm, local_cntl, root_info );

		// Release this thread's thrinfo_t structure.
		bli_l3_thrinfo_free( local_rntm, root_info );
	}

	// Do not free global_comm here: the communicator's chief thread has
	// already freed it inside the bli_l3_thrinfo_free() call above.

	// Return the array_t to the small block allocator. As with the
	// checkout, a lock embedded in the sba guarantees mutual exclusion.
	bli_sba_checkin_array( sba_array );
}