// Build a control-tree node describing an unpackm operation.
//
// An unpackm_params_t is allocated with bli_malloc_intl(), filled in with its
// own size and the unpackm variant function, and then handed (together with
// var_func and sub_node) to bli_cntl_obj_create().
//
//   var_func          forwarded unchanged to bli_cntl_obj_create()
//   unpackm_var_func  stored in the var_func field of the params struct
//   sub_node          forwarded unchanged to bli_cntl_obj_create()
//
// Returns whatever bli_cntl_obj_create() returns.
cntl_t* bli_unpackm_cntl_obj_create
     (
       void*   var_func,
       void*   unpackm_var_func,
       cntl_t* sub_node
     )
{
    // Allocate and populate the operation-specific parameter struct.
    unpackm_params_t* unpackm_params = bli_malloc_intl( sizeof( *unpackm_params ) );

    unpackm_params->size     = sizeof( unpackm_params_t );
    unpackm_params->var_func = unpackm_var_func;

    // The bszid field must be BLIS_NO_PART to signal that no blocksize
    // partitioning takes place here. bli_cntl_free() depends on that value
    // to walk the thrinfo_t tree in lockstep with the cntl_t tree.
    return bli_cntl_obj_create
    (
      BLIS_NO_PART,
      var_func,
      unpackm_params,
      sub_node
    );
}
trsv_t* bli_trsv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trsv_t* sub_trsv, unpackv_t* sub_unpackv_x1 ) { trsv_t* cntl; cntl = ( trsv_t* ) bli_malloc_intl( sizeof(trsv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a11 = sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_gemv_rp = sub_gemv_rp; cntl->sub_gemv_cp = sub_gemv_cp; cntl->sub_trsv = sub_trsv; cntl->sub_unpackv_x1 = sub_unpackv_x1; return cntl; }
// Allocate a thrcomm_t with bli_malloc_intl() and initialize it for
// n_threads threads via bli_thrcomm_init().
//
// Returns the allocated communicator.
thrcomm_t* bli_thrcomm_create( dim_t n_threads )
{
    thrcomm_t* new_comm;

    new_comm = bli_malloc_intl( sizeof( *new_comm ) );
    bli_thrcomm_init( new_comm, n_threads );

    return new_comm;
}
her2_t* bli_her2_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packv_t* sub_packv_y1, packm_t* sub_packm_c11, ger_t* sub_ger_rp, ger_t* sub_ger_cp, her2_t* sub_her2, unpackm_t* sub_unpackm_c11 ) { her2_t* cntl; cntl = ( her2_t* ) bli_malloc_intl( sizeof(her2_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packv_y1 = sub_packv_y1; cntl->sub_packm_c11 = sub_packm_c11; cntl->sub_ger_rp = sub_ger_rp; cntl->sub_ger_cp = sub_ger_cp; cntl->sub_her2 = sub_her2; cntl->sub_unpackm_c11 = sub_unpackm_c11; return cntl; }
// Allocate a trsm_t control node with bli_malloc_intl() and copy every
// argument into the node field of the same name.
//
// Returns the newly allocated node.
trsm_t* bli_trsm_cntl_create_node( impl_t     impl_type,
                                   varnum_t   var_num,
                                   bszid_t    bszid,
                                   scalm_t*   sub_scalm,
                                   packm_t*   sub_packm_a,
                                   packm_t*   sub_packm_b,
                                   packm_t*   sub_packm_c,
                                   trsm_t*    sub_trsm,
                                   gemm_t*    sub_gemm,
                                   unpackm_t* sub_unpackm_c )
{
    trsm_t* node = bli_malloc_intl( sizeof( *node ) );

    // Implementation selection and blocksize id.
    node->impl_type = impl_type;
    node->var_num   = var_num;
    node->bszid     = bszid;

    // Sub-operation control nodes.
    node->sub_scalm     = sub_scalm;
    node->sub_packm_a   = sub_packm_a;
    node->sub_packm_b   = sub_packm_b;
    node->sub_packm_c   = sub_packm_c;
    node->sub_trsm      = sub_trsm;
    node->sub_gemm      = sub_gemm;
    node->sub_unpackm_c = sub_unpackm_c;

    return node;
}
// Allocate a gemv_t control node with bli_malloc_intl() and copy every
// argument into the node field of the same name.
//
// Returns the newly allocated node.
gemv_t* bli_gemv_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  bszid_t    bszid,
                                  scalv_t*   sub_scalv,
                                  packm_t*   sub_packm_a,
                                  packv_t*   sub_packv_x,
                                  packv_t*   sub_packv_y,
                                  gemv_t*    sub_gemv,
                                  unpackv_t* sub_unpackv_y )
{
    gemv_t* node = bli_malloc_intl( sizeof( *node ) );

    // Implementation selection and blocksize id.
    node->impl_type = impl_type;
    node->var_num   = var_num;
    node->bszid     = bszid;

    // Sub-operation control nodes.
    node->sub_scalv     = sub_scalv;
    node->sub_packm_a   = sub_packm_a;
    node->sub_packv_x   = sub_packv_x;
    node->sub_packv_y   = sub_packv_y;
    node->sub_gemv      = sub_gemv;
    node->sub_unpackv_y = sub_unpackv_y;

    return node;
}
// Build a control-tree node describing a packv operation.
//
// A packv_params_t is allocated with bli_malloc_intl(), filled in with its
// own size plus the packv-specific arguments, and then handed (together with
// var_func and sub_node) to bli_cntl_create_node().
//
//   var_func        forwarded unchanged to bli_cntl_create_node()
//   packv_var_func  stored in the packv_var_func field of the params struct
//   bmid            stored in the bmid field of the params struct
//   pack_schema     stored in the pack_schema field of the params struct
//   sub_node        forwarded unchanged to bli_cntl_create_node()
//
// Returns whatever bli_cntl_create_node() returns.
cntl_t* bli_packv_cntl_obj_create
     (
       void*   var_func,
       void*   packv_var_func,
       bszid_t bmid,
       pack_t  pack_schema,
       cntl_t* sub_node
     )
{
    // Allocate and populate the operation-specific parameter struct.
    packv_params_t* packv_params = bli_malloc_intl( sizeof( *packv_params ) );

    packv_params->size           = sizeof( packv_params_t );
    packv_params->packv_var_func = packv_var_func;
    packv_params->bmid           = bmid;
    packv_params->pack_schema    = pack_schema;

    // The bszid field must be BLIS_NO_PART to signal that no blocksize
    // partitioning takes place here. bli_cntl_free() depends on that value
    // to walk the thrinfo_t tree in lockstep with the cntl_t tree.
    return bli_cntl_create_node
    (
      BLIS_NO_PART,
      var_func,
      packv_params,
      sub_node
    );
}
// Allocate `size` bytes with bli_malloc_intl() and zero-fill them.
//
//   size  number of bytes to allocate and clear
//
// Returns the pointer obtained from bli_malloc_intl(). If that pointer is
// NULL it is returned as-is and nothing is written.
void* bli_calloc_intl( size_t size )
{
    void* p = bli_malloc_intl( size );

    // Passing a null pointer to memset() is undefined behaviour (even when
    // size is 0), so only clear the buffer when an allocation was returned.
    if ( p != NULL )
    {
        memset( p, 0, size );
    }

    return p;
}
// Initialize an already-allocated communicator for n_threads threads.
//
// Does nothing when communicator is NULL. Otherwise clears sent_object,
// records n_threads, allocates an array of n_threads barrier_t pointers with
// bli_malloc_intl(), and lets bli_thrcomm_tree_barrier_create() fill that
// array starting at index 0, using BLIS_TREE_BARRIER_ARITY as the arity.
void bli_thrcomm_init( thrcomm_t* communicator, dim_t n_threads )
{
    if ( communicator != NULL )
    {
        communicator->sent_object = NULL;
        communicator->n_threads   = n_threads;

        // One leaf-barrier slot per thread.
        communicator->barriers =
            bli_malloc_intl( sizeof( barrier_t* ) * n_threads );

        // The returned node is not stored here; only the leaves array
        // filled in by the call is kept.
        bli_thrcomm_tree_barrier_create
        (
          n_threads,
          BLIS_TREE_BARRIER_ARITY,
          communicator->barriers,
          0
        );
    }
}
// Run a level-3 operation `func` on n_threads threads using pthreads.
//
// Threads 1 .. n_threads-1 are spawned with pthread_create(), each receiving
// a thread_data_t that bundles the operands, context, control tree and that
// thread's entry from the `thread` array; they start in
// thread_decorator_helper(). The calling thread runs `func` directly with
// thread[0]. All spawned threads are then joined and the temporary arrays
// are released with bli_free_intl().
void bli_level3_thread_decorator
     (
       dim_t    n_threads,
       l3_int_t func,
       obj_t*   alpha,
       obj_t*   a,
       obj_t*   b,
       obj_t*   beta,
       obj_t*   c,
       void*    cntx,
       void*    cntl,
       void**   thread
     )
{
    // Slot 0 of both arrays is allocated but never used, because the
    // calling thread does the work of thread 0 itself.
    pthread_t*     tids = bli_malloc_intl( sizeof( pthread_t ) * n_threads );
    thread_data_t* args = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );

    for ( int t = 1; t < n_threads; t++ )
    {
        // Package the arguments for worker t.
        thread_data_t* arg = &args[t];

        arg->func   = func;
        arg->alpha  = alpha;
        arg->a      = a;
        arg->b      = b;
        arg->beta   = beta;
        arg->c      = c;
        arg->cntx   = cntx;
        arg->cntl   = cntl;
        arg->thread = thread[t];

        pthread_create( &tids[t], NULL, &thread_decorator_helper, arg );
    }

    // The calling thread participates as thread 0.
    func( alpha, a, b, beta, c, cntx, cntl, thread[0] );

    // Wait for every spawned worker before freeing the data they read.
    for ( int t = 1; t < n_threads; t++ )
    {
        pthread_join( tids[t], NULL );
    }

    bli_free_intl( tids );
    bli_free_intl( args );
}
// Allocate a scalv_t control node with bli_malloc_intl() and store the
// implementation type and variant number in it.
//
// Returns the newly allocated node.
scalv_t* bli_scalv_cntl_obj_create( impl_t   impl_type,
                                    varnum_t var_num )
{
    scalv_t* node = bli_malloc_intl( sizeof( *node ) );

    node->impl_type = impl_type;
    node->var_num   = var_num;

    return node;
}
// Allocate a gemm_thrinfo_t with bli_malloc_intl() and initialize it by
// forwarding every argument, in order, to bli_setup_gemm_thrinfo_node().
//
// Returns the allocated node.
gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t*   ocomm,
                                              dim_t            ocomm_id,
                                              thread_comm_t*   icomm,
                                              dim_t            icomm_id,
                                              dim_t            n_way,
                                              dim_t            work_id,
                                              packm_thrinfo_t* opackm,
                                              packm_thrinfo_t* ipackm,
                                              gemm_thrinfo_t*  sub_gemm )
{
    gemm_thrinfo_t* info;

    info = bli_malloc_intl( sizeof( *info ) );

    bli_setup_gemm_thrinfo_node
    (
      info,
      ocomm, ocomm_id,
      icomm, icomm_id,
      n_way,
      work_id,
      opackm,
      ipackm,
      sub_gemm
    );

    return info;
}
// Allocate a packv_t control node with bli_malloc_intl() and copy every
// argument into the node field of the same name.
//
// Returns the newly allocated node.
packv_t* bli_packv_cntl_obj_create( impl_t   impl_type,
                                    varnum_t var_num,
                                    bszid_t  bmid,
                                    pack_t   pack_schema )
{
    packv_t* node = bli_malloc_intl( sizeof( *node ) );

    node->impl_type   = impl_type;
    node->var_num     = var_num;
    node->bmid        = bmid;
    node->pack_schema = pack_schema;

    return node;
}
// Allocate a blksz_t with bli_malloc_intl() and initialize it by forwarding
// the eight blocksize arguments, in order, to bli_blksz_obj_init().
//
// Returns the allocated object.
blksz_t* bli_blksz_obj_create( dim_t b_s, dim_t be_s,
                               dim_t b_d, dim_t be_d,
                               dim_t b_c, dim_t be_c,
                               dim_t b_z, dim_t be_z )
{
    blksz_t* blksz = bli_malloc_intl( sizeof( *blksz ) );

    bli_blksz_obj_init
    (
      blksz,
      b_s, be_s,
      b_d, be_d,
      b_c, be_c,
      b_z, be_z
    );

    return blksz;
}
//Tree barrier used for Intel Xeon Phi barrier_t* bli_thrcomm_tree_barrier_create( int num_threads, int arity, barrier_t** leaves, int leaf_index ) { barrier_t* me = bli_malloc_intl( sizeof(barrier_t) ); me->dad = NULL; me->signal = 0; // Base Case if ( num_threads <= arity ) { //Now must be registered as a leaf for ( int i = 0; i < num_threads; i++ ) { leaves[ leaf_index + i ] = me; } me->count = num_threads; me->arity = num_threads; } else { // Otherwise this node has children int threads_per_kid = num_threads / arity; int defecit = num_threads - threads_per_kid * arity; for ( int i = 0; i < arity; i++ ) { int threads_this_kid = threads_per_kid; if ( i < defecit ) threads_this_kid++; barrier_t* kid = bli_thrcomm_tree_barrier_create( threads_this_kid, arity, leaves, leaf_index ); kid->dad = me; leaf_index += threads_this_kid; } me->count = arity; me->arity = arity; } return me; }
// Allocate a func_t with bli_malloc_intl() and initialize it by forwarding
// the four function pointers, in order, to bli_func_init().
//
// Returns the allocated object.
func_t* bli_func_create
     (
       void_fp ptr_s,
       void_fp ptr_d,
       void_fp ptr_c,
       void_fp ptr_z
     )
{
    func_t* funcs = bli_malloc_intl( sizeof( *funcs ) );

    bli_func_init( funcs, ptr_s, ptr_d, ptr_c, ptr_z );

    return funcs;
}
// Allocate a ger_t control node with bli_malloc_intl() and copy every
// argument into the node field of the same name.
//
// Returns the newly allocated node.
ger_t* bli_ger_cntl_obj_create( impl_t     impl_type,
                                varnum_t   var_num,
                                bszid_t    bszid,
                                packv_t*   sub_packv_x,
                                packv_t*   sub_packv_y,
                                packm_t*   sub_packm_a,
                                ger_t*     sub_ger,
                                unpackm_t* sub_unpackm_a )
{
    ger_t* node = bli_malloc_intl( sizeof( *node ) );

    // Implementation selection and blocksize id.
    node->impl_type = impl_type;
    node->var_num   = var_num;
    node->bszid     = bszid;

    // Sub-operation control nodes.
    node->sub_packv_x   = sub_packv_x;
    node->sub_packv_y   = sub_packv_y;
    node->sub_packm_a   = sub_packm_a;
    node->sub_ger       = sub_ger;
    node->sub_unpackm_a = sub_unpackm_a;

    return node;
}
//Constructors and destructors for constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { thread_comm_t* comm = (thread_comm_t*) bli_malloc_intl( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; }