// Create a trsv control object.
//
// A trsv_t is obtained from bli_malloc() and every argument is stored in
// the field of the same name. Pointer arguments are stored as given; the
// objects they refer to are not copied. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated trsv_t.
trsv_t* bli_trsv_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  scalv_t*   sub_scalv,
                                  packm_t*   sub_packm_a11,
                                  packv_t*   sub_packv_x1,
                                  gemv_t*    sub_gemv_rp,
                                  gemv_t*    sub_gemv_cp,
                                  trsv_t*    sub_trsv,
                                  unpackv_t* sub_unpackv_x1 )
{
    trsv_t* node = bli_malloc( sizeof( *node ) );

    // Implementation selector and blocksize.
    node->impl_type      = impl_type;
    node->var_num        = var_num;
    node->b              = b;

    // Sub-operation control objects.
    node->sub_scalv      = sub_scalv;
    node->sub_packm_a11  = sub_packm_a11;
    node->sub_packv_x1   = sub_packv_x1;
    node->sub_gemv_rp    = sub_gemv_rp;
    node->sub_gemv_cp    = sub_gemv_cp;
    node->sub_trsv       = sub_trsv;
    node->sub_unpackv_x1 = sub_unpackv_x1;

    return node;
}
trsm_t* bli_trsm_cntl_obj_create( impl_t impl_type, varnum_t var_num, blksz_t* b, func_t* gemm_ukrs_, func_t* gemmtrsm_l_ukrs_, func_t* gemmtrsm_u_ukrs_, scalm_t* sub_scalm, packm_t* sub_packm_a, packm_t* sub_packm_b, packm_t* sub_packm_c, trsm_t* sub_trsm, gemm_t* sub_gemm, unpackm_t* sub_unpackm_c ) { trsm_t* cntl; cntl = ( trsm_t* ) bli_malloc( sizeof(trsm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->b = b; cntl->gemm_ukrs = gemm_ukrs_; cntl->gemmtrsm_l_ukrs = gemmtrsm_l_ukrs_; cntl->gemmtrsm_u_ukrs = gemmtrsm_u_ukrs_; cntl->sub_scalm = sub_scalm; cntl->sub_packm_a = sub_packm_a; cntl->sub_packm_b = sub_packm_b; cntl->sub_packm_c = sub_packm_c; cntl->sub_trsm = sub_trsm; cntl->sub_gemm = sub_gemm; cntl->sub_unpackm_c = sub_unpackm_c; return cntl; }
// Create a herk control object.
//
// A herk_t is obtained from bli_malloc() and every argument is stored in
// the field of the same name. Pointer arguments are stored as given; the
// objects they refer to are not copied. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated herk_t.
herk_t* bli_herk_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  blksz_t*   b_aux,
                                  scalm_t*   sub_scalm,
                                  packm_t*   sub_packm_a,
                                  packm_t*   sub_packm_b,
                                  packm_t*   sub_packm_c,
                                  herk_t*    sub_herk,
                                  unpackm_t* sub_unpackm_c )
{
    herk_t* node = bli_malloc( sizeof( *node ) );

    // Implementation selector and blocksizes.
    node->impl_type     = impl_type;
    node->var_num       = var_num;
    node->b             = b;
    node->b_aux         = b_aux;

    // Sub-operation control objects.
    node->sub_scalm     = sub_scalm;
    node->sub_packm_a   = sub_packm_a;
    node->sub_packm_b   = sub_packm_b;
    node->sub_packm_c   = sub_packm_c;
    node->sub_herk      = sub_herk;
    node->sub_unpackm_c = sub_unpackm_c;

    return node;
}
// Allocate one pool block and record both its raw and aligned addresses.
//
// block_size + align_size bytes are requested from bli_malloc() so that at
// least block_size usable bytes remain once the start address has been
// advanced to a multiple of align_size. The address returned by
// bli_malloc() is stored in the pblk_t as buf_sys and the (possibly
// advanced) aligned address is stored as buf_align.
//
// The bli_malloc() result is used without a NULL check in this function,
// and align_size is used as a modulus, so it must be nonzero.
void bli_pool_alloc_block( siz_t   block_size,
                           siz_t   align_size,
                           pblk_t* block )
{
    void* buf_sys;
    void* buf_align;

    // Allocate the block. We add the alignment size to ensure we will
    // have enough usable space after alignment.
    buf_sys   = bli_malloc( block_size + align_size );
    buf_align = buf_sys;

    // Advance the pointer to achieve the necessary alignment, if it
    // is not already aligned.
    if ( bli_is_unaligned_to( ( uintptr_t )buf_sys, ( uintptr_t )align_size ) )
    {
        // Number of bytes by which buf_sys overshoots the previous
        // alignment boundary. Notice that this works even if the
        // alignment is not a power of two.
        uintptr_t misalign = ( uintptr_t )buf_sys % ( uintptr_t )align_size;

        // Standard C does not define arithmetic on void* (it is only a
        // compiler extension), so step in bytes through a char* and
        // convert back to void* afterwards.
        buf_align = ( void* )( ( char* )buf_sys +
                               ( ( uintptr_t )align_size - misalign ) );
    }

    // Save the results in the pblk_t structure.
    bli_pblk_set_buf_sys( buf_sys, block );
    bli_pblk_set_buf_align( buf_align, block );
}
// Create a gemv control object.
//
// A gemv_t is obtained from bli_malloc() and every argument is stored in
// the field of the same name. Pointer arguments are stored as given; the
// objects they refer to are not copied. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated gemv_t.
gemv_t* bli_gemv_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  scalv_t*   sub_scalv,
                                  packm_t*   sub_packm_a,
                                  packv_t*   sub_packv_x,
                                  packv_t*   sub_packv_y,
                                  gemv_t*    sub_gemv,
                                  unpackv_t* sub_unpackv_y )
{
    gemv_t* node = bli_malloc( sizeof( *node ) );

    // Implementation selector and blocksize.
    node->impl_type     = impl_type;
    node->var_num       = var_num;
    node->b             = b;

    // Sub-operation control objects.
    node->sub_scalv     = sub_scalv;
    node->sub_packm_a   = sub_packm_a;
    node->sub_packv_x   = sub_packv_x;
    node->sub_packv_y   = sub_packv_y;
    node->sub_gemv      = sub_gemv;
    node->sub_unpackv_y = sub_unpackv_y;

    return node;
}
// Create a gemm control object.
//
// A gemm_t is obtained from bli_malloc() and every argument is stored in
// the field of the same name. Pointer arguments are stored as given; the
// objects they refer to are not copied. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated gemm_t.
gemm_t* bli_gemm_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  scalm_t*   sub_scalm,
                                  packm_t*   sub_packm_a,
                                  packm_t*   sub_packm_b,
                                  packm_t*   sub_packm_c,
                                  gemm_t*    sub_gemm,
                                  unpackm_t* sub_unpackm_c )
{
    gemm_t* node = bli_malloc( sizeof( *node ) );

    // Implementation selector and blocksize.
    node->impl_type     = impl_type;
    node->var_num       = var_num;
    node->b             = b;

    // Sub-operation control objects.
    node->sub_scalm     = sub_scalm;
    node->sub_packm_a   = sub_packm_a;
    node->sub_packm_b   = sub_packm_b;
    node->sub_packm_c   = sub_packm_c;
    node->sub_gemm      = sub_gemm;
    node->sub_unpackm_c = sub_unpackm_c;

    return node;
}
// Allocate one pool block and record both its raw and aligned addresses.
//
// block_size + align_size bytes are requested from bli_malloc(); the extra
// align_size bytes guarantee that block_size usable bytes remain after the
// start address has been moved up to an alignment boundary. The address
// returned by bli_malloc() is stored in the pblk_t as buf_sys, and the
// aligned address as buf_align.
//
// The bli_malloc() result is used without a NULL check in this function,
// and align_size is used as a modulus, so it must be nonzero.
void bli_pool_alloc_block( siz_t   block_size,
                           siz_t   align_size,
                           pblk_t* block )
{
    void* raw     = bli_malloc( block_size + align_size );
    void* aligned = raw;

    // Only adjust when the raw address is not already on a boundary.
    if ( bli_is_unaligned_to( raw, align_size ) )
    {
        // A void* may be converted to uintptr_t and back (stdint.h), so
        // the address arithmetic is carried out on uintptr_t values. The
        // remainder-based computation does not require the alignment to
        // be a power of two.
        uintptr_t boundary = ( uintptr_t )align_size;
        uintptr_t excess   = ( uintptr_t )raw % boundary;

        // Step forward in bytes via char* to the next boundary, then
        // convert back to void* for storage.
        aligned = ( void* )( ( char* )aligned + ( boundary - excess ) );
    }

    // Record both addresses in the pblk_t.
    bli_pblk_set_buf_sys( raw, block );
    bli_pblk_set_buf_align( aligned, block );
}
packm_t* bli_packm_cntl_obj_create( impl_t impl_type, varnum_t var_num, blksz_t* mr, blksz_t* nr, bool_t does_densify, bool_t does_invert_diag, bool_t rev_iter_if_upper, bool_t rev_iter_if_lower, pack_t pack_schema, packbuf_t pack_buf_type ) { packm_t* cntl; cntl = ( packm_t* ) bli_malloc( sizeof(packm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->mr = mr; cntl->nr = nr; cntl->does_densify = does_densify; cntl->does_invert_diag = does_invert_diag; cntl->rev_iter_if_upper = rev_iter_if_upper; cntl->rev_iter_if_lower = rev_iter_if_lower; cntl->pack_schema = pack_schema; cntl->pack_buf_type = pack_buf_type; return cntl; }
her2_t* bli_her2_cntl_obj_create( impl_t impl_type, varnum_t var_num, blksz_t* b, packv_t* sub_packv_x1, packv_t* sub_packv_y1, packm_t* sub_packm_c11, ger_t* sub_ger_rp, ger_t* sub_ger_cp, her2_t* sub_her2, unpackm_t* sub_unpackm_c11 ) { her2_t* cntl; cntl = ( her2_t* ) bli_malloc( sizeof(her2_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->b = b; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packv_y1 = sub_packv_y1; cntl->sub_packm_c11 = sub_packm_c11; cntl->sub_ger_rp = sub_ger_rp; cntl->sub_ger_cp = sub_ger_cp; cntl->sub_her2 = sub_her2; cntl->sub_unpackm_c11 = sub_unpackm_c11; return cntl; }
hemv_t* bli_hemv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, packv_t* sub_packv_y1, gemv_t* sub_gemv_n_rp, gemv_t* sub_gemv_n_cp, gemv_t* sub_gemv_t_rp, gemv_t* sub_gemv_t_cp, hemv_t* sub_hemv, unpackv_t* sub_unpackv_y1 ) { hemv_t* cntl; cntl = ( hemv_t* ) bli_malloc( sizeof(hemv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a11 = sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packv_y1 = sub_packv_y1; cntl->sub_gemv_n_rp = sub_gemv_n_rp; cntl->sub_gemv_n_cp = sub_gemv_n_cp; cntl->sub_gemv_t_rp = sub_gemv_t_rp; cntl->sub_gemv_t_cp = sub_gemv_t_cp; cntl->sub_hemv = sub_hemv; cntl->sub_unpackv_y1 = sub_unpackv_y1; return cntl; }
// Initialize a pool with num_blocks freshly allocated blocks.
//
// An array of num_blocks pblk_t entries is obtained from bli_malloc(), and
// bli_pool_alloc_block() is called once per entry with the given block_size
// and align_size. The pool_t is then set up so that its block_ptrs array
// length and its number of blocks both equal num_blocks, its top index is
// 0, and its block and alignment sizes are the values passed in.
//
// The bli_malloc() result is used without a NULL check in this function.
void bli_pool_init( dim_t   num_blocks,
                    siz_t   block_size,
                    siz_t   align_size,
                    pool_t* pool )
{
    // Storage for the per-block bookkeeping entries.
    pblk_t* entries = bli_malloc( num_blocks * sizeof( pblk_t ) );
    dim_t   idx     = 0;

    // Allocate a block for every entry.
    while ( idx < num_blocks )
    {
        bli_pool_alloc_block( block_size, align_size, entries + idx );
        ++idx;
    }

    // Fill in the pool_t fields.
    bli_pool_set_block_ptrs( entries, pool );
    bli_pool_set_block_ptrs_len( num_blocks, pool );
    bli_pool_set_num_blocks( num_blocks, pool );
    bli_pool_set_top_index( 0, pool );
    bli_pool_set_block_size( block_size, pool );
    bli_pool_set_align_size( align_size, pool );
}
gemm_t* bli_gemm_cntl_obj_create( impl_t impl_type, varnum_t var_num, blksz_t* b, func_t* gemm_ukrs_, scalm_t* sub_scalm, packm_t* sub_packm_a, packm_t* sub_packm_b, packm_t* sub_packm_c, gemm_t* sub_gemm, unpackm_t* sub_unpackm_c ) { gemm_t* cntl; cntl = ( gemm_t* ) bli_malloc( sizeof(gemm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->b = b; cntl->gemm_ukrs = gemm_ukrs_; // avoid name conflict with global symbol cntl->sub_scalm = sub_scalm; cntl->sub_packm_a = sub_packm_a; cntl->sub_packm_b = sub_packm_b; cntl->sub_packm_c = sub_packm_c; cntl->sub_gemm = sub_gemm; cntl->sub_unpackm_c = sub_unpackm_c; return cntl; }
//Constructors and destructors for thread infos thrinfo_t* bli_create_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id, dim_t n_way, dim_t work_id ) { thrinfo_t* thr = (thrinfo_t*) bli_malloc( sizeof(thrinfo_t) ); bli_setup_thread_info( thr, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id ); return thr; }
// Allocate a packm_thread_info_t with bli_malloc() and store the three
// arguments in its communicator, tid and max_threads fields. The
// communicator pointer is stored as given. The bli_malloc() result is not
// checked for NULL here.
//
// Returns the newly allocated packm_thread_info_t.
packm_thread_info_t* bli_create_packm_thread_info( thread_comm_t* communicator,
                                                   dim_t          tid,
                                                   dim_t          max_threads )
{
    packm_thread_info_t* info = bli_malloc( sizeof( *info ) );

    info->communicator = communicator;
    info->tid          = tid;
    info->max_threads  = max_threads;

    return info;
}
// Create a scalm control object.
//
// A scalm_t is obtained from bli_malloc() and its impl_type and var_num
// fields are set from the arguments. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated scalm_t.
scalm_t* bli_scalm_cntl_obj_create( impl_t   impl_type,
                                    varnum_t var_num )
{
    scalm_t* node = bli_malloc( sizeof( *node ) );

    node->impl_type = impl_type;
    node->var_num   = var_num;

    return node;
}
// Create an unpackv control object.
//
// An unpackv_t is obtained from bli_malloc() and its impl_type and var_num
// fields are set from the arguments. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated unpackv_t.
unpackv_t* bli_unpackv_cntl_obj_create( impl_t   impl_type,
                                        varnum_t var_num )
{
    unpackv_t* node = bli_malloc( sizeof( *node ) );

    node->impl_type = impl_type;
    node->var_num   = var_num;

    return node;
}
// Run a level-3 operation on n_threads pthreads and wait for all of them.
//
// For each index i in [0, n_threads) a thread_data_t is filled in with
// func, the operand pointers (alpha, a, b, beta, c), cntl and thread[i],
// and a pthread is started on thread_decorator_helper with that record as
// its argument. After all threads have been started, each one is joined in
// index order, and the two temporary arrays are released with bli_free().
//
// The return values of bli_malloc(), pthread_create() and pthread_join()
// are not checked in this function.
void bli_level3_thread_decorator( dim_t        n_threads,
                                  level3_int_t func,
                                  obj_t*       alpha,
                                  obj_t*       a,
                                  obj_t*       b,
                                  obj_t*       beta,
                                  obj_t*       c,
                                  void*        cntl,
                                  void**       thread )
{
    // One handle and one argument record per thread.
    pthread_t*     handles = bli_malloc( sizeof( *handles ) * n_threads );
    thread_data_t* records = bli_malloc( sizeof( *records ) * n_threads );

    // Launch phase: populate each record, then start its thread.
    for ( int t = 0; t < n_threads; t++ )
    {
        thread_data_t* rec = &records[t];

        rec->func   = func;
        rec->alpha  = alpha;
        rec->a      = a;
        rec->b      = b;
        rec->beta   = beta;
        rec->c      = c;
        rec->cntl   = cntl;
        rec->thread = thread[t];

        pthread_create( &handles[t], NULL, &thread_decorator_helper, rec );
    }

    // Join phase: the records must stay alive until every thread is done.
    for ( int t = 0; t < n_threads; t++ )
    {
        pthread_join( handles[t], NULL );
    }

    bli_free( handles );
    bli_free( records );
}
// Allocate a trmm_thrinfo_t with bli_malloc() and pass it, together with
// all of the arguments, to bli_setup_trmm_thrinfo_node(). The bli_malloc()
// result is not checked for NULL here.
//
// Returns the newly allocated trmm_thrinfo_t.
trmm_thrinfo_t* bli_create_trmm_thrinfo_node( thread_comm_t*   ocomm,
                                              dim_t            ocomm_id,
                                              thread_comm_t*   icomm,
                                              dim_t            icomm_id,
                                              dim_t            n_way,
                                              dim_t            work_id,
                                              packm_thrinfo_t* opackm,
                                              packm_thrinfo_t* ipackm,
                                              trmm_thrinfo_t*  sub_trmm )
{
    trmm_thrinfo_t* node = bli_malloc( sizeof( *node ) );

    bli_setup_trmm_thrinfo_node( node,
                                 ocomm, ocomm_id,
                                 icomm, icomm_id,
                                 n_way, work_id,
                                 opackm, ipackm,
                                 sub_trmm );

    return node;
}
// Create a packv control object.
//
// A packv_t is obtained from bli_malloc() and every argument is stored in
// the field of the same name. The mult_dim pointer is stored as given; the
// blksz_t it refers to is not copied. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated packv_t.
packv_t* bli_packv_cntl_obj_create( impl_t   impl_type,
                                    varnum_t var_num,
                                    blksz_t* mult_dim,
                                    pack_t   pack_schema )
{
    packv_t* node = bli_malloc( sizeof( *node ) );

    node->impl_type   = impl_type;
    node->var_num     = var_num;
    node->mult_dim    = mult_dim;
    node->pack_schema = pack_schema;

    return node;
}
// Allocate a blksz_t with bli_malloc() and initialize it by forwarding all
// eight values, in order, to bli_blksz_obj_init(). The bli_malloc() result
// is not checked for NULL here.
//
// NOTE(review): the b_*/be_* pairs presumably correspond to the s, d, c and
// z datatypes -- confirm against bli_blksz_obj_init().
//
// Returns the newly allocated blksz_t.
blksz_t* bli_blksz_obj_create( dim_t b_s, dim_t be_s,
                               dim_t b_d, dim_t be_d,
                               dim_t b_c, dim_t be_c,
                               dim_t b_z, dim_t be_z )
{
    blksz_t* blksz = bli_malloc( sizeof( *blksz ) );

    bli_blksz_obj_init( blksz,
                        b_s, be_s,
                        b_d, be_d,
                        b_c, be_c,
                        b_z, be_z );

    return blksz;
}
// Create a ger control object.
//
// A ger_t is obtained from bli_malloc() and every argument is stored in
// the field of the same name. Pointer arguments are stored as given; the
// objects they refer to are not copied. The bli_malloc() result is used
// without a NULL check in this function.
//
// Returns the newly allocated ger_t.
ger_t* bli_ger_cntl_obj_create( impl_t     impl_type,
                                varnum_t   var_num,
                                bszid_t    bszid,
                                packv_t*   sub_packv_x,
                                packv_t*   sub_packv_y,
                                packm_t*   sub_packm_a,
                                ger_t*     sub_ger,
                                unpackm_t* sub_unpackm_a )
{
    ger_t* node = bli_malloc( sizeof( *node ) );

    // Implementation selector and blocksize id.
    node->impl_type     = impl_type;
    node->var_num       = var_num;
    node->bszid         = bszid;

    // Sub-operation control objects.
    node->sub_packv_x   = sub_packv_x;
    node->sub_packv_y   = sub_packv_y;
    node->sub_packm_a   = sub_packm_a;
    node->sub_ger       = sub_ger;
    node->sub_unpackm_a = sub_unpackm_a;

    return node;
}
// Add num_blocks_add newly allocated blocks to an existing pool.
//
// Nothing is done when num_blocks_add is less than 1. Otherwise:
//   1. If the new block total exceeds the allocated length of the pool's
//      block_ptrs array, a new array of exactly that length is obtained
//      from bli_malloc(), entries [top_index, num_blocks) are copied over,
//      the old array is released with bli_free(), and the pool is updated
//      with the new array and its length.
//   2. bli_pool_alloc_block() is called for each new slot, using the pool's
//      own block size and alignment size.
//   3. The pool's num_blocks is updated. The top index, block size and
//      alignment size are left unchanged.
//
// The bli_malloc() result is used without a NULL check in this function.
void bli_pool_grow( dim_t   num_blocks_add,
                    pool_t* pool )
{
    // A zero or negative request is a no-op.
    if ( num_blocks_add < 1 ) return;

    // Allocated length of the block_ptrs array, and the block counts
    // before and after growing.
    const dim_t capacity = bli_pool_block_ptrs_len( pool );
    const dim_t n_before = bli_pool_num_blocks( pool );
    const dim_t n_after  = n_before + num_blocks_add;

    dim_t k;

    // Enlarge the block_ptrs array when it cannot hold the new total.
    if ( n_after > capacity )
    {
        pblk_t* old_ptrs  = bli_pool_block_ptrs( pool );
        pblk_t* new_ptrs  = bli_malloc( n_after * sizeof( pblk_t ) );
        dim_t   first_idx = bli_pool_top_index( pool );

        // Entries 0 .. top_index-1 have been checked out to threads, so
        // only the entries from top_index onward need to be carried over.
        for ( k = first_idx; k < n_before; ++k )
        {
            new_ptrs[k] = old_ptrs[k];
        }

        // The old array is no longer needed.
        bli_free( old_ptrs );

        // Publish the new array and its allocated length.
        bli_pool_set_block_ptrs( new_ptrs, pool );
        bli_pool_set_block_ptrs_len( n_after, pool );
    }

    // From here on the block_ptrs array is guaranteed to have room for the
    // additional blocks. Re-query it, since it may just have been replaced.
    pblk_t* ptrs = bli_pool_block_ptrs( pool );

    // New blocks use the pool's existing block and alignment sizes.
    siz_t block_size = bli_pool_block_size( pool );
    siz_t align_size = bli_pool_align_size( pool );

    // Allocate the additional blocks into the slots past the old total.
    for ( k = n_before; k < n_after; ++k )
    {
        bli_pool_alloc_block( block_size, align_size, &ptrs[k] );
    }

    // Record the new total number of allocated blocks.
    bli_pool_set_num_blocks( n_after, pool );
}
//Constructors and destructors for constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; }