Пример #1
0
// Allocate a trsv_t and fill it in from the arguments.
//
// Storage for one trsv_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. Pointer arguments
// are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
trsv_t* bli_trsv_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  scalv_t*   sub_scalv,
                                  packm_t*   sub_packm_a11,
                                  packv_t*   sub_packv_x1,
                                  gemv_t*    sub_gemv_rp,
                                  gemv_t*    sub_gemv_cp,
                                  trsv_t*    sub_trsv,
                                  unpackv_t* sub_unpackv_x1 )
{
	trsv_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type      = impl_type;
	node->var_num        = var_num;
	node->b              = b;

	// The sub_* pointers.
	node->sub_scalv      = sub_scalv;
	node->sub_packm_a11  = sub_packm_a11;
	node->sub_packv_x1   = sub_packv_x1;
	node->sub_gemv_rp    = sub_gemv_rp;
	node->sub_gemv_cp    = sub_gemv_cp;
	node->sub_trsv       = sub_trsv;
	node->sub_unpackv_x1 = sub_unpackv_x1;

	return node;
}
Пример #2
0
// Allocate a trsm_t and fill it in from the arguments.
//
// Storage for one trsm_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the matching field; the three *_ukrs_
// parameters go to the fields of the same name without the trailing
// underscore. Pointer arguments are stored as given; nothing they point to
// is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
trsm_t* bli_trsm_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  func_t*    gemm_ukrs_,
                                  func_t*    gemmtrsm_l_ukrs_,
                                  func_t*    gemmtrsm_u_ukrs_,
                                  scalm_t*   sub_scalm,
                                  packm_t*   sub_packm_a,
                                  packm_t*   sub_packm_b,
                                  packm_t*   sub_packm_c,
                                  trsm_t*    sub_trsm,
                                  gemm_t*    sub_gemm,
                                  unpackm_t* sub_unpackm_c )
{
    trsm_t* node = bli_malloc( sizeof( *node ) );

    // Fields describing this object itself.
    node->impl_type       = impl_type;
    node->var_num         = var_num;
    node->b               = b;

    // The func_t pointers.
    node->gemm_ukrs       = gemm_ukrs_;
    node->gemmtrsm_l_ukrs = gemmtrsm_l_ukrs_;
    node->gemmtrsm_u_ukrs = gemmtrsm_u_ukrs_;

    // The sub_* pointers.
    node->sub_scalm       = sub_scalm;
    node->sub_packm_a     = sub_packm_a;
    node->sub_packm_b     = sub_packm_b;
    node->sub_packm_c     = sub_packm_c;
    node->sub_trsm        = sub_trsm;
    node->sub_gemm        = sub_gemm;
    node->sub_unpackm_c   = sub_unpackm_c;

    return node;
}
Пример #3
0
// Allocate a herk_t and fill it in from the arguments.
//
// Storage for one herk_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. Pointer arguments
// are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
herk_t* bli_herk_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  blksz_t*   b_aux,
                                  scalm_t*   sub_scalm,
                                  packm_t*   sub_packm_a,
                                  packm_t*   sub_packm_b,
                                  packm_t*   sub_packm_c,
                                  herk_t*    sub_herk,
                                  unpackm_t* sub_unpackm_c )
{
	herk_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type     = impl_type;
	node->var_num       = var_num;
	node->b             = b;
	node->b_aux         = b_aux;

	// The sub_* pointers.
	node->sub_scalm     = sub_scalm;
	node->sub_packm_a   = sub_packm_a;
	node->sub_packm_b   = sub_packm_b;
	node->sub_packm_c   = sub_packm_c;
	node->sub_herk      = sub_herk;
	node->sub_unpackm_c = sub_unpackm_c;

	return node;
}
Пример #4
0
// Allocate one block and record both its raw and its aligned address.
//
// block_size + align_size bytes are requested from bli_malloc(). If the
// returned address is not a multiple of align_size, the aligned address is
// the raw address advanced to the next multiple of align_size; otherwise
// the two addresses are equal.
//
//   block_size  number of usable bytes wanted after alignment.
//   align_size  required alignment in bytes. It is used as a modulus below,
//               so zero is not a valid value.
//   block       pblk_t that receives the raw address (via
//               bli_pblk_set_buf_sys) and the aligned address (via
//               bli_pblk_set_buf_align).
//
// NOTE(review): the result of bli_malloc() is not checked here -- confirm
// that bli_malloc() cannot return NULL.
void bli_pool_alloc_block( siz_t   block_size,
                           siz_t   align_size,
                           pblk_t* block )
{
	void* buf_sys;
	void* buf_align;

	// Allocate the block. We add the alignment size to ensure we will
	// have enough usable space after alignment.
	buf_sys   = bli_malloc( block_size + align_size );
	buf_align = buf_sys;

	// Advance the pointer to achieve the necessary alignment, if it
	// is not already aligned.
	if ( bli_is_unaligned_to( ( uintptr_t )buf_sys, ( uintptr_t )align_size ) )
	{
		// Arithmetic directly on a void* is a compiler extension, not
		// ISO C, so the pointer is advanced as a char* (byte units) and
		// then converted back to void*. Notice that the arithmetic works
		// even if the alignment is not a power of two.
		buf_align = ( void* )(   ( char*     )buf_align +
		                       ( ( uintptr_t )align_size -
		                         ( uintptr_t )buf_sys %
		                         ( uintptr_t )align_size )
		                     );
	}

	// Save the results in the pblk_t structure.
	bli_pblk_set_buf_sys( buf_sys, block );
	bli_pblk_set_buf_align( buf_align, block );
}
Пример #5
0
// Allocate a gemv_t and fill it in from the arguments.
//
// Storage for one gemv_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. Pointer arguments
// are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
gemv_t* bli_gemv_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  scalv_t*   sub_scalv,
                                  packm_t*   sub_packm_a,
                                  packv_t*   sub_packv_x,
                                  packv_t*   sub_packv_y,
                                  gemv_t*    sub_gemv,
                                  unpackv_t* sub_unpackv_y )
{
    gemv_t* node = bli_malloc( sizeof( *node ) );

    // Fields describing this object itself.
    node->impl_type     = impl_type;
    node->var_num       = var_num;
    node->b             = b;

    // The sub_* pointers.
    node->sub_scalv     = sub_scalv;
    node->sub_packm_a   = sub_packm_a;
    node->sub_packv_x   = sub_packv_x;
    node->sub_packv_y   = sub_packv_y;
    node->sub_gemv      = sub_gemv;
    node->sub_unpackv_y = sub_unpackv_y;

    return node;
}
Пример #6
0
// Allocate a gemm_t and fill it in from the arguments.
//
// Storage for one gemm_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. Pointer arguments
// are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
gemm_t* bli_gemm_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  scalm_t*   sub_scalm,
                                  packm_t*   sub_packm_a,
                                  packm_t*   sub_packm_b,
                                  packm_t*   sub_packm_c,
                                  gemm_t*    sub_gemm,
                                  unpackm_t* sub_unpackm_c )
{
	gemm_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type     = impl_type;
	node->var_num       = var_num;
	node->b             = b;

	// The sub_* pointers.
	node->sub_scalm     = sub_scalm;
	node->sub_packm_a   = sub_packm_a;
	node->sub_packm_b   = sub_packm_b;
	node->sub_packm_c   = sub_packm_c;
	node->sub_gemm      = sub_gemm;
	node->sub_unpackm_c = sub_unpackm_c;

	return node;
}
Пример #7
0
// Allocate one block and record both its raw and its aligned address.
//
// block_size + align_size bytes are requested from bli_malloc(). If
// bli_is_unaligned_to() reports the returned address as unaligned, the
// aligned address is the raw address advanced to the next multiple of
// align_size; otherwise the two addresses are equal.
//
//   block_size  number of usable bytes wanted after alignment.
//   align_size  required alignment in bytes; used as a modulus below.
//   block       pblk_t that receives the raw address (via
//               bli_pblk_set_buf_sys) and the aligned address (via
//               bli_pblk_set_buf_align).
//
// NOTE(review): the result of bli_malloc() is not checked here -- confirm
// that bli_malloc() cannot return NULL.
void bli_pool_alloc_block( siz_t   block_size,
                           siz_t   align_size,
                           pblk_t* block )
{
	// Over-allocate by align_size bytes so that block_size usable bytes
	// remain once the start address has been moved forward.
	void* buf_sys   = bli_malloc( block_size + align_size );
	void* buf_align = buf_sys;

	if ( bli_is_unaligned_to( buf_sys, align_size ) )
	{
		// The address is converted to uintptr_t so that its remainder
		// modulo the alignment can be computed. The pointer itself is
		// advanced as a char* (byte units) and converted back to void*.
		// The arithmetic does not require a power-of-two alignment.
		const uintptr_t align  = ( uintptr_t )align_size;
		const uintptr_t excess = ( uintptr_t )buf_sys % align;

		buf_align = ( void* )( ( char* )buf_sys + ( align - excess ) );
	}

	// Publish both addresses through the pblk_t.
	bli_pblk_set_buf_sys( buf_sys, block );
	bli_pblk_set_buf_align( buf_align, block );
}
Пример #8
0
// Allocate a packm_t and fill it in from the arguments.
//
// Storage for one packm_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. The mr and nr
// pointers are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
packm_t* bli_packm_cntl_obj_create( impl_t     impl_type,
                                    varnum_t   var_num,
                                    blksz_t*   mr,
                                    blksz_t*   nr,
                                    bool_t     does_densify,
                                    bool_t     does_invert_diag,
                                    bool_t     rev_iter_if_upper,
                                    bool_t     rev_iter_if_lower,
                                    pack_t     pack_schema,
                                    packbuf_t  pack_buf_type )
{
	packm_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type         = impl_type;
	node->var_num           = var_num;
	node->mr                = mr;
	node->nr                = nr;

	// The bool_t flags.
	node->does_densify      = does_densify;
	node->does_invert_diag  = does_invert_diag;
	node->rev_iter_if_upper = rev_iter_if_upper;
	node->rev_iter_if_lower = rev_iter_if_lower;

	// Pack schema and pack buffer type.
	node->pack_schema       = pack_schema;
	node->pack_buf_type     = pack_buf_type;

	return node;
}
Пример #9
0
// Allocate a her2_t and fill it in from the arguments.
//
// Storage for one her2_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. Pointer arguments
// are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
her2_t* bli_her2_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  packv_t*   sub_packv_x1,
                                  packv_t*   sub_packv_y1,
                                  packm_t*   sub_packm_c11,
                                  ger_t*     sub_ger_rp,
                                  ger_t*     sub_ger_cp,
                                  her2_t*    sub_her2,
                                  unpackm_t* sub_unpackm_c11 )
{
	her2_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type       = impl_type;
	node->var_num         = var_num;
	node->b               = b;

	// The sub_* pointers.
	node->sub_packv_x1    = sub_packv_x1;
	node->sub_packv_y1    = sub_packv_y1;
	node->sub_packm_c11   = sub_packm_c11;
	node->sub_ger_rp      = sub_ger_rp;
	node->sub_ger_cp      = sub_ger_cp;
	node->sub_her2        = sub_her2;
	node->sub_unpackm_c11 = sub_unpackm_c11;

	return node;
}
Пример #10
0
// Allocate a hemv_t and fill it in from the arguments.
//
// Storage for one hemv_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. Pointer arguments
// are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
hemv_t* bli_hemv_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  bszid_t    bszid,
                                  scalv_t*   sub_scalv,
                                  packm_t*   sub_packm_a11,
                                  packv_t*   sub_packv_x1,
                                  packv_t*   sub_packv_y1,
                                  gemv_t*    sub_gemv_n_rp,
                                  gemv_t*    sub_gemv_n_cp,
                                  gemv_t*    sub_gemv_t_rp,
                                  gemv_t*    sub_gemv_t_cp,
                                  hemv_t*    sub_hemv,
                                  unpackv_t* sub_unpackv_y1 )
{
	hemv_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type      = impl_type;
	node->var_num        = var_num;
	node->bszid          = bszid;

	// The sub_* pointers.
	node->sub_scalv      = sub_scalv;
	node->sub_packm_a11  = sub_packm_a11;
	node->sub_packv_x1   = sub_packv_x1;
	node->sub_packv_y1   = sub_packv_y1;
	node->sub_gemv_n_rp  = sub_gemv_n_rp;
	node->sub_gemv_n_cp  = sub_gemv_n_cp;
	node->sub_gemv_t_rp  = sub_gemv_t_rp;
	node->sub_gemv_t_cp  = sub_gemv_t_cp;
	node->sub_hemv       = sub_hemv;
	node->sub_unpackv_y1 = sub_unpackv_y1;

	return node;
}
Пример #11
0
// Initialize a pool_t with num_blocks freshly allocated blocks.
//
// An array of num_blocks pblk_t entries is obtained from bli_malloc(), and
// bli_pool_alloc_block() is called once per entry with the given block and
// alignment sizes. The pool is then set up so that its block_ptrs array is
// the new array, its array length and its block count are both num_blocks,
// its top index is 0, and its block/alignment sizes are the values passed
// in.
//
// NOTE(review): the result of bli_malloc() is not checked here -- confirm
// that bli_malloc() cannot return NULL.
void bli_pool_init( dim_t   num_blocks,
                    siz_t   block_size,
                    siz_t   align_size,
                    pool_t* pool )
{
	// One pblk_t slot per block.
	pblk_t* blocks = bli_malloc( num_blocks * sizeof( *blocks ) );
	dim_t   n_done = 0;

	// Fill the slots in ascending order.
	while ( n_done < num_blocks )
	{
		bli_pool_alloc_block( block_size, align_size, blocks + n_done );
		++n_done;
	}

	// Record the array, its length, and the remaining bookkeeping values
	// in the pool_t.
	bli_pool_set_block_ptrs( blocks, pool );
	bli_pool_set_block_ptrs_len( num_blocks, pool );
	bli_pool_set_num_blocks( num_blocks, pool );
	bli_pool_set_top_index( 0, pool );
	bli_pool_set_block_size( block_size, pool );
	bli_pool_set_align_size( align_size, pool );
}
Пример #12
0
// Allocate a gemm_t and fill it in from the arguments.
//
// Storage for one gemm_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the matching field. Pointer arguments are
// stored as given; nothing they point to is copied.
//
// The gemm_ukrs_ parameter carries a trailing underscore to avoid a name
// conflict with a global symbol; it is stored in the gemm_ukrs field.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
gemm_t* bli_gemm_cntl_obj_create( impl_t     impl_type,
                                  varnum_t   var_num,
                                  blksz_t*   b,
                                  func_t*    gemm_ukrs_,
                                  scalm_t*   sub_scalm,
                                  packm_t*   sub_packm_a,
                                  packm_t*   sub_packm_b,
                                  packm_t*   sub_packm_c,
                                  gemm_t*    sub_gemm,
                                  unpackm_t* sub_unpackm_c )
{
	gemm_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type     = impl_type;
	node->var_num       = var_num;
	node->b             = b;
	node->gemm_ukrs     = gemm_ukrs_;

	// The sub_* pointers.
	node->sub_scalm     = sub_scalm;
	node->sub_packm_a   = sub_packm_a;
	node->sub_packm_b   = sub_packm_b;
	node->sub_packm_c   = sub_packm_c;
	node->sub_gemm      = sub_gemm;
	node->sub_unpackm_c = sub_unpackm_c;

	return node;
}
Пример #13
0
// Constructors and destructors for thread infos.

// Allocate a thrinfo_t and initialize it.
//
// Storage for one thrinfo_t is obtained from bli_malloc(); the new object
// and all six arguments are then passed on, in the same order, to
// bli_setup_thread_info().
//
// Returns the newly allocated object.
//
// NOTE(review): the result of bli_malloc() is passed on without a NULL
// check -- confirm that bli_malloc() cannot return NULL.
thrinfo_t* bli_create_thread_info( thread_comm_t* ocomm, dim_t ocomm_id,
                                   thread_comm_t* icomm, dim_t icomm_id,
                                   dim_t n_way, dim_t work_id )
{
    thrinfo_t* info = bli_malloc( sizeof( *info ) );

    bli_setup_thread_info( info,
                           ocomm, ocomm_id,
                           icomm, icomm_id,
                           n_way, work_id );

    return info;
}
Пример #14
0
// Allocate a packm_thread_info_t and fill it in from the arguments.
//
// Storage for one packm_thread_info_t is obtained from bli_malloc(). The
// communicator pointer, tid and max_threads are stored, unmodified, in the
// fields of the same name.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
packm_thread_info_t* bli_create_packm_thread_info( thread_comm_t* communicator, dim_t tid, dim_t max_threads )
{
    packm_thread_info_t* info = bli_malloc( sizeof( *info ) );

    info->communicator = communicator;
    info->tid          = tid;
    info->max_threads  = max_threads;

    return info;
}
Пример #15
0
// Allocate a scalm_t and fill it in from the arguments.
//
// Storage for one scalm_t is obtained from bli_malloc(); impl_type and
// var_num are stored in the fields of the same name.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
scalm_t* bli_scalm_cntl_obj_create( impl_t     impl_type,
                                    varnum_t   var_num )
{
	scalm_t* node = bli_malloc( sizeof( *node ) );

	node->impl_type = impl_type;
	node->var_num   = var_num;

	return node;
}
Пример #16
0
// Allocate an unpackv_t and fill it in from the arguments.
//
// Storage for one unpackv_t is obtained from bli_malloc(); impl_type and
// var_num are stored in the fields of the same name.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
unpackv_t* bli_unpackv_cntl_obj_create( impl_t     impl_type,
                                        varnum_t   var_num )
{
	unpackv_t* node = bli_malloc( sizeof( *node ) );

	node->impl_type = impl_type;
	node->var_num   = var_num;

	return node;
}
// Run a level-3 operation on n_threads pthreads and wait for all of them.
//
// Two arrays of n_threads entries are obtained from bli_malloc(): one of
// pthread_t handles and one of thread_data_t argument records. For each
// index i, the record is filled with func, alpha, a, b, beta, c, cntl and
// thread[i], and a thread is started with pthread_create() running
// thread_decorator_helper on that record. Once every thread has been
// started, each is joined in index order and both arrays are released with
// bli_free().
//
//   n_threads  number of threads to start; also the number of entries read
//              from thread[].
//   func       stored in each record's func field.
//   alpha, a, b, beta, c, cntl
//              stored, unmodified, in each record; every thread receives
//              the same pointers.
//   thread     array of per-thread pointers; entry i goes to thread i.
//
// NOTE(review): the return values of pthread_create() and pthread_join()
// are ignored, and the bli_malloc() results are not checked for NULL.
void bli_level3_thread_decorator( dim_t n_threads,
                                  level3_int_t func,
                                  obj_t* alpha,
                                  obj_t* a,
                                  obj_t* b,
                                  obj_t* beta,
                                  obj_t* c,
                                  void* cntl,
                                  void** thread )
{
    pthread_t*     handles = bli_malloc( sizeof( *handles ) * n_threads );
    thread_data_t* args    = bli_malloc( sizeof( *args ) * n_threads );

    // Start the threads, each with its own argument record.
    for ( int t = 0; t < n_threads; t++ )
    {
        thread_data_t* arg = &args[t];

        arg->func   = func;
        arg->alpha  = alpha;
        arg->a      = a;
        arg->b      = b;
        arg->beta   = beta;
        arg->c      = c;
        arg->cntl   = cntl;
        arg->thread = thread[t];

        pthread_create( &handles[t], NULL, &thread_decorator_helper, arg );
    }

    // Wait for every thread before releasing the shared arrays.
    for ( int t = 0; t < n_threads; t++ )
    {
        pthread_join( handles[t], NULL );
    }

    bli_free( handles );
    bli_free( args );
}
Пример #18
0
// Allocate a trmm_thrinfo_t and initialize it.
//
// Storage for one trmm_thrinfo_t is obtained from bli_malloc(); the new
// object and all nine arguments are then passed on, in the same order, to
// bli_setup_trmm_thrinfo_node().
//
// Returns the newly allocated object.
//
// NOTE(review): the result of bli_malloc() is passed on without a NULL
// check -- confirm that bli_malloc() cannot return NULL.
trmm_thrinfo_t* bli_create_trmm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_id,
                                              thread_comm_t* icomm, dim_t icomm_id,
                                              dim_t n_way, dim_t work_id, 
                                              packm_thrinfo_t* opackm,
                                              packm_thrinfo_t* ipackm,
                                              trmm_thrinfo_t* sub_trmm )
{
    trmm_thrinfo_t* node = bli_malloc( sizeof( *node ) );

    bli_setup_trmm_thrinfo_node( node,
                                 ocomm, ocomm_id,
                                 icomm, icomm_id,
                                 n_way, work_id,
                                 opackm, ipackm,
                                 sub_trmm );

    return node;
}
Пример #19
0
// Allocate a packv_t and fill it in from the arguments.
//
// Storage for one packv_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. The mult_dim
// pointer is stored as given; nothing it points to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
packv_t* bli_packv_cntl_obj_create( impl_t     impl_type,
                                    varnum_t   var_num,
                                    blksz_t*   mult_dim,
                                    pack_t     pack_schema )
{
	packv_t* node = bli_malloc( sizeof( *node ) );

	node->impl_type   = impl_type;
	node->var_num     = var_num;
	node->mult_dim    = mult_dim;
	node->pack_schema = pack_schema;

	return node;
}
Пример #20
0
// Allocate a blksz_t and initialize it.
//
// Storage for one blksz_t is obtained from bli_malloc(); the new object and
// the eight dim_t arguments are then passed on, in the same order, to
// bli_blksz_obj_init().
//
// Returns the newly allocated object.
//
// NOTE(review): the result of bli_malloc() is passed on without a NULL
// check -- confirm that bli_malloc() cannot return NULL.
blksz_t* bli_blksz_obj_create( dim_t b_s, dim_t be_s,
                               dim_t b_d, dim_t be_d,
                               dim_t b_c, dim_t be_c,
                               dim_t b_z, dim_t be_z )
{
	blksz_t* blksz = bli_malloc( sizeof( *blksz ) );

	bli_blksz_obj_init( blksz,
	                    b_s, be_s,
	                    b_d, be_d,
	                    b_c, be_c,
	                    b_z, be_z );

	return blksz;
}
Пример #21
0
// Allocate a ger_t and fill it in from the arguments.
//
// Storage for one ger_t is obtained from bli_malloc(). Each argument is
// then stored, unmodified, in the field of the same name. Pointer arguments
// are stored as given; nothing they point to is copied.
//
// Returns the newly allocated object.
//
// NOTE(review): the pointer returned by bli_malloc() is written through
// without a NULL check -- confirm that bli_malloc() cannot return NULL.
ger_t* bli_ger_cntl_obj_create( impl_t     impl_type,
                                varnum_t   var_num,
                                bszid_t    bszid,
                                packv_t*   sub_packv_x,
                                packv_t*   sub_packv_y,
                                packm_t*   sub_packm_a,
                                ger_t*     sub_ger,
                                unpackm_t* sub_unpackm_a )
{
	ger_t* node = bli_malloc( sizeof( *node ) );

	// Fields describing this object itself.
	node->impl_type     = impl_type;
	node->var_num       = var_num;
	node->bszid         = bszid;

	// The sub_* pointers.
	node->sub_packv_x   = sub_packv_x;
	node->sub_packv_y   = sub_packv_y;
	node->sub_packm_a   = sub_packm_a;
	node->sub_ger       = sub_ger;
	node->sub_unpackm_a = sub_unpackm_a;

	return node;
}
Пример #22
0
// Add num_blocks_add newly allocated blocks to an existing pool.
//
// Does nothing when num_blocks_add is less than 1. Otherwise:
//   1. If the pool's block_ptrs array is too short to hold the current
//      block count plus num_blocks_add, a new array of exactly that length
//      is obtained from bli_malloc(), entries top_index .. num_blocks-1 are
//      copied over, the old array is released with bli_free(), and the pool
//      is pointed at the new array with its new length.
//   2. bli_pool_alloc_block() is called for each of the new trailing
//      entries, using the pool's own block and alignment sizes.
//   3. The pool's block count is updated.
// The pool's top_index, block_size and align_size are left unchanged.
//
// NOTE(review): the result of bli_malloc() is not checked here -- confirm
// that bli_malloc() cannot return NULL.
void bli_pool_grow( dim_t num_blocks_add, pool_t* pool )
{
	pblk_t* blocks;
	dim_t   old_len;
	dim_t   old_count;
	dim_t   new_count;
	siz_t   block_size;
	siz_t   align_size;
	dim_t   k;

	// A zero or negative request leaves the pool untouched.
	if ( num_blocks_add < 1 ) return;

	// Current capacity of the block_ptrs array, current number of
	// allocated blocks, and the number of blocks after growing.
	old_len   = bli_pool_block_ptrs_len( pool );
	old_count = bli_pool_num_blocks( pool );
	new_count = old_count + num_blocks_add;

	// Replace the block_ptrs array with a larger one if the grown pool
	// would not fit in the current array.
	if ( old_len < new_count )
	{
		pblk_t* old_blocks = bli_pool_block_ptrs( pool );
		pblk_t* new_blocks = bli_malloc( new_count * sizeof( *new_blocks ) );

		// Only entries from top_index upward are carried over; entries
		// 0 .. top_index-1 have been checked out to threads.
		for ( k = bli_pool_top_index( pool ); k < old_count; ++k )
		{
			new_blocks[k] = old_blocks[k];
		}

		bli_free( old_blocks );

		// Point the pool at the new array and record its length.
		bli_pool_set_block_ptrs( new_blocks, pool );
		bli_pool_set_block_ptrs_len( new_count, pool );
	}

	// The array now has room for the additional blocks. Re-read it from
	// the pool, since it may have just been replaced.
	blocks     = bli_pool_block_ptrs( pool );
	block_size = bli_pool_block_size( pool );
	align_size = bli_pool_align_size( pool );

	// Allocate the additional blocks in the trailing entries.
	for ( k = old_count; k < new_count; ++k )
	{
		bli_pool_alloc_block( block_size, align_size, blocks + k );
	}

	// Record the new number of allocated blocks.
	bli_pool_set_num_blocks( new_count, pool );
}
Пример #23
0
// Constructors and destructors for communicators.

// Allocate a thread_comm_t and initialize it.
//
// Storage for one thread_comm_t is obtained from bli_malloc(); the new
// object and n_threads are then passed to bli_setup_communicator().
//
// Returns the newly allocated object.
//
// NOTE(review): the result of bli_malloc() is passed on without a NULL
// check -- confirm that bli_malloc() cannot return NULL.
thread_comm_t* bli_create_communicator( dim_t n_threads )
{
    thread_comm_t* new_comm = bli_malloc( sizeof( *new_comm ) );

    bli_setup_communicator( new_comm, n_threads );

    return new_comm;
}