コード例 #1
0
	const num_t     dt          = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR          = pd_a; \
	const dim_t     NR          = pd_b; \
	const dim_t     PACKMR      = cs_a; \
	const dim_t     PACKNR      = rs_b; \
\
	/* Query the context for the micro-kernel addresses and cast them to
	   their function pointer types. */ \
	/* NOTE: We use the upper-triangular gemmtrsm ukernel because, while
	   the current macro-kernel targets the "rl" case (right-side/lower-
	   triangular), it becomes upper-triangular after the kernel operation
	   is transposed so that all kernel instances are of the "left"
	   variety (since those are the only trsm ukernels that exist). */ \
	PASTECH(ch,gemmtrsm_ukr_ft) \
	               gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
	PASTECH(ch,gemm_ukr_ft) \
	                   gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const inc_t     rs_ct      = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
	const inc_t     cs_ct      = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
\
	ctype* restrict zero        = PASTEMAC(ch,0); \
	ctype* restrict minus_one   = PASTEMAC(ch,m1); \
コード例 #2
0
       cntx_t* cntx, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set according to the storage preference that
	   bli_cntx_l3_ukr_prefers_cols_dt() reports for the gemm micro-kernel:
	   if columns are preferred, ct is column-stored (rs_ct = 1,
	   cs_ct = MR); otherwise it is row-stored (rs_ct = NR, cs_ct = 1). */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
コード例 #3
0
ファイル: bli_l3_ukr_tapi.c プロジェクト: ShawnLess/blis
     ( \
       dim_t               k, \
       ctype*     restrict alpha, \
       ctype*     restrict a, \
       ctype*     restrict b, \
       ctype*     restrict beta, \
       ctype*     restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t*    restrict cntx  \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	/* Query the context for the function address of the current
	   datatype's micro-kernel. */ \
	PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
\
	/* Invoke the typed function for the given datatype. */ \
	f( \
	   k, \
	   alpha, \
	   a, \
	   b, \
	   beta, \
	   c, rs_c, cs_c, \
	   data, \
	   cntx  \
	 ); \
} \

/* Instantiate the wrapper template above for the gemm micro-kernel. The
   three arguments presumably bind the generated function's name
   (gemm_ukernel), the `tname` used to form the function-pointer type, and
   the `kerid` passed to bli_cntx_get_l3_ukr_dt() -- the #define line that
   names the macro parameters is not visible here; TODO confirm. */
INSERT_GENTFUNC_BASIC2( gemm_ukernel, gemm_ukr, BLIS_GEMM_UKR )