Exemple #1
0
void bli_hemv_cntl_init()
{
	// Blocksize object used by the blocked hemv trees below. One value is
	// supplied per BLIS_HEMV_MC_{S,D,C,Z} macro, each paired with a 0.
	hemv_mc = bli_blksz_obj_create(
	    BLIS_HEMV_MC_S, 0,
	    BLIS_HEMV_MC_D, 0,
	    BLIS_HEMV_MC_C, 0,
	    BLIS_HEMV_MC_Z, 0 );

	// Leaf-level control trees (the kernels). Every sub-node is NULL; these
	// are meant for (presumably) small block-subvector problems and serve as
	// the sub-tree of the blocked trees created further down.
	hemv_cntl_bs_ke_lrow_ucol = bli_hemv_cntl_obj_create(
	    BLIS_UNB_FUSED, BLIS_VARIANT1,
	    NULL, NULL, NULL, NULL, NULL, NULL,
	    NULL, NULL, NULL, NULL, NULL );

	hemv_cntl_bs_ke_lcol_urow = bli_hemv_cntl_obj_create(
	    BLIS_UNB_FUSED, BLIS_VARIANT3,
	    NULL, NULL, NULL, NULL, NULL, NULL,
	    NULL, NULL, NULL, NULL, NULL );

	// Blocked control trees for generally large problems. Variant 2 is
	// selected because it prioritizes keeping a subvector of y in cache.
	//
	// The two trees differ only in which gemv trees fill the gemv_n_rp and
	// gemv_t_cp slots (dot/axpy are swapped) and in the leaf kernel used.
	//
	// NOTE(review): in both trees the gemv_t_cp slot is populated with an
	// "_rp_" gemv tree -- confirm this is intended.
	hemv_cntl_ge_lrow_ucol = bli_hemv_cntl_obj_create(
	    BLIS_BLOCKED,
	    BLIS_VARIANT2,
	    hemv_mc,
	    scalv_cntl,                // y is scaled up-front
	    packm_cntl_noscale,        // A11 is packed (if needed)
	    packv_cntl,                // x1 is packed (if needed)
	    packv_cntl,                // y1 is packed (if needed)
	    gemv_cntl_rp_bs_dot,       // gemv_n_rp: required by var2
	    NULL,                      // gemv_n_cp: unused by var2
	    NULL,                      // gemv_t_rp: unused by var2
	    gemv_cntl_rp_bs_axpy,      // gemv_t_cp: required by var2
	    hemv_cntl_bs_ke_lrow_ucol, // leaf kernel tree
	    unpackv_cntl );            // y1 is unpacked (if packed)

	hemv_cntl_ge_lcol_urow = bli_hemv_cntl_obj_create(
	    BLIS_BLOCKED,
	    BLIS_VARIANT2,
	    hemv_mc,
	    scalv_cntl,                // y is scaled up-front
	    packm_cntl_noscale,        // A11 is packed (if needed)
	    packv_cntl,                // x1 is packed (if needed)
	    packv_cntl,                // y1 is packed (if needed)
	    gemv_cntl_rp_bs_axpy,      // gemv_n_rp: required by var2
	    NULL,                      // gemv_n_cp: unused by var2
	    NULL,                      // gemv_t_rp: unused by var2
	    gemv_cntl_rp_bs_dot,       // gemv_t_cp: required by var2
	    hemv_cntl_bs_ke_lcol_urow, // leaf kernel tree
	    unpackv_cntl );            // y1 is unpacked (if packed)
}
Exemple #2
0
void bli_packv_cntl_init()
{
	// Blocksize object built from the BLIS_DEFAULT_VR_{S,D,C,Z} macros (each
	// paired with a 0). It is passed to the packv control tree created next.
	packv_mult_dim = bli_blksz_obj_create(
	    BLIS_DEFAULT_VR_S, 0,
	    BLIS_DEFAULT_VR_D, 0,
	    BLIS_DEFAULT_VR_C, 0,
	    BLIS_DEFAULT_VR_Z, 0 );

	// The packv control tree: unblocked, variant 1, using the blocksize
	// object above and the BLIS_PACKED_VECTOR pack schema.
	packv_cntl = bli_packv_cntl_obj_create(
	    BLIS_UNBLOCKED,
	    BLIS_VARIANT1,
	    packv_mult_dim,
	    BLIS_PACKED_VECTOR );
}
Exemple #3
0
void bli_trsv_cntl_init()
{
	// Blocksize object used by the blocked trsv trees below. One value is
	// supplied per BLIS_TRSV_MC_{S,D,C,Z} macro, each paired with a 0.
	trsv_mc = bli_blksz_obj_create(
	    BLIS_TRSV_MC_S, 0,
	    BLIS_TRSV_MC_D, 0,
	    BLIS_TRSV_MC_C, 0,
	    BLIS_TRSV_MC_Z, 0 );

	// Leaf-level control trees (the kernels). Every sub-node is NULL; these
	// are meant for (presumably) small block-subvector problems.
	trsv_cntl_bs_ke_nrow_tcol = bli_trsv_cntl_obj_create(
	    BLIS_UNB_FUSED, BLIS_VARIANT1,
	    NULL, NULL, NULL, NULL,
	    NULL, NULL, NULL, NULL );

	trsv_cntl_bs_ke_ncol_trow = bli_trsv_cntl_obj_create(
	    BLIS_UNB_FUSED, BLIS_VARIANT2,
	    NULL, NULL, NULL, NULL,
	    NULL, NULL, NULL, NULL );

	// Blocked control trees for generally large problems. Variant 1 is
	// selected because it prioritizes keeping a subvector of x in cache.
	// The two trees differ only in the gemv_rp tree (dot vs. axpy) and in
	// the leaf kernel tree they delegate to.
	trsv_cntl_ge_nrow_tcol = bli_trsv_cntl_obj_create(
	    BLIS_BLOCKED,
	    BLIS_VARIANT1,             // var1 maximizes x1 usage
	    trsv_mc,
	    scalv_cntl,                // x is scaled up-front
	    packm_cntl_noscale,        // A11 is packed (if needed)
	    packv_cntl,                // x1 is packed (if needed)
	    gemv_cntl_rp_bs_dot,       // gemv_rp: required by var1
	    NULL,                      // gemv_cp: not needed by var1
	    trsv_cntl_bs_ke_nrow_tcol, // leaf kernel tree
	    unpackv_cntl );            // x1 is unpacked (if needed)

	trsv_cntl_ge_ncol_trow = bli_trsv_cntl_obj_create(
	    BLIS_BLOCKED,
	    BLIS_VARIANT1,             // var1 maximizes x1 usage
	    trsv_mc,
	    scalv_cntl,                // x is scaled up-front
	    packm_cntl_noscale,        // A11 is packed (if needed)
	    packv_cntl,                // x1 is packed (if needed)
	    gemv_cntl_rp_bs_axpy,      // gemv_rp: required by var1
	    NULL,                      // gemv_cp: not needed by var1
	    trsv_cntl_bs_ke_ncol_trow, // leaf kernel tree
	    unpackv_cntl );            // x1 is unpacked (if needed)
}
Exemple #4
0
void bli_her_cntl_init()
{
	// Blocksize object used by the blocked her trees below. One value is
	// supplied per BLIS_HER_MC_{S,D,C,Z} macro, each paired with a 0.
	her_mc = bli_blksz_obj_create(
	    BLIS_HER_MC_S, 0,
	    BLIS_HER_MC_D, 0,
	    BLIS_HER_MC_C, 0,
	    BLIS_HER_MC_Z, 0 );

	// Leaf-level control trees (the kernels). Every sub-node is NULL; these
	// are meant for (presumably) small block-subvector problems.
	her_cntl_bs_ke_lrow_ucol = bli_her_cntl_obj_create(
	    BLIS_UNBLOCKED, BLIS_VARIANT1,
	    NULL, NULL, NULL, NULL, NULL, NULL );

	her_cntl_bs_ke_lcol_urow = bli_her_cntl_obj_create(
	    BLIS_UNBLOCKED, BLIS_VARIANT2,
	    NULL, NULL, NULL, NULL, NULL, NULL );

	// Blocked control trees for generally large problems. The variants are
	// chosen so that the ger subproblems are partitioned in the same
	// direction as the assumed storage: var1 is paired with the row-oriented
	// ger tree, var2 with the column-oriented one.
	her_cntl_ge_lrow_ucol = bli_her_cntl_obj_create(
	    BLIS_BLOCKED,
	    BLIS_VARIANT1,
	    her_mc,
	    packv_cntl,               // x1 is packed (if needed)
	    NULL,                     // C11 is NOT packed
	    ger_cntl_rp_bs_row,
	    her_cntl_bs_ke_lrow_ucol, // leaf kernel tree
	    NULL );                   // nothing to unpack

	her_cntl_ge_lcol_urow = bli_her_cntl_obj_create(
	    BLIS_BLOCKED,
	    BLIS_VARIANT2,
	    her_mc,
	    packv_cntl,               // x1 is packed (if needed)
	    NULL,                     // C11 is NOT packed
	    ger_cntl_cp_bs_col,
	    her_cntl_bs_ke_lcol_urow, // leaf kernel tree
	    NULL );                   // nothing to unpack
}
Exemple #5
0
void bli_gemm4mb_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm4mb_mc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MC_S/2, BLIS_MAXIMUM_MC_S/2,
	                      BLIS_DEFAULT_MC_D/2, BLIS_MAXIMUM_MC_D/2 );
	gemm4mb_nc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NC_S/2, BLIS_MAXIMUM_NC_S/2,
	                      BLIS_DEFAULT_NC_D/2, BLIS_MAXIMUM_NC_D/2 );
	gemm4mb_kc
	=
	bli_blksz_obj_create( 0,                   0,
	                      0,                   0,
	                      BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                      BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D );
	gemm4mb_mr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                      BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
	gemm4mb_nr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                      BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
	gemm4mb_kr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                      BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


	// Attach the register blksz_t objects as blocksize multiples to the cache
	// blksz_t objects.
	bli_blksz_obj_attach_mult_to( gemm4mb_mr, gemm4mb_mc );
	bli_blksz_obj_attach_mult_to( gemm4mb_nr, gemm4mb_nc );
	bli_blksz_obj_attach_mult_to( gemm4mb_kr, gemm4mb_kc );


	// The cache blocksizes that were scaled above need to be rounded down
	// to their respective nearest register blocksize multiples. Note that
	// this can only happen after the appropriate register blocksize is
	// actually attached as a multiple.
	bli_blksz_reduce_to_mult( gemm4mb_mc );
	bli_blksz_reduce_to_mult( gemm4mb_nc );


	// Attach the mr and nr blksz_t objects to each cache blksz_t object.
	// The primary example of why this is needed relates to nudging kc.
	// In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
	// since the multiple we target in nudging depends on whether the
	// structured matrix is on the left or the right.
	bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_mc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_nc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_kc );


	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm4mb_ukrs
	=
	bli_func_obj_create(
	    NULL,                  FALSE,
	    NULL,                  FALSE,
	    BLIS_CGEMM4MB_UKERNEL, BLIS_CGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS,
	    BLIS_ZGEMM4MB_UKERNEL, BLIS_ZGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS );


	// Create control tree objects for packm operations.
	gemm4mb_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mb_mr,
	                           gemm4mb_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS_4MI,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm4mb_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mb_kr,
	                           gemm4mb_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS_4MI,
	                           BLIS_BUFFER_FOR_B_PANEL );


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm4mb_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT3,
	                          NULL,
	                          gemm4mb_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm4mb_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mb_mc,
	                          NULL,
	                          NULL,
	                          gemm4mb_packa_cntl,
	                          gemm4mb_packb_cntl,
	                          NULL,
	                          gemm4mb_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm4mb_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mb_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mb_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm4mb_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mb_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mb_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm4mb_cntl = gemm4mb_cntl_vl_mm;

}
Exemple #6
0
void bli_gemm_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm_mc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
	                      BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D,
	                      BLIS_DEFAULT_MC_C, BLIS_MAXIMUM_MC_C,
	                      BLIS_DEFAULT_MC_Z, BLIS_MAXIMUM_MC_Z );
	gemm_nc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
	                      BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D,
	                      BLIS_DEFAULT_NC_C, BLIS_MAXIMUM_NC_C,
	                      BLIS_DEFAULT_NC_Z, BLIS_MAXIMUM_NC_Z );
	gemm_kc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                      BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D,
	                      BLIS_DEFAULT_KC_C, BLIS_MAXIMUM_KC_C,
	                      BLIS_DEFAULT_KC_Z, BLIS_MAXIMUM_KC_Z );
	gemm_mr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                      BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D,
	                      BLIS_DEFAULT_MR_C, BLIS_PACKDIM_MR_C,
	                      BLIS_DEFAULT_MR_Z, BLIS_PACKDIM_MR_Z );
	gemm_nr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                      BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D,
	                      BLIS_DEFAULT_NR_C, BLIS_PACKDIM_NR_C,
	                      BLIS_DEFAULT_NR_Z, BLIS_PACKDIM_NR_Z );
	gemm_kr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                      BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D,
	                      BLIS_DEFAULT_KR_C, BLIS_PACKDIM_KR_C,
	                      BLIS_DEFAULT_KR_Z, BLIS_PACKDIM_KR_Z );


	// Create objects for micro-panel alignment (in bytes).
	gemm_upanel_a_align
	=
	bli_blksz_obj_create( BLIS_UPANEL_A_ALIGN_SIZE_S, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_D, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_C, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_Z, 0 );
	gemm_upanel_b_align
	=
	bli_blksz_obj_create( BLIS_UPANEL_B_ALIGN_SIZE_S, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_D, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_C, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_Z, 0 );


	// Attach the register blksz_t objects as sub-blocksizes to the cache
	// blksz_t objects.
	bli_blksz_obj_attach_to( gemm_mr, gemm_mc );
	bli_blksz_obj_attach_to( gemm_nr, gemm_nc );
	bli_blksz_obj_attach_to( gemm_kr, gemm_kc );


	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm_ukrs
	=
	bli_func_obj_create( BLIS_SGEMM_UKERNEL, BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_DGEMM_UKERNEL, BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_CGEMM_UKERNEL, BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_ZGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS );


	// Create control tree objects for packm operations.
	gemm_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_mr,
	                           gemm_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_kr,
	                           gemm_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL,
	                          gemm_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          gemm_ukrs,
	                          NULL,
	                          gemm_packa_cntl,
	                          gemm_packb_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          gemm_ukrs,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          gemm_ukrs,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm_cntl = gemm_cntl_vl_mm;
}
Exemple #7
0
void bli_gemm_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
	                                BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
	                                BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
	                                BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );

	gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
	                                BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
	                                BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
	                                BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );

	gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
	                                BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
	                                BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
	                                BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );

	gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
	                                BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
	                                BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
	                                BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );

	gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
	                                BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
	                                BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
	                                BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );

	gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
	                                BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
	                                BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
	                                BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );


	// Create control tree objects for packm operations.
	gemm_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm_mr,
	                           gemm_kr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm_kr,
	                           gemm_nr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );

	// Create control tree objects for packm/unpackm operations on C.
	gemm_packc_cntl
	=
	bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_mr,
	                           gemm_nr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COLUMNS,
	                           BLIS_BUFFER_FOR_C_PANEL );

	gemm_unpackc_cntl
	=
	bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
	                             BLIS_VARIANT1,
	                             NULL ); // no blocksize needed


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          NULL,
	                          gemm_packa_cntl,
	                          gemm_packb_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm_cntl = gemm_cntl_vl_mm;

#if 0
	//
	// Create a control tree for packing A, and streaming B and C.
	//

	gemm_cntl_bp_ke5
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT5,
	                          NULL, NULL, NULL, NULL,
	                          NULL, NULL, NULL, NULL );
	gemm_cntl_pm_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          NULL,
	                          gemm_packa_cntl,
	                          NULL,
	                          //gemm_packc_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke5,
	                          //gemm_unpackc_cntl );
	                          NULL );

	gemm_cntl_mm_pm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_pm_bp,
	                          NULL );

	gemm_cntl_vl_mm5
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_pm,
	                          NULL );

	gemm_cntl_packa = gemm_cntl_vl_mm5;
#endif
}
Exemple #8
0
void bli_gemm3m3_cntl_init()
{
    // Create blocksize objects for each dimension.
    // NOTE: the complex blocksizes for 3m3 are generally equal to their
    // corresponding real domain counterparts. However, we want to promote
    // similar cache footprints for the micro-panels of A and B (when
    // compared to executing in the real domain), and since the complex
    // micro-panels are three times as "fat" (due to storing real, imaginary
    // and real+imaginary parts), we reduce KC by a factor of 2 to
    // compensate. Ideally, we would reduce by a factor of 3, but that
    // could get messy vis-a-vis keeping KC a multiple of the register
    // blocksizes.
    gemm3m3_mc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
                                  BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D );
    gemm3m3_nc
        =
            bli_blksz_obj_create( 0,                   0,
                                  0,                   0,
                                  BLIS_DEFAULT_NC_S/3, BLIS_MAXIMUM_NC_S/3,
                                  BLIS_DEFAULT_NC_D/3, BLIS_MAXIMUM_NC_D/3 );
    gemm3m3_kc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
                                  BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D );
    gemm3m3_mr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
                                  BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
    gemm3m3_nr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
                                  BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
    gemm3m3_kr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
                                  BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


    // Attach the register blksz_t objects as blocksize multiples to the cache
    // blksz_t objects.
    bli_blksz_obj_attach_mult_to( gemm3m3_mr, gemm3m3_mc );
    bli_blksz_obj_attach_mult_to( gemm3m3_nr, gemm3m3_nc );
    bli_blksz_obj_attach_mult_to( gemm3m3_kr, gemm3m3_kc );


    // The cache blocksizes that were scaled above need to be rounded down
    // to their respective nearest register blocksize multiples. Note that
    // this can only happen after the appropriate register blocksize is
    // actually attached as a multiple.
    bli_blksz_reduce_to_mult( gemm3m3_nc );


    // Attach the mr and nr blksz_t objects to each cache blksz_t object.
    // The primary example of why this is needed relates to nudging kc.
    // In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
    // since the multiple we target in nudging depends on whether the
    // structured matrix is on the left or the right.
    bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_mc );
    bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_nc );
    bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_kc );


    // Create function pointer object for each datatype-specific gemm
    // micro-kernel.
    gemm3m3_ukrs
        =
            bli_func_obj_create(
                NULL,                  FALSE,
                NULL,                  FALSE,
                BLIS_CGEMM3M3_UKERNEL, BLIS_CGEMM3M3_UKERNEL_PREFERS_CONTIG_ROWS,
                BLIS_ZGEMM3M3_UKERNEL, BLIS_ZGEMM3M3_UKERNEL_PREFERS_CONTIG_ROWS );


    // Create control tree objects for packm operations.
    gemm3m3_packb_cntl
        =
            bli_packm_cntl_obj_create( BLIS_BLOCKED,
                                       BLIS_VARIANT2,
                                       gemm3m3_kr,
                                       gemm3m3_nr,
                                       FALSE, // do NOT invert diagonal
                                       FALSE, // reverse iteration if upper?
                                       FALSE, // reverse iteration if lower?
                                       BLIS_PACKED_COL_PANELS_3MS,
                                       BLIS_BUFFER_FOR_B_PANEL );


    //
    // Create a control tree for packing A and B, and streaming C.
    //

    // Create control tree object for lowest-level block-panel kernel.
    gemm3m3_cntl_bp_ke
        =
            bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
                                      BLIS_VARIANT2,
                                      NULL,
                                      gemm3m3_ukrs,
                                      NULL, NULL, NULL,
                                      NULL, NULL, NULL );

    // Create control tree object for outer panel (to block-panel)
    // problem.
    gemm3m3_cntl_op_bp
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT4,
                                      gemm3m3_mc,
                                      NULL,
                                      NULL,
                                      NULL, // packm cntl nodes accessed directly from blk_var4
                                      gemm3m3_packb_cntl,
                                      NULL,
                                      gemm3m3_cntl_bp_ke,
                                      NULL );

    // Create control tree object for general problem via multiple
    // rank-k (outer panel) updates.
    gemm3m3_cntl_mm_op
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT3,
                                      gemm3m3_kc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm3m3_cntl_op_bp,
                                      NULL );

    // Create control tree object for very large problem via multiple
    // general problems.
    gemm3m3_cntl_vl_mm
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT2,
                                      gemm3m3_nc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm3m3_cntl_mm_op,
                                      NULL );

    // Alias the "master" gemm control tree to a shorter name.
    gemm3m3_cntl = gemm3m3_cntl_vl_mm;

}
Exemple #9
0
void bli_gemv_cntl_init()
{
    // Populates the gemv blocksize objects and control trees, all of
    // which are variables declared outside this function. Trees are built
    // bottom-up so that each parent can reference its already-built child.

    // Blocksize objects for the m and n dimensions, filled from the
    // _S/_D/_C/_Z level-2 defaults.
    gemv_mc = bli_blksz_obj_create(
        BLIS_DEFAULT_L2_MC_S, 0,
        BLIS_DEFAULT_L2_MC_D, 0,
        BLIS_DEFAULT_L2_MC_C, 0,
        BLIS_DEFAULT_L2_MC_Z, 0 );
    gemv_nc = bli_blksz_obj_create(
        BLIS_DEFAULT_L2_NC_S, 0,
        BLIS_DEFAULT_L2_NC_D, 0,
        BLIS_DEFAULT_L2_NC_C, 0,
        BLIS_DEFAULT_L2_NC_Z, 0 );

    // Leaf trees for the lowest-level kernels. These operate on
    // (presumably) relatively small block-subvector problems, take no
    // blocksize and have no sub-nodes; only the variant differs.
    gemv_cntl_bs_ke_dot = bli_gemv_cntl_obj_create(
        BLIS_UNB_FUSED, BLIS_VARIANT1,
        NULL, NULL, NULL, NULL, NULL, NULL, NULL );
    gemv_cntl_bs_ke_axpy = bli_gemv_cntl_obj_create(
        BLIS_UNB_FUSED, BLIS_VARIANT2,
        NULL, NULL, NULL, NULL, NULL, NULL, NULL );

    // Trees for problems whose m dimension is relatively small, i.e.
    // where trans(A) is a row panel problem. Blocked variant 2 is given
    // gemv_nc and hands each sub-problem to one of the leaf kernels.
    gemv_cntl_rp_bs_dot = bli_gemv_cntl_obj_create(
        BLIS_BLOCKED, BLIS_VARIANT2,
        gemv_nc,
        scalv_cntl,            // y is scaled up-front
        packm_cntl,            // A1 is packed if needed
        packv_cntl,            // x1 is packed if needed
        NULL,                  // var2 does not partition y
        gemv_cntl_bs_ke_dot,
        NULL );                // var2 does not partition y
    gemv_cntl_rp_bs_axpy = bli_gemv_cntl_obj_create(
        BLIS_BLOCKED, BLIS_VARIANT2,
        gemv_nc,
        scalv_cntl,            // y is scaled up-front
        packm_cntl,            // A1 is packed if needed
        packv_cntl,            // x1 is packed if needed
        NULL,                  // var2 does not partition y
        gemv_cntl_bs_ke_axpy,
        NULL );                // var2 does not partition y

    // Trees for problems whose n dimension is relatively small, i.e.
    // where trans(A) is a column panel problem. Blocked variant 1 is
    // given gemv_mc and hands each sub-problem to one of the leaf kernels.
    gemv_cntl_cp_bs_dot = bli_gemv_cntl_obj_create(
        BLIS_BLOCKED, BLIS_VARIANT1,
        gemv_mc,
        NULL,                  // blk_var1 performs no scaling
        packm_cntl,            // A1 is packed if needed
        NULL,                  // var1 does not partition x
        packv_cntl,            // y1 is packed if needed
        gemv_cntl_bs_ke_dot,
        unpackv_cntl );        // y1 is unpacked if it was packed
    gemv_cntl_cp_bs_axpy = bli_gemv_cntl_obj_create(
        BLIS_BLOCKED, BLIS_VARIANT1,
        gemv_mc,
        NULL,                  // blk_var1 performs no scaling
        packm_cntl,            // A1 is packed if needed
        NULL,                  // var1 does not partition x
        packv_cntl,            // y1 is packed if needed
        gemv_cntl_bs_ke_axpy,
        unpackv_cntl );        // y1 is unpacked if it was packed

    // Trees for generally large problems. The chosen variant partitions
    // the problem into row panels, so the sub-node of each tree is the
    // matching row-panel tree built above.
    gemv_cntl_ge_dot = bli_gemv_cntl_obj_create(
        BLIS_BLOCKED, BLIS_VARIANT1,
        gemv_mc,
        NULL,                  // blk_var1 performs no scaling
        NULL,                  // A1 is not packed at this level
        NULL,                  // var1 does not partition x
        packv_cntl,            // y1 is packed if needed
        gemv_cntl_rp_bs_dot,
        unpackv_cntl );        // y1 is unpacked if it was packed
    gemv_cntl_ge_axpy = bli_gemv_cntl_obj_create(
        BLIS_BLOCKED, BLIS_VARIANT1,
        gemv_mc,
        NULL,                  // blk_var1 performs no scaling
        NULL,                  // A1 is not packed at this level
        NULL,                  // var1 does not partition x
        packv_cntl,            // y1 is packed if needed
        gemv_cntl_rp_bs_axpy,
        unpackv_cntl );        // y1 is unpacked if it was packed
}
Exemple #10
0
void bli_packm_cntl_init()
{
	// Populates the general-purpose packm blocksize multiples and control
	// trees, all of which are variables declared outside this function.

	// Blocksize objects holding the m and n register blocksizes. They are
	// attached to the packm control nodes for two purposes:
	//   (a) allocating a block whose m and n dimensions are multiples of
	//       mr and nr, and
	//   (b) knowing how much zero-padding edge cases require.
	// NOTE: the same alignments also get applied to matrices packed for
	// level-2 operations, where they are unnecessary and/or a smaller
	// alignment might suffice. All pack matrix buffers are adjusted by the
	// same amount purely to keep things simple.
	packm_mult_ldim = bli_blksz_obj_create(
		BLIS_DEFAULT_MR_S, 0,
		BLIS_DEFAULT_MR_D, 0,
		BLIS_DEFAULT_MR_C, 0,
		BLIS_DEFAULT_MR_Z, 0 );
	packm_mult_nvec = bli_blksz_obj_create(
		BLIS_DEFAULT_NR_S, 0,
		BLIS_DEFAULT_NR_D, 0,
		BLIS_DEFAULT_NR_C, 0,
		BLIS_DEFAULT_NR_Z, 0 );

	// Schema overview:
	// - BLIS_PACKED_ROWS / BLIS_PACKED_COLUMNS are, generally speaking,
	//   what the level-2 operations use, so no densification is needed;
	//   they amount to plain copies into row or column storage. Level-3
	//   operations may use them too, but never for matrices with
	//   structure, because these schemas do not densify.
	// - BLIS_PACKED_ROW_PANELS / BLIS_PACKED_COL_PANELS are used only by
	//   level-3 operations. They pack into (typically) skinny row and
	//   column panels whose width comes from the register blocksizes, and
	//   are configured to densify structured matrices, although they also
	//   work on matrices that are already dense or unstructured.

	// Control tree that packs by rows. In this case the nvec multiple
	// applies to the m dimension and the ldim multiple to the n dimension.
	packm_cntl_row = bli_packm_cntl_obj_create(
		BLIS_UNBLOCKED, BLIS_VARIANT1,
		packm_mult_nvec,       // multiple for the m dimension
		packm_mult_ldim,       // multiple for the n dimension
		FALSE,                 // structure is not densified
		FALSE,                 // diagonal is not inverted
		FALSE,                 // no backwards iteration if upper
		FALSE,                 // no backwards iteration if lower
		BLIS_PACKED_ROWS,
		BLIS_BUFFER_FOR_GEN_USE );

	// Control tree that packs by columns. Here the roles swap: the ldim
	// multiple applies to the m dimension and the nvec multiple to n.
	packm_cntl_col = bli_packm_cntl_obj_create(
		BLIS_UNBLOCKED, BLIS_VARIANT1,
		packm_mult_ldim,       // multiple for the m dimension
		packm_mult_nvec,       // multiple for the n dimension
		FALSE,                 // structure is not densified
		FALSE,                 // diagonal is not inverted
		FALSE,                 // no backwards iteration if upper
		FALSE,                 // no backwards iteration if lower
		BLIS_PACKED_COLUMNS,
		BLIS_BUFFER_FOR_GEN_USE );

	// Default tree for callers that do not care whether packing is done
	// by rows or by columns: use the column-packing tree.
	packm_cntl = packm_cntl_col;
}
Exemple #11
0
void bli_herk_cntl_init()
{
	// Create blocksize objects for each dimension.
	herk_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
	                                BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
	                                BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
	                                BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );

	herk_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
	                                BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
	                                BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
	                                BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );

	herk_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
	                                BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
	                                BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
	                                BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );

	herk_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
	                                BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
	                                BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
	                                BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );

	herk_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
	                                BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
	                                BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
	                                BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );

	herk_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
	                                BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
	                                BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
	                                BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );

	herk_ni = bli_blksz_obj_create( BLIS_DEFAULT_NI_S, 0,
	                                BLIS_DEFAULT_NI_D, 0,
	                                BLIS_DEFAULT_NI_C, 0,
	                                BLIS_DEFAULT_NI_Z, 0 );


	// Create control tree objects for packm operations.
	herk_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           herk_mr,
	                           herk_kr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	herk_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           herk_kr,
	                           herk_nr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );

	// Create control tree objects for packm/unpackm operations on C.
	herk_packc_cntl
	=
	bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
	                           BLIS_VARIANT1,
	                           herk_mr,
	                           herk_nr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COLUMNS,
	                           BLIS_BUFFER_FOR_GEN_USE );

	herk_unpackc_cntl
	=
	bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
	                             BLIS_VARIANT1,
	                             NULL ); // no blocksize needed


	// Create control tree object for lowest-level block-panel kernel.
	herk_cntl_bp_ke
	=
	bli_herk_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL, NULL, NULL, NULL,
	                          NULL, NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	herk_cntl_op_bp
	=
	bli_herk_cntl_obj_create( BLIS_BLOCKED,
	                          //BLIS_VARIANT4,  // var1 with incremental pack in iter 0
	                          BLIS_VARIANT1,
	                          herk_mc,
	                          herk_ni,
	                          NULL,
	                          herk_packa_cntl,
	                          herk_packb_cntl,
	                          NULL,
	                          herk_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	herk_cntl_mm_op
	=
	bli_herk_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          herk_kc,
	                          NULL,
	                          NULL,
	                          NULL, 
	                          NULL,
	                          NULL,
	                          herk_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	herk_cntl_vl_mm
	=
	bli_herk_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          herk_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          herk_cntl_mm_op,
	                          NULL );

	// Alias the "master" herk control tree to a shorter name.
	herk_cntl = herk_cntl_vl_mm;
}
Exemple #12
0
void bli_gemm4m1_cntl_init()
{
    // Create blocksize objects for each dimension.
    // NOTE: the complex blocksizes for 4m1 are generally equal to their
    // corresponding real domain counterparts. However, we want to promote
    // similar cache footprints for the micro-panels of A and B (when
    // compared to executing in the real domain), and since the complex
    // micro-panels are twice as "fat" (due to storing real and imaginary
    // parts), we reduce KC by a factor of 2 to compensate.
    gemm4m1_mc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
                                  BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D );
    gemm4m1_nc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
                                  BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D );
    gemm4m1_kc
        =
            bli_blksz_obj_create( 0,                   0,
                                  0,                   0,
                                  BLIS_DEFAULT_KC_S/2, BLIS_MAXIMUM_KC_S/2,
                                  BLIS_DEFAULT_KC_D/2, BLIS_MAXIMUM_KC_D/2 );
    gemm4m1_mr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
                                  BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
    gemm4m1_nr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
                                  BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
    gemm4m1_kr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
                                  BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


    // Attach the register blksz_t objects as blocksize multiples to the cache
    // blksz_t objects.
    bli_blksz_obj_attach_mult_to( gemm4m1_mr, gemm4m1_mc );
    bli_blksz_obj_attach_mult_to( gemm4m1_nr, gemm4m1_nc );
    bli_blksz_obj_attach_mult_to( gemm4m1_kr, gemm4m1_kc );


    // Attach the mr and nr blksz_t objects to each cache blksz_t object.
    // The primary example of why this is needed relates to nudging kc.
    // In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
    // since the multiple we target in nudging depends on whether the
    // structured matrix is on the left or the right.
    bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_mc );
    bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_nc );
    bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_kc );


    // Create function pointer object for each datatype-specific gemm
    // micro-kernel.
    gemm4m1_ukrs
        =
            bli_func_obj_create(
                NULL,                 FALSE,
                NULL,                 FALSE,
                BLIS_CGEMM4M1_UKERNEL, BLIS_CGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS,
                BLIS_ZGEMM4M1_UKERNEL, BLIS_ZGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS );


    // Create control tree objects for packm operations.
    gemm4m1_packa_cntl
        =
            bli_packm_cntl_obj_create( BLIS_BLOCKED,
                                       BLIS_VARIANT1,
                                       gemm4m1_mr,
                                       gemm4m1_kr,
                                       FALSE, // do NOT invert diagonal
                                       FALSE, // reverse iteration if upper?
                                       FALSE, // reverse iteration if lower?
                                       BLIS_PACKED_ROW_PANELS_4MI,
                                       BLIS_BUFFER_FOR_A_BLOCK );

    gemm4m1_packb_cntl
        =
            bli_packm_cntl_obj_create( BLIS_BLOCKED,
                                       BLIS_VARIANT1,
                                       gemm4m1_kr,
                                       gemm4m1_nr,
                                       FALSE, // do NOT invert diagonal
                                       FALSE, // reverse iteration if upper?
                                       FALSE, // reverse iteration if lower?
                                       BLIS_PACKED_COL_PANELS_4MI,
                                       BLIS_BUFFER_FOR_B_PANEL );


    //
    // Create a control tree for packing A and B, and streaming C.
    //

    // Create control tree object for lowest-level block-panel kernel.
    gemm4m1_cntl_bp_ke
        =
            bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
                                      BLIS_VARIANT2,
                                      NULL,
                                      gemm4m1_ukrs,
                                      NULL, NULL, NULL,
                                      NULL, NULL, NULL );

    // Create control tree object for outer panel (to block-panel)
    // problem.
    gemm4m1_cntl_op_bp
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT1,
                                      gemm4m1_mc,
                                      NULL,
                                      NULL,
                                      gemm4m1_packa_cntl,
                                      gemm4m1_packb_cntl,
                                      NULL,
                                      gemm4m1_cntl_bp_ke,
                                      NULL );

    // Create control tree object for general problem via multiple
    // rank-k (outer panel) updates.
    gemm4m1_cntl_mm_op
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT3,
                                      gemm4m1_kc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm4m1_cntl_op_bp,
                                      NULL );

    // Create control tree object for very large problem via multiple
    // general problems.
    gemm4m1_cntl_vl_mm
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT2,
                                      gemm4m1_nc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm4m1_cntl_mm_op,
                                      NULL );

    // Alias the "master" gemm control tree to a shorter name.
    gemm4m1_cntl = gemm4m1_cntl_vl_mm;

}
Exemple #13
0
void bli_gemm4mh_cntl_init()
{
	// Create blocksize objects for each dimension.
	// NOTE: the complex blocksizes for 4mh are equal to their
	// corresponding real domain counterparts.
	gemm4mh_mc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
	                      BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D );
	gemm4mh_nc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
	                      BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D );
	gemm4mh_kc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                      BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D );
	gemm4mh_mr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                      BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
	gemm4mh_nr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                      BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
	gemm4mh_kr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                      BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


	// Attach the register blksz_t objects as blocksize multiples to the cache
	// blksz_t objects.
	bli_blksz_obj_attach_mult_to( gemm4mh_mr, gemm4mh_mc );
	bli_blksz_obj_attach_mult_to( gemm4mh_nr, gemm4mh_nc );
	bli_blksz_obj_attach_mult_to( gemm4mh_kr, gemm4mh_kc );


	// Attach the mr and nr blksz_t objects to each cache blksz_t object.
	// The primary example of why this is needed relates to nudging kc.
	// In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
	// since the multiple we target in nudging depends on whether the
	// structured matrix is on the left or the right.
	bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_mc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_nc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_kc );


	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm4mh_ukrs
	=
	bli_func_obj_create(
	    NULL,                  FALSE,
	    NULL,                  FALSE,
	    BLIS_CGEMM4MH_UKERNEL, BLIS_CGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS,
	    BLIS_ZGEMM4MH_UKERNEL, BLIS_ZGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS );


	// Create control tree objects for packm operations (real only).
	gemm4mh_packa_cntl_ro
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_mr,
	                           gemm4mh_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS_RO,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm4mh_packb_cntl_ro
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_kr,
	                           gemm4mh_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS_RO,
	                           BLIS_BUFFER_FOR_B_PANEL );

	// Create control tree objects for packm operations (imag only).
	gemm4mh_packa_cntl_io
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_mr,
	                           gemm4mh_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS_IO,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm4mh_packb_cntl_io
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_kr,
	                           gemm4mh_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS_IO,
	                           BLIS_BUFFER_FOR_B_PANEL );


	// Create control tree object for lowest-level block-panel kernel.
	gemm4mh_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL,
	                          gemm4mh_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	//
	// Create control tree for A.real * B.real.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (real x real)
	gemm4mh_cntl_op_bp_rr
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_ro,
	                          gemm4mh_packb_cntl_ro,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (real x real)
	gemm4mh_cntl_mm_op_rr
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_rr,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (real x real)
	gemm4mh_cntl_vl_mm_rr
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_rr,
	                          NULL );

	//
	// Create control tree for A.real * B.imag.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (real x imag)
	gemm4mh_cntl_op_bp_ri
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_ro,
	                          gemm4mh_packb_cntl_io,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (real x imag)
	gemm4mh_cntl_mm_op_ri
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_ri,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (real x imag)
	gemm4mh_cntl_vl_mm_ri
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_ri,
	                          NULL );

	//
	// Create control tree for A.imag * B.real.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (imag x real)
	gemm4mh_cntl_op_bp_ir
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_io,
	                          gemm4mh_packb_cntl_ro,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (imag x real)
	gemm4mh_cntl_mm_op_ir
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_ir,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (imag x real)
	gemm4mh_cntl_vl_mm_ir
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_ir,
	                          NULL );

	//
	// Create control tree for A.imag * B.imag.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (imag x imag)
	gemm4mh_cntl_op_bp_ii
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_io,
	                          gemm4mh_packb_cntl_io,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (imag x imag)
	gemm4mh_cntl_mm_op_ii
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_ii,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (imag x imag)
	gemm4mh_cntl_vl_mm_ii
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_ii,
	                          NULL );


	// Alias the "master" gemm control tree to a shorter name.
	gemm4mh_cntl_rr = gemm4mh_cntl_vl_mm_rr;
	gemm4mh_cntl_ri = gemm4mh_cntl_vl_mm_ri;
	gemm4mh_cntl_ir = gemm4mh_cntl_vl_mm_ir;
	gemm4mh_cntl_ii = gemm4mh_cntl_vl_mm_ii;

}
Exemple #14
0
void bli_gemm_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
	                                BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
	                                BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
	                                BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );

	gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
	                                BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
	                                BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
	                                BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );

	gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
	                                BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
	                                BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
	                                BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );

	gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
	                                BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
	                                BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
	                                BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );

	gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
	                                BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
	                                BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
	                                BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );

	gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, 0,
	                                BLIS_DEFAULT_KR_D, 0,
	                                BLIS_DEFAULT_KR_C, 0,
	                                BLIS_DEFAULT_KR_Z, 0 );

	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL,
	                                 BLIS_DGEMM_UKERNEL,
	                                 BLIS_CGEMM_UKERNEL,
	                                 BLIS_ZGEMM_UKERNEL );


	// Create control tree objects for packm operations.
	gemm_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_mr,
	                           gemm_kr,
	                           TRUE,  // densify; used by hemm/symm
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_kr,
	                           gemm_nr,
	                           TRUE,  // densify; used by hemm/symm
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL,
	                          gemm_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          NULL,
	                          NULL,
	                          gemm_packa_cntl,
	                          gemm_packb_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm_cntl = gemm_cntl_vl_mm;

	//bli_gemm_cntl_init_exp();
}
Exemple #15
0
// Populate the file's gemm control-tree globals: blocksize and alignment
// objects, the blocksize attachments, the optimized and reference
// micro-kernel function objects, the two packm control trees, and the
// four-node chain of gemm control trees whose top node is published as
// gemm_cntl.
void bli_gemm_cntl_init()
{
	// --- Cache blocksizes (mc, nc, kc) ------------------------------------
	// Each is given a BLIS_DEFAULT_* / BLIS_MAXIMUM_* macro pair for every
	// one of the _S, _D, _C and _Z suffixes.
	gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
	                                BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D,
	                                BLIS_DEFAULT_MC_C, BLIS_MAXIMUM_MC_C,
	                                BLIS_DEFAULT_MC_Z, BLIS_MAXIMUM_MC_Z );

	gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
	                                BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D,
	                                BLIS_DEFAULT_NC_C, BLIS_MAXIMUM_NC_C,
	                                BLIS_DEFAULT_NC_Z, BLIS_MAXIMUM_NC_Z );

	gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                                BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D,
	                                BLIS_DEFAULT_KC_C, BLIS_MAXIMUM_KC_C,
	                                BLIS_DEFAULT_KC_Z, BLIS_MAXIMUM_KC_Z );

	// --- Register blocksizes (mr, nr, kr) ---------------------------------
	// Same layout, but the second macro of each pair is BLIS_PACKDIM_*.
	gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                                BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D,
	                                BLIS_DEFAULT_MR_C, BLIS_PACKDIM_MR_C,
	                                BLIS_DEFAULT_MR_Z, BLIS_PACKDIM_MR_Z );

	gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                                BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D,
	                                BLIS_DEFAULT_NR_C, BLIS_PACKDIM_NR_C,
	                                BLIS_DEFAULT_NR_Z, BLIS_PACKDIM_NR_Z );

	gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                                BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D,
	                                BLIS_DEFAULT_KR_C, BLIS_PACKDIM_KR_C,
	                                BLIS_DEFAULT_KR_Z, BLIS_PACKDIM_KR_Z );

	// --- Micro-panel alignment (in bytes) ---------------------------------
	// Stored in blksz objects as well; the second value of each pair is a
	// literal 0. Neither object is referenced again in this routine.
	gemm_upanel_a_align = bli_blksz_obj_create( BLIS_UPANEL_A_ALIGN_SIZE_S, 0,
	                                            BLIS_UPANEL_A_ALIGN_SIZE_D, 0,
	                                            BLIS_UPANEL_A_ALIGN_SIZE_C, 0,
	                                            BLIS_UPANEL_A_ALIGN_SIZE_Z, 0 );

	gemm_upanel_b_align = bli_blksz_obj_create( BLIS_UPANEL_B_ALIGN_SIZE_S, 0,
	                                            BLIS_UPANEL_B_ALIGN_SIZE_D, 0,
	                                            BLIS_UPANEL_B_ALIGN_SIZE_C, 0,
	                                            BLIS_UPANEL_B_ALIGN_SIZE_Z, 0 );

	// --- Blocksize attachments --------------------------------------------
	// Each register blocksize becomes the blocksize multiple of its
	// matching cache blocksize: mr -> mc, nr -> nc, kr -> kc.
	bli_blksz_obj_attach_mult_to( gemm_mr, gemm_mc );
	bli_blksz_obj_attach_mult_to( gemm_nr, gemm_nc );
	bli_blksz_obj_attach_mult_to( gemm_kr, gemm_kc );

	// Every cache blocksize also gets both mr and nr attached. The main
	// motivation is nudging kc: for hemm, symm, trmm and trmm3 the multiple
	// targeted when nudging depends on whether the structured matrix sits
	// on the left or on the right, so both values have to be available.
	bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_mc );
	bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_nc );
	bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_kc );

	// --- Micro-kernel function objects ------------------------------------
	// Datatype-specific gemm micro-kernels, each paired with its
	// *_PREFERS_CONTIG_ROWS macro.
	gemm_ukrs = bli_func_obj_create(
	    BLIS_SGEMM_UKERNEL, BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	    BLIS_DGEMM_UKERNEL, BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	    BLIS_CGEMM_UKERNEL, BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	    BLIS_ZGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS );

	// Reference micro-kernels, each paired with FALSE. This object is not
	// wired into the control tree built below.
	gemm_ref_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL_REF, FALSE,
	                                     BLIS_DGEMM_UKERNEL_REF, FALSE,
	                                     BLIS_CGEMM_UKERNEL_REF, FALSE,
	                                     BLIS_ZGEMM_UKERNEL_REF, FALSE );

	// --- packm control trees ----------------------------------------------
	// A is packed using (mr, kr) into row panels, in the A-block buffer.
	gemm_packa_cntl = bli_packm_cntl_obj_create(
	    BLIS_BLOCKED, BLIS_VARIANT1,
	    gemm_mr, gemm_kr,
	    FALSE, // diagonal is not inverted
	    FALSE, // reverse iteration if upper?
	    FALSE, // reverse iteration if lower?
	    BLIS_PACKED_ROW_PANELS,
	    BLIS_BUFFER_FOR_A_BLOCK );

	// B is packed using (kr, nr) into column panels, in the B-panel buffer.
	gemm_packb_cntl = bli_packm_cntl_obj_create(
	    BLIS_BLOCKED, BLIS_VARIANT1,
	    gemm_kr, gemm_nr,
	    FALSE, // diagonal is not inverted
	    FALSE, // reverse iteration if upper?
	    FALSE, // reverse iteration if lower?
	    BLIS_PACKED_COL_PANELS,
	    BLIS_BUFFER_FOR_B_PANEL );

	// --- gemm control tree: pack A and B, stream C ------------------------
	// The nodes are created leaf-first because each one is handed the node
	// created just before it.

	// Leaf: the lowest-level block-panel kernel, which carries gemm_ukrs.
	gemm_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                                            BLIS_VARIANT2,
	                                            NULL,
	                                            gemm_ukrs,
	                                            NULL, NULL, NULL,
	                                            NULL, NULL, NULL );

	// Outer panel (to block-panel) problem: uses mc and both packm trees.
	gemm_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                                            BLIS_VARIANT1,
	                                            gemm_mc,
	                                            NULL, NULL,
	                                            gemm_packa_cntl,
	                                            gemm_packb_cntl,
	                                            NULL,
	                                            gemm_cntl_bp_ke,
	                                            NULL );

	// General problem, expressed as multiple rank-k (outer panel) updates:
	// uses kc.
	gemm_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                                            BLIS_VARIANT3,
	                                            gemm_kc,
	                                            NULL, NULL, NULL, NULL, NULL,
	                                            gemm_cntl_op_bp,
	                                            NULL );

	// Very large problem, expressed as multiple general problems: uses nc.
	gemm_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                                            BLIS_VARIANT2,
	                                            gemm_nc,
	                                            NULL, NULL, NULL, NULL, NULL,
	                                            gemm_cntl_mm_op,
	                                            NULL );

	// Publish the top of the tree under the short "master" name.
	gemm_cntl = gemm_cntl_vl_mm;
}