void bli_gemm4mb_cntl_init() { // Create blocksize objects for each dimension. gemm4mb_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MC_S/2, BLIS_MAXIMUM_MC_S/2, BLIS_DEFAULT_MC_D/2, BLIS_MAXIMUM_MC_D/2 ); gemm4mb_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NC_S/2, BLIS_MAXIMUM_NC_S/2, BLIS_DEFAULT_NC_D/2, BLIS_MAXIMUM_NC_D/2 ); gemm4mb_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S, BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D ); gemm4mb_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D ); gemm4mb_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D ); gemm4mb_kr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D ); // Attach the register blksz_t objects as blocksize multiples to the cache // blksz_t objects. bli_blksz_obj_attach_mult_to( gemm4mb_mr, gemm4mb_mc ); bli_blksz_obj_attach_mult_to( gemm4mb_nr, gemm4mb_nc ); bli_blksz_obj_attach_mult_to( gemm4mb_kr, gemm4mb_kc ); // The cache blocksizes that were scaled above need to be rounded down // to their respective nearest register blocksize multiples. Note that // this can only happen after the appropriate register blocksize is // actually attached as a multiple. bli_blksz_reduce_to_mult( gemm4mb_mc ); bli_blksz_reduce_to_mult( gemm4mb_nc ); // Attach the mr and nr blksz_t objects to each cache blksz_t object. // The primary example of why this is needed relates to nudging kc. // In hemm, symm, trmm, or trmm3, we need to know both mr and nr, // since the multiple we target in nudging depends on whether the // structured matrix is on the left or the right. 
bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_mc ); bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_nc ); bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm4mb_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMM4MB_UKERNEL, BLIS_CGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM4MB_UKERNEL, BLIS_ZGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations. gemm4mb_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mb_mr, gemm4mb_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); gemm4mb_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mb_kr, gemm4mb_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_4MI, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm4mb_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT3, NULL, gemm4mb_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm4mb_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mb_mc, NULL, NULL, gemm4mb_packa_cntl, gemm4mb_packb_cntl, NULL, gemm4mb_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm4mb_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mb_kc, NULL, NULL, NULL, NULL, NULL, gemm4mb_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. 
gemm4mb_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mb_nc, NULL, NULL, NULL, NULL, NULL, gemm4mb_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm4mb_cntl = gemm4mb_cntl_vl_mm; }
void bli_gemm_cntl_init() { // Create blocksize objects for each dimension. gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); // Create control tree objects for packm operations. gemm_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_mr, gemm_kr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); gemm_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_kr, gemm_nr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm/unpackm operations on C. 
gemm_packc_cntl = bli_packm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, gemm_mr, gemm_nr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COLUMNS, BLIS_BUFFER_FOR_C_PANEL ); gemm_unpackc_cntl = bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, NULL ); // no blocksize needed // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, NULL, gemm_packa_cntl, gemm_packb_cntl, NULL, gemm_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, NULL, NULL, NULL, NULL, gemm_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. gemm_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, NULL, NULL, NULL, NULL, gemm_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm_cntl = gemm_cntl_vl_mm; #if 0 // // Create a control tree for packing A, and streaming B and C. 
// gemm_cntl_bp_ke5 = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT5, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); gemm_cntl_pm_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, NULL, gemm_packa_cntl, NULL, //gemm_packc_cntl, NULL, gemm_cntl_bp_ke5, //gemm_unpackc_cntl ); NULL ); gemm_cntl_mm_pm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, NULL, NULL, NULL, NULL, gemm_cntl_pm_bp, NULL ); gemm_cntl_vl_mm5 = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, NULL, NULL, NULL, NULL, gemm_cntl_mm_pm, NULL ); gemm_cntl_packa = gemm_cntl_vl_mm5; #endif }
void bli_gemm_cntl_init() { // Create blocksize objects for each dimension. gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D, BLIS_DEFAULT_MC_C, BLIS_MAXIMUM_MC_C, BLIS_DEFAULT_MC_Z, BLIS_MAXIMUM_MC_Z ); gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D, BLIS_DEFAULT_NC_C, BLIS_MAXIMUM_NC_C, BLIS_DEFAULT_NC_Z, BLIS_MAXIMUM_NC_Z ); gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S, BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D, BLIS_DEFAULT_KC_C, BLIS_MAXIMUM_KC_C, BLIS_DEFAULT_KC_Z, BLIS_MAXIMUM_KC_Z ); gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D, BLIS_DEFAULT_MR_C, BLIS_PACKDIM_MR_C, BLIS_DEFAULT_MR_Z, BLIS_PACKDIM_MR_Z ); gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D, BLIS_DEFAULT_NR_C, BLIS_PACKDIM_NR_C, BLIS_DEFAULT_NR_Z, BLIS_PACKDIM_NR_Z ); gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D, BLIS_DEFAULT_KR_C, BLIS_PACKDIM_KR_C, BLIS_DEFAULT_KR_Z, BLIS_PACKDIM_KR_Z ); // Create objects for micro-panel alignment (in bytes). gemm_upanel_a_align = bli_blksz_obj_create( BLIS_UPANEL_A_ALIGN_SIZE_S, 0, BLIS_UPANEL_A_ALIGN_SIZE_D, 0, BLIS_UPANEL_A_ALIGN_SIZE_C, 0, BLIS_UPANEL_A_ALIGN_SIZE_Z, 0 ); gemm_upanel_b_align = bli_blksz_obj_create( BLIS_UPANEL_B_ALIGN_SIZE_S, 0, BLIS_UPANEL_B_ALIGN_SIZE_D, 0, BLIS_UPANEL_B_ALIGN_SIZE_C, 0, BLIS_UPANEL_B_ALIGN_SIZE_Z, 0 ); // Attach the register blksz_t objects as sub-blocksizes to the cache // blksz_t objects. bli_blksz_obj_attach_to( gemm_mr, gemm_mc ); bli_blksz_obj_attach_to( gemm_nr, gemm_nc ); bli_blksz_obj_attach_to( gemm_kr, gemm_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. 
gemm_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL, BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_DGEMM_UKERNEL, BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_CGEMM_UKERNEL, BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations. gemm_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mr, gemm_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); gemm_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_kr, gemm_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, gemm_ukrs, NULL, gemm_packa_cntl, gemm_packb_cntl, NULL, gemm_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, gemm_ukrs, NULL, NULL, NULL, NULL, gemm_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. gemm_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, gemm_ukrs, NULL, NULL, NULL, NULL, gemm_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm_cntl = gemm_cntl_vl_mm; }
cntl_t* bli_trsm_l_cntl_create ( void ) { void* macro_kernel_p = bli_trsm_xx_ker_var2; // Create two nodes for the macro-kernel. cntl_t* trsm_cntl_bu_ke = bli_trsm_cntl_obj_create ( BLIS_MR, // needed for bli_thrinfo_rgrow() NULL, // variant function pointer not used NULL // no sub-node; this is the leaf of the tree. ); cntl_t* trsm_cntl_bp_bu = bli_trsm_cntl_obj_create ( BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow() macro_kernel_p, trsm_cntl_bu_ke ); // Create a node for packing matrix A. cntl_t* trsm_cntl_packa = bli_packm_cntl_obj_create ( bli_trsm_packa, bli_packm_blk_var1, BLIS_MR, BLIS_MR, TRUE, // do NOT invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK, trsm_cntl_bp_bu ); // Create a node for partitioning the m dimension by MC. cntl_t* trsm_cntl_op_bp = bli_trsm_cntl_obj_create ( BLIS_MC, bli_trsm_blk_var1, trsm_cntl_packa ); // Create a node for packing matrix B. cntl_t* trsm_cntl_packb = bli_packm_cntl_obj_create ( bli_trsm_packb, bli_packm_blk_var1, BLIS_MR, BLIS_NR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL, trsm_cntl_op_bp ); // Create a node for partitioning the k dimension by KC. cntl_t* trsm_cntl_mm_op = bli_trsm_cntl_obj_create ( BLIS_KC, bli_trsm_blk_var3, trsm_cntl_packb ); // Create a node for partitioning the n dimension by NC. cntl_t* trsm_cntl_vl_mm = bli_trsm_cntl_obj_create ( BLIS_NC, bli_trsm_blk_var2, trsm_cntl_mm_op ); return trsm_cntl_vl_mm; }
void bli_gemm3m3_cntl_init() { // Create blocksize objects for each dimension. // NOTE: the complex blocksizes for 3m3 are generally equal to their // corresponding real domain counterparts. However, we want to promote // similar cache footprints for the micro-panels of A and B (when // compared to executing in the real domain), and since the complex // micro-panels are three times as "fat" (due to storing real, imaginary // and real+imaginary parts), we reduce KC by a factor of 2 to // compensate. Ideally, we would reduce by a factor of 3, but that // could get messy vis-a-vis keeping KC a multiple of the register // blocksizes. gemm3m3_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D ); gemm3m3_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NC_S/3, BLIS_MAXIMUM_NC_S/3, BLIS_DEFAULT_NC_D/3, BLIS_MAXIMUM_NC_D/3 ); gemm3m3_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S, BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D ); gemm3m3_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D ); gemm3m3_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D ); gemm3m3_kr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D ); // Attach the register blksz_t objects as blocksize multiples to the cache // blksz_t objects. bli_blksz_obj_attach_mult_to( gemm3m3_mr, gemm3m3_mc ); bli_blksz_obj_attach_mult_to( gemm3m3_nr, gemm3m3_nc ); bli_blksz_obj_attach_mult_to( gemm3m3_kr, gemm3m3_kc ); // The cache blocksizes that were scaled above need to be rounded down // to their respective nearest register blocksize multiples. Note that // this can only happen after the appropriate register blocksize is // actually attached as a multiple. 
bli_blksz_reduce_to_mult( gemm3m3_nc ); // Attach the mr and nr blksz_t objects to each cache blksz_t object. // The primary example of why this is needed relates to nudging kc. // In hemm, symm, trmm, or trmm3, we need to know both mr and nr, // since the multiple we target in nudging depends on whether the // structured matrix is on the left or the right. bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_mc ); bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_nc ); bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm3m3_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMM3M3_UKERNEL, BLIS_CGEMM3M3_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM3M3_UKERNEL, BLIS_ZGEMM3M3_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations. gemm3m3_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m3_kr, gemm3m3_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_3MS, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm3m3_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm3m3_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm3m3_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT4, gemm3m3_mc, NULL, NULL, NULL, // packm cntl nodes accessed directly from blk_var4 gemm3m3_packb_cntl, NULL, gemm3m3_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. 
gemm3m3_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm3m3_kc, NULL, NULL, NULL, NULL, NULL, gemm3m3_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. gemm3m3_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m3_nc, NULL, NULL, NULL, NULL, NULL, gemm3m3_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm3m3_cntl = gemm3m3_cntl_vl_mm; }
void bli_herk_cntl_init() { // Create blocksize objects for each dimension. herk_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); herk_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); herk_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); herk_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); herk_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); herk_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); herk_ni = bli_blksz_obj_create( BLIS_DEFAULT_NI_S, 0, BLIS_DEFAULT_NI_D, 0, BLIS_DEFAULT_NI_C, 0, BLIS_DEFAULT_NI_Z, 0 ); // Create control tree objects for packm operations. herk_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, herk_mr, herk_kr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); herk_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, herk_kr, herk_nr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? 
BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm/unpackm operations on C. herk_packc_cntl = bli_packm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, herk_mr, herk_nr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COLUMNS, BLIS_BUFFER_FOR_GEN_USE ); herk_unpackc_cntl = bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, NULL ); // no blocksize needed // Create control tree object for lowest-level block-panel kernel. herk_cntl_bp_ke = bli_herk_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. herk_cntl_op_bp = bli_herk_cntl_obj_create( BLIS_BLOCKED, //BLIS_VARIANT4, // var1 with incremental pack in iter 0 BLIS_VARIANT1, herk_mc, herk_ni, NULL, herk_packa_cntl, herk_packb_cntl, NULL, herk_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. herk_cntl_mm_op = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, herk_kc, NULL, NULL, NULL, NULL, NULL, herk_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. herk_cntl_vl_mm = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, herk_nc, NULL, NULL, NULL, NULL, NULL, herk_cntl_mm_op, NULL ); // Alias the "master" herk control tree to a shorter name. herk_cntl = herk_cntl_vl_mm; }
void bli_packm_cntl_init() { // Create blocksize objects for m and n register blocking. We will attach // these to the packm control node so they can be used to (a) allocate a // block whose m and n dimension are multiples of mr and nr, and (b) know // how much zero-padding is necessary for edge cases. // NOTE: these alignments end up getting applied to matrices packed for // level-2 operations, even though they are not needed, and/or smaller // alignments may be sufficient. For simplicity, we choose to tweak the // dimensions of all pack matrix buffers the same amount. packm_mult_ldim = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, 0, BLIS_DEFAULT_MR_D, 0, BLIS_DEFAULT_MR_C, 0, BLIS_DEFAULT_MR_Z, 0 ); packm_mult_nvec = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, 0, BLIS_DEFAULT_NR_D, 0, BLIS_DEFAULT_NR_C, 0, BLIS_DEFAULT_NR_Z, 0 ); // Generally speaking, the BLIS_PACKED_ROWS and BLIS_PACKED_COLUMNS // are used by the level-2 operations, and thus densification is not // necessary. These schemas amount to simple copies to row or column // storage. These simple schemas may be used by level-3 operations, // but they should never be used for matrices with structure (since // they do not densify). // The BLIS_PACKED_ROW_PANELS and BLIS_PACKED_COL_PANELS schemas are // used only in level-3 operations. They pack to (typically) skinny // row and column panels, where the width of the panel is determined // by register blocksizes. They are configured to densify matrices // with structure, though they can also be used on matrices that // are already dense and/or have no structure. // Create control trees to pack by rows. 
packm_cntl_row = bli_packm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, // When packing to rows: packm_mult_nvec, // - nvec multiple is used for m dimension packm_mult_ldim, // - ldim multiple is used for n dimension FALSE, // do NOT densify structure FALSE, // do NOT invert diagonal FALSE, // do NOT iterate backwards if upper FALSE, // do NOT iterate backwards if lower BLIS_PACKED_ROWS, BLIS_BUFFER_FOR_GEN_USE ); // Create control trees to pack by columns. packm_cntl_col = bli_packm_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, // When packing to columns: packm_mult_ldim, // - ldim multiple is used for m dimension packm_mult_nvec, // - nvec multiple is used for n dimension FALSE, // do NOT densify structure FALSE, // do NOT invert diagonal FALSE, // do NOT iterate backwards if upper FALSE, // do NOT iterate backwards if lower BLIS_PACKED_COLUMNS, BLIS_BUFFER_FOR_GEN_USE ); // Set defaults when we don't care whether the packing is by rows or // by columns. packm_cntl = packm_cntl_col; }
void bli_gemm4m1_cntl_init() { // Create blocksize objects for each dimension. // NOTE: the complex blocksizes for 4m1 are generally equal to their // corresponding real domain counterparts. However, we want to promote // similar cache footprints for the micro-panels of A and B (when // compared to executing in the real domain), and since the complex // micro-panels are twice as "fat" (due to storing real and imaginary // parts), we reduce KC by a factor of 2 to compensate. gemm4m1_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D ); gemm4m1_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D ); gemm4m1_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KC_S/2, BLIS_MAXIMUM_KC_S/2, BLIS_DEFAULT_KC_D/2, BLIS_MAXIMUM_KC_D/2 ); gemm4m1_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D ); gemm4m1_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D ); gemm4m1_kr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D ); // Attach the register blksz_t objects as blocksize multiples to the cache // blksz_t objects. bli_blksz_obj_attach_mult_to( gemm4m1_mr, gemm4m1_mc ); bli_blksz_obj_attach_mult_to( gemm4m1_nr, gemm4m1_nc ); bli_blksz_obj_attach_mult_to( gemm4m1_kr, gemm4m1_kc ); // Attach the mr and nr blksz_t objects to each cache blksz_t object. // The primary example of why this is needed relates to nudging kc. // In hemm, symm, trmm, or trmm3, we need to know both mr and nr, // since the multiple we target in nudging depends on whether the // structured matrix is on the left or the right. 
bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_mc ); bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_nc ); bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm4m1_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMM4M1_UKERNEL, BLIS_CGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM4M1_UKERNEL, BLIS_ZGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations. gemm4m1_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4m1_mr, gemm4m1_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); gemm4m1_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4m1_kr, gemm4m1_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_4MI, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm4m1_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm4m1_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm4m1_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4m1_mc, NULL, NULL, gemm4m1_packa_cntl, gemm4m1_packb_cntl, NULL, gemm4m1_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm4m1_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4m1_kc, NULL, NULL, NULL, NULL, NULL, gemm4m1_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. 
gemm4m1_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4m1_nc, NULL, NULL, NULL, NULL, NULL, gemm4m1_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm4m1_cntl = gemm4m1_cntl_vl_mm; }
void bli_herk_cntl_init() { // Create control tree objects for packm operations. herk_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mr, gemm_kr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); herk_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_kr, gemm_nr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree object for lowest-level block-panel kernel. herk_cntl_bp_ke = bli_herk_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. herk_cntl_op_bp = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, NULL, NULL, herk_packa_cntl, herk_packb_cntl, NULL, herk_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. herk_cntl_mm_op = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, NULL, NULL, NULL, NULL, NULL, herk_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. herk_cntl_vl_mm = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, NULL, NULL, NULL, NULL, NULL, herk_cntl_mm_op, NULL ); // Alias the "master" herk control tree to a shorter name. herk_cntl = herk_cntl_vl_mm; }
void bli_gemm4mh_cntl_init() { // Create blocksize objects for each dimension. // NOTE: the complex blocksizes for 4mh are equal to their // corresponding real domain counterparts. gemm4mh_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D ); gemm4mh_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D ); gemm4mh_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S, BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D ); gemm4mh_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D ); gemm4mh_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D ); gemm4mh_kr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D ); // Attach the register blksz_t objects as blocksize multiples to the cache // blksz_t objects. bli_blksz_obj_attach_mult_to( gemm4mh_mr, gemm4mh_mc ); bli_blksz_obj_attach_mult_to( gemm4mh_nr, gemm4mh_nc ); bli_blksz_obj_attach_mult_to( gemm4mh_kr, gemm4mh_kc ); // Attach the mr and nr blksz_t objects to each cache blksz_t object. // The primary example of why this is needed relates to nudging kc. // In hemm, symm, trmm, or trmm3, we need to know both mr and nr, // since the multiple we target in nudging depends on whether the // structured matrix is on the left or the right. bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_mc ); bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_nc ); bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. 
gemm4mh_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMM4MH_UKERNEL, BLIS_CGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM4MH_UKERNEL, BLIS_ZGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations (real only). gemm4mh_packa_cntl_ro = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_mr, gemm4mh_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_RO, BLIS_BUFFER_FOR_A_BLOCK ); gemm4mh_packb_cntl_ro = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_kr, gemm4mh_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_RO, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (imag only). gemm4mh_packa_cntl_io = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_mr, gemm4mh_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_IO, BLIS_BUFFER_FOR_A_BLOCK ); gemm4mh_packb_cntl_io = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_kr, gemm4mh_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_IO, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree object for lowest-level block-panel kernel. gemm4mh_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm4mh_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // // Create control tree for A.real * B.real. // // Create control tree object for outer panel (to block-panel) // problem. 
(real x real) gemm4mh_cntl_op_bp_rr = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, NULL, NULL, gemm4mh_packa_cntl_ro, gemm4mh_packb_cntl_ro, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. (real x real) gemm4mh_cntl_mm_op_rr = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_rr, NULL ); // Create control tree object for very large problem via multiple // general problems. (real x real) gemm4mh_cntl_vl_mm_rr = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_rr, NULL ); // // Create control tree for A.real * B.imag. // // Create control tree object for outer panel (to block-panel) // problem. (real x imag) gemm4mh_cntl_op_bp_ri = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, NULL, NULL, gemm4mh_packa_cntl_ro, gemm4mh_packb_cntl_io, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. (real x imag) gemm4mh_cntl_mm_op_ri = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_ri, NULL ); // Create control tree object for very large problem via multiple // general problems. (real x imag) gemm4mh_cntl_vl_mm_ri = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_ri, NULL ); // // Create control tree for A.imag * B.real. // // Create control tree object for outer panel (to block-panel) // problem. (imag x real) gemm4mh_cntl_op_bp_ir = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, NULL, NULL, gemm4mh_packa_cntl_io, gemm4mh_packb_cntl_ro, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. 
(imag x real) gemm4mh_cntl_mm_op_ir = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_ir, NULL ); // Create control tree object for very large problem via multiple // general problems. (imag x real) gemm4mh_cntl_vl_mm_ir = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_ir, NULL ); // // Create control tree for A.imag * B.imag. // // Create control tree object for outer panel (to block-panel) // problem. (imag x imag) gemm4mh_cntl_op_bp_ii = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, NULL, NULL, gemm4mh_packa_cntl_io, gemm4mh_packb_cntl_io, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. (imag x imag) gemm4mh_cntl_mm_op_ii = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_ii, NULL ); // Create control tree object for very large problem via multiple // general problems. (imag x imag) gemm4mh_cntl_vl_mm_ii = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, NULL, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_ii, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm4mh_cntl_rr = gemm4mh_cntl_vl_mm_rr; gemm4mh_cntl_ri = gemm4mh_cntl_vl_mm_ri; gemm4mh_cntl_ir = gemm4mh_cntl_vl_mm_ir; gemm4mh_cntl_ii = gemm4mh_cntl_vl_mm_ii; }
void bli_trmm_cntl_init() { // Create control tree objects for packm operations (left side). trmm_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, // IMPORTANT: Unlike trsm, trmm does not require a // "k" dim multiple equal to mr. gemm_mr, gemm_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); trmm_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, // IMPORTANT: Unlike trsm, trmm does not require a // "k" dim multiple equal to mr. gemm_kr, gemm_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree object for lowest-level block-panel kernel. trmm_cntl_bp_ke = bli_trmm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (left side). trmm_l_cntl_op_bp = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, gemm_ukrs, NULL, trmm_l_packa_cntl, trmm_l_packb_cntl, NULL, trmm_cntl_bp_ke, gemm_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (left side). trmm_l_cntl_mm_op = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, gemm_ukrs, NULL, NULL, NULL, NULL, trmm_l_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (left side). trmm_l_cntl_vl_mm = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, gemm_ukrs, NULL, NULL, NULL, NULL, trmm_l_cntl_mm_op, NULL, NULL ); // Alias the "master" trmm control trees to shorter names. trmm_l_cntl = trmm_l_cntl_vl_mm; }
void bli_trmm3m_cntl_init() { // Create control tree objects for packm operations (left side). trmm3m_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, // IMPORTANT: for consistency with trsm, "k" dim // multiple is set to mr. gemm3m_mr, gemm3m_mr, TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_3M, BLIS_BUFFER_FOR_A_BLOCK ); trmm3m_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, // IMPORTANT: m dim multiple here must be mr // since "k" dim multiple is set to mr above. gemm3m_mr, gemm3m_nr, FALSE, // already dense FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_3M, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (right side). trmm3m_r_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, // IMPORTANT: for consistency with trsm, "k" dim // multiple is set to nr. gemm3m_mr, gemm3m_nr, FALSE, // already dense FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_3M, BLIS_BUFFER_FOR_A_BLOCK ); trmm3m_r_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, // IMPORTANT: m dim multiple here must be nr // since "k" dim multiple is set to nr above. gemm3m_nr, gemm3m_nr, TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_3M, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree object for lowest-level block-panel kernel. trmm3m_cntl_bp_ke = bli_trmm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (left side). 
trmm3m_l_cntl_op_bp = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm3m_mc, NULL, NULL, trmm3m_l_packa_cntl, trmm3m_l_packb_cntl, NULL, trmm3m_cntl_bp_ke, gemm3m_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (left side). trmm3m_l_cntl_mm_op = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm3m_kc, NULL, NULL, NULL, NULL, NULL, trmm3m_l_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (left side). trmm3m_l_cntl_vl_mm = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nc, NULL, NULL, NULL, NULL, NULL, trmm3m_l_cntl_mm_op, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (right side). trmm3m_r_cntl_op_bp = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm3m_mc, NULL, NULL, trmm3m_r_packa_cntl, trmm3m_r_packb_cntl, NULL, trmm3m_cntl_bp_ke, gemm3m_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (right side). trmm3m_r_cntl_mm_op = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm3m_kc, NULL, NULL, NULL, NULL, NULL, trmm3m_r_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (right side). trmm3m_r_cntl_vl_mm = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nc, NULL, NULL, NULL, NULL, NULL, trmm3m_r_cntl_mm_op, NULL, NULL ); // Alias the "master" trmm control trees to shorter names. trmm3m_l_cntl = trmm3m_l_cntl_vl_mm; trmm3m_r_cntl = trmm3m_r_cntl_vl_mm; }
void bli_trsm3m_cntl_init() { // Create function pointer objects for each datatype-specific // gemmtrsm3m_l and gemmtrsm3m_u micro-kernel. gemmtrsm3m_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMMTRSM3M_L_UKERNEL, FALSE, BLIS_ZGEMMTRSM3M_L_UKERNEL, FALSE ); gemmtrsm3m_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMMTRSM3M_U_UKERNEL, FALSE, BLIS_ZGEMMTRSM3M_U_UKERNEL, FALSE ); // Create function pointer objects for each datatype-specific // trsm3m_l and trsm3m_u micro-kernel. trsm3m_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CTRSM3M_L_UKERNEL, FALSE, BLIS_ZTRSM3M_L_UKERNEL, FALSE ); trsm3m_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CTRSM3M_U_UKERNEL, FALSE, BLIS_ZTRSM3M_U_UKERNEL, FALSE ); // Create control tree objects for packm operations (left side). trsm3m_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: n dim multiple must be mr to // support right and bottom-right edge cases gemm3m_mr, gemm3m_mr, TRUE, // invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_3M, BLIS_BUFFER_FOR_A_BLOCK ); trsm3m_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: m dim multiple must be mr since // B_pack is updated (ie: serves as C) in trsm gemm3m_mr, gemm3m_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_3M, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (right side). trsm3m_r_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nr, gemm3m_mr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? 
BLIS_PACKED_ROW_PANELS_3M, BLIS_BUFFER_FOR_A_BLOCK ); trsm3m_r_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_mr, gemm3m_mr, TRUE, // invert diagonal FALSE, // reverse iteration if upper? TRUE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_3M, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree object for lowest-level block-panel kernel. trsm3m_cntl_bp_ke = bli_trsm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm3m_ukrs, gemmtrsm3m_l_ukrs, gemmtrsm3m_u_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (left side). trsm3m_l_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm3m_mc, gemm3m_ukrs, NULL, NULL, NULL, trsm3m_l_packa_cntl, trsm3m_l_packb_cntl, NULL, trsm3m_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (left side). trsm3m_l_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm3m_kc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_l_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (left side). trsm3m_l_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_l_cntl_mm_op, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (right side). trsm3m_r_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm3m_mc, gemm3m_ukrs, NULL, NULL, NULL, trsm3m_r_packa_cntl, trsm3m_r_packb_cntl, NULL, trsm3m_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (right side). 
trsm3m_r_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm3m_kc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_r_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (right side). trsm3m_r_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_r_cntl_mm_op, NULL, NULL ); // Alias the "master" trsm control trees to shorter names. trsm3m_l_cntl = trsm3m_l_cntl_vl_mm; trsm3m_r_cntl = trsm3m_r_cntl_vl_mm; }
/*
 * Creates a gemm-style control tree for the given operation family and
 * returns its root node.
 *
 * The tree is assembled bottom-up; every node receives the previously
 * created node as its sub-node. From the root down the chain is:
 *
 *   NC partition -> KC partition -> pack B -> MC partition -> pack A
 *     -> macro-kernel (NR) -> leaf (MR)
 *
 * family selects the macro-kernel: BLIS_HERK and BLIS_TRMM get their own
 * macro-kernels, every other value gets the gemm macro-kernel.
 */
cntl_t* bli_gemm_cntl_create ( opid_t family )
{
    // Pick the macro-kernel that matches the operation family.
    void* ker_fp;

    if ( family == BLIS_HERK )
    {
        ker_fp = bli_herk_x_ker_var2;
    }
    else if ( family == BLIS_TRMM )
    {
        ker_fp = bli_trmm_xx_ker_var2;
    }
    else
    {
        ker_fp = bli_gemm_ker_var2;
    }

    // The macro-kernel is represented by two nodes. The inner one is the
    // leaf of the whole tree.
    cntl_t* leaf_node = bli_gemm_cntl_obj_create
    (
      BLIS_MR, // needed for bli_thrinfo_rgrow()
      NULL,    // no variant function pointer at this level
      NULL     // no sub-node
    );

    cntl_t* macro_ker_node = bli_gemm_cntl_obj_create
    (
      BLIS_NR, // unused by the macro-kernel itself, but needed for
               // bli_thrinfo_rgrow()
      ker_fp,
      leaf_node
    );

    // Packing of matrix A into row panels.
    cntl_t* pack_a_node = bli_packm_cntl_obj_create
    (
      bli_gemm_packa,
      bli_packm_blk_var1,
      BLIS_MR,
      BLIS_KR,
      FALSE, // invert diagonal
      FALSE, // reverse iteration if upper
      FALSE, // reverse iteration if lower
      BLIS_PACKED_ROW_PANELS,
      BLIS_BUFFER_FOR_A_BLOCK,
      macro_ker_node
    );

    // Partitioning of the m dimension by MC.
    cntl_t* part_mc_node = bli_gemm_cntl_obj_create
    (
      BLIS_MC,
      bli_gemm_blk_var1,
      pack_a_node
    );

    // Packing of matrix B into column panels.
    cntl_t* pack_b_node = bli_packm_cntl_obj_create
    (
      bli_gemm_packb,
      bli_packm_blk_var1,
      BLIS_KR,
      BLIS_NR,
      FALSE, // invert diagonal
      FALSE, // reverse iteration if upper
      FALSE, // reverse iteration if lower
      BLIS_PACKED_COL_PANELS,
      BLIS_BUFFER_FOR_B_PANEL,
      part_mc_node
    );

    // Partitioning of the k dimension by KC.
    cntl_t* part_kc_node = bli_gemm_cntl_obj_create
    (
      BLIS_KC,
      bli_gemm_blk_var3,
      pack_b_node
    );

    // Partitioning of the n dimension by NC; this node is the root.
    return bli_gemm_cntl_obj_create
    (
      BLIS_NC,
      bli_gemm_blk_var2,
      part_kc_node
    );
}
void bli_gemm_cntl_init() { // Create blocksize objects for each dimension. gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, 0, BLIS_DEFAULT_KR_D, 0, BLIS_DEFAULT_KR_C, 0, BLIS_DEFAULT_KR_Z, 0 ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL, BLIS_DGEMM_UKERNEL, BLIS_CGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL ); // Create control tree objects for packm operations. gemm_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mr, gemm_kr, TRUE, // densify; used by hemm/symm FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); gemm_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_kr, gemm_nr, TRUE, // densify; used by hemm/symm FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? 
BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, NULL, NULL, gemm_packa_cntl, gemm_packb_cntl, NULL, gemm_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, NULL, NULL, NULL, NULL, NULL, gemm_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. gemm_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, NULL, NULL, NULL, NULL, NULL, gemm_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm_cntl = gemm_cntl_vl_mm; //bli_gemm_cntl_init_exp(); }
void bli_gemm_cntl_init() { // Create blocksize objects for each dimension. gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D, BLIS_DEFAULT_MC_C, BLIS_MAXIMUM_MC_C, BLIS_DEFAULT_MC_Z, BLIS_MAXIMUM_MC_Z ); gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D, BLIS_DEFAULT_NC_C, BLIS_MAXIMUM_NC_C, BLIS_DEFAULT_NC_Z, BLIS_MAXIMUM_NC_Z ); gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S, BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D, BLIS_DEFAULT_KC_C, BLIS_MAXIMUM_KC_C, BLIS_DEFAULT_KC_Z, BLIS_MAXIMUM_KC_Z ); gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D, BLIS_DEFAULT_MR_C, BLIS_PACKDIM_MR_C, BLIS_DEFAULT_MR_Z, BLIS_PACKDIM_MR_Z ); gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D, BLIS_DEFAULT_NR_C, BLIS_PACKDIM_NR_C, BLIS_DEFAULT_NR_Z, BLIS_PACKDIM_NR_Z ); gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D, BLIS_DEFAULT_KR_C, BLIS_PACKDIM_KR_C, BLIS_DEFAULT_KR_Z, BLIS_PACKDIM_KR_Z ); // Create objects for micro-panel alignment (in bytes). gemm_upanel_a_align = bli_blksz_obj_create( BLIS_UPANEL_A_ALIGN_SIZE_S, 0, BLIS_UPANEL_A_ALIGN_SIZE_D, 0, BLIS_UPANEL_A_ALIGN_SIZE_C, 0, BLIS_UPANEL_A_ALIGN_SIZE_Z, 0 ); gemm_upanel_b_align = bli_blksz_obj_create( BLIS_UPANEL_B_ALIGN_SIZE_S, 0, BLIS_UPANEL_B_ALIGN_SIZE_D, 0, BLIS_UPANEL_B_ALIGN_SIZE_C, 0, BLIS_UPANEL_B_ALIGN_SIZE_Z, 0 ); // Attach the register blksz_t objects as blocksize multiples to the cache // blksz_t objects. bli_blksz_obj_attach_mult_to( gemm_mr, gemm_mc ); bli_blksz_obj_attach_mult_to( gemm_nr, gemm_nc ); bli_blksz_obj_attach_mult_to( gemm_kr, gemm_kc ); // Attach the mr and nr blksz_t objects to each cache blksz_t object. // The primary example of why this is needed relates to nudging kc. 
// In hemm, symm, trmm, or trmm3, we need to know both mr and nr, // since the multiple we target in nudging depends on whether the // structured matrix is on the left or the right. bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_mc ); bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_nc ); bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL, BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_DGEMM_UKERNEL, BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_CGEMM_UKERNEL, BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS ); // Create function pointer object for reference micro-kernels. gemm_ref_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL_REF, FALSE, BLIS_DGEMM_UKERNEL_REF, FALSE, BLIS_CGEMM_UKERNEL_REF, FALSE, BLIS_ZGEMM_UKERNEL_REF, FALSE ); // Create control tree objects for packm operations. gemm_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mr, gemm_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); gemm_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_kr, gemm_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. 
gemm_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, NULL, NULL, gemm_packa_cntl, gemm_packb_cntl, NULL, gemm_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, NULL, NULL, NULL, NULL, NULL, gemm_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. gemm_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, NULL, NULL, NULL, NULL, NULL, gemm_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm_cntl = gemm_cntl_vl_mm; }