void bli_gemm4mh_cntl_init() { // Create blocksize objects for each dimension. // NOTE: the complex blocksizes for 4mh are equal to their // corresponding real domain counterparts. gemm4mh_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D ); gemm4mh_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D ); gemm4mh_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S, BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D ); gemm4mh_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D ); gemm4mh_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D ); gemm4mh_kr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D ); // Attach the register blksz_t objects as sub-blocksizes to the cache // blksz_t objects. bli_blksz_obj_attach_to( gemm4mh_mr, gemm4mh_mc ); bli_blksz_obj_attach_to( gemm4mh_nr, gemm4mh_nc ); bli_blksz_obj_attach_to( gemm4mh_kr, gemm4mh_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm4mh_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMM4MH_UKERNEL, BLIS_CGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM4MH_UKERNEL, BLIS_ZGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations (real only). gemm4mh_packa_cntl_ro = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_mr, gemm4mh_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_RO, BLIS_BUFFER_FOR_A_BLOCK ); gemm4mh_packb_cntl_ro = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_kr, gemm4mh_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_RO, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (imag only). gemm4mh_packa_cntl_io = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_mr, gemm4mh_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_IO, BLIS_BUFFER_FOR_A_BLOCK ); gemm4mh_packb_cntl_io = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_kr, gemm4mh_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_IO, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree object for lowest-level block-panel kernel. gemm4mh_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm4mh_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // // Create control tree for A.real * B.real. // // Create control tree object for outer panel (to block-panel) // problem. (real x real) gemm4mh_cntl_op_bp_rr = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, gemm4mh_ukrs, NULL, gemm4mh_packa_cntl_ro, gemm4mh_packb_cntl_ro, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. (real x real) gemm4mh_cntl_mm_op_rr = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_rr, NULL ); // Create control tree object for very large problem via multiple // general problems. (real x real) gemm4mh_cntl_vl_mm_rr = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_rr, NULL ); // // Create control tree for A.real * B.imag. // // Create control tree object for outer panel (to block-panel) // problem. (real x imag) gemm4mh_cntl_op_bp_ri = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, gemm4mh_ukrs, NULL, gemm4mh_packa_cntl_ro, gemm4mh_packb_cntl_io, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. (real x imag) gemm4mh_cntl_mm_op_ri = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_ri, NULL ); // Create control tree object for very large problem via multiple // general problems. (real x imag) gemm4mh_cntl_vl_mm_ri = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_ri, NULL ); // // Create control tree for A.imag * B.real. // // Create control tree object for outer panel (to block-panel) // problem. (imag x real) gemm4mh_cntl_op_bp_ir = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, gemm4mh_ukrs, NULL, gemm4mh_packa_cntl_io, gemm4mh_packb_cntl_ro, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. (imag x real) gemm4mh_cntl_mm_op_ir = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_ir, NULL ); // Create control tree object for very large problem via multiple // general problems. (imag x real) gemm4mh_cntl_vl_mm_ir = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_ir, NULL ); // // Create control tree for A.imag * B.imag. // // Create control tree object for outer panel (to block-panel) // problem. (imag x imag) gemm4mh_cntl_op_bp_ii = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4mh_mc, gemm4mh_ukrs, NULL, gemm4mh_packa_cntl_io, gemm4mh_packb_cntl_io, NULL, gemm4mh_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. (imag x imag) gemm4mh_cntl_mm_op_ii = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4mh_kc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_op_bp_ii, NULL ); // Create control tree object for very large problem via multiple // general problems. (imag x imag) gemm4mh_cntl_vl_mm_ii = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4mh_nc, gemm4mh_ukrs, NULL, NULL, NULL, NULL, gemm4mh_cntl_mm_op_ii, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm4mh_cntl_rr = gemm4mh_cntl_vl_mm_rr; gemm4mh_cntl_ri = gemm4mh_cntl_vl_mm_ri; gemm4mh_cntl_ir = gemm4mh_cntl_vl_mm_ir; gemm4mh_cntl_ii = gemm4mh_cntl_vl_mm_ii; }
void bli_gemm_cntl_init() { // Create blocksize objects for each dimension. gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D, BLIS_DEFAULT_MC_C, BLIS_MAXIMUM_MC_C, BLIS_DEFAULT_MC_Z, BLIS_MAXIMUM_MC_Z ); gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D, BLIS_DEFAULT_NC_C, BLIS_MAXIMUM_NC_C, BLIS_DEFAULT_NC_Z, BLIS_MAXIMUM_NC_Z ); gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S, BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D, BLIS_DEFAULT_KC_C, BLIS_MAXIMUM_KC_C, BLIS_DEFAULT_KC_Z, BLIS_MAXIMUM_KC_Z ); gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D, BLIS_DEFAULT_MR_C, BLIS_PACKDIM_MR_C, BLIS_DEFAULT_MR_Z, BLIS_PACKDIM_MR_Z ); gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D, BLIS_DEFAULT_NR_C, BLIS_PACKDIM_NR_C, BLIS_DEFAULT_NR_Z, BLIS_PACKDIM_NR_Z ); gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S, BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D, BLIS_DEFAULT_KR_C, BLIS_PACKDIM_KR_C, BLIS_DEFAULT_KR_Z, BLIS_PACKDIM_KR_Z ); // Create objects for micro-panel alignment (in bytes). gemm_upanel_a_align = bli_blksz_obj_create( BLIS_UPANEL_A_ALIGN_SIZE_S, 0, BLIS_UPANEL_A_ALIGN_SIZE_D, 0, BLIS_UPANEL_A_ALIGN_SIZE_C, 0, BLIS_UPANEL_A_ALIGN_SIZE_Z, 0 ); gemm_upanel_b_align = bli_blksz_obj_create( BLIS_UPANEL_B_ALIGN_SIZE_S, 0, BLIS_UPANEL_B_ALIGN_SIZE_D, 0, BLIS_UPANEL_B_ALIGN_SIZE_C, 0, BLIS_UPANEL_B_ALIGN_SIZE_Z, 0 ); // Attach the register blksz_t objects as sub-blocksizes to the cache // blksz_t objects. bli_blksz_obj_attach_to( gemm_mr, gemm_mc ); bli_blksz_obj_attach_to( gemm_nr, gemm_nc ); bli_blksz_obj_attach_to( gemm_kr, gemm_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL, BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_DGEMM_UKERNEL, BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_CGEMM_UKERNEL, BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS, BLIS_ZGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations. gemm_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mr, gemm_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK ); gemm_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_kr, gemm_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm_mc, gemm_ukrs, NULL, gemm_packa_cntl, gemm_packb_cntl, NULL, gemm_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm_kc, gemm_ukrs, NULL, NULL, NULL, NULL, gemm_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. gemm_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm_nc, gemm_ukrs, NULL, NULL, NULL, NULL, gemm_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm_cntl = gemm_cntl_vl_mm; }
void bli_gemm4m_cntl_init() { // Create blocksize objects for each dimension. gemm4m_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_4M_MC_C, BLIS_EXTEND_4M_MC_C, BLIS_DEFAULT_4M_MC_Z, BLIS_EXTEND_4M_MC_Z ); gemm4m_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_4M_NC_C, BLIS_EXTEND_4M_NC_C, BLIS_DEFAULT_4M_NC_Z, BLIS_EXTEND_4M_NC_Z ); gemm4m_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_4M_KC_C, BLIS_EXTEND_4M_KC_C, BLIS_DEFAULT_4M_KC_Z, BLIS_EXTEND_4M_KC_Z ); gemm4m_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_4M_MR_C, BLIS_EXTEND_4M_MR_C, BLIS_DEFAULT_4M_MR_Z, BLIS_EXTEND_4M_MR_Z ); gemm4m_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_4M_NR_C, BLIS_EXTEND_4M_NR_C, BLIS_DEFAULT_4M_NR_Z, BLIS_EXTEND_4M_NR_Z ); gemm4m_kr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_4M_KR_C, BLIS_EXTEND_4M_KR_C, BLIS_DEFAULT_4M_KR_Z, BLIS_EXTEND_4M_KR_Z ); // Attach the register blksz_t objects as sub-blocksizes to the cache // blksz_t objects. bli_blksz_obj_attach_to( gemm4m_mr, gemm4m_mc ); bli_blksz_obj_attach_to( gemm4m_nr, gemm4m_nc ); bli_blksz_obj_attach_to( gemm4m_kr, gemm4m_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. gemm4m_ukrs = bli_func_obj_create( NULL, NULL, BLIS_CGEMM4M_UKERNEL, BLIS_ZGEMM4M_UKERNEL ); // Create control tree objects for packm operations. gemm4m_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT4, gemm4m_mr, gemm4m_kr, TRUE, // densify; used by hemm/symm FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_4M, BLIS_BUFFER_FOR_A_BLOCK ); gemm4m_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT4, gemm4m_kr, gemm4m_nr, TRUE, // densify; used by hemm/symm FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_4M, BLIS_BUFFER_FOR_B_PANEL ); // // Create a control tree for packing A and B, and streaming C. // // Create control tree object for lowest-level block-panel kernel. gemm4m_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm4m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. gemm4m_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm4m_mc, NULL, NULL, gemm4m_packa_cntl, gemm4m_packb_cntl, NULL, gemm4m_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. gemm4m_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm4m_kc, NULL, NULL, NULL, NULL, NULL, gemm4m_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. gemm4m_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm4m_nc, NULL, NULL, NULL, NULL, NULL, gemm4m_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. gemm4m_cntl = gemm4m_cntl_vl_mm; }