cntl_t* bli_trsm_l_cntl_create ( void ) { void* macro_kernel_p = bli_trsm_xx_ker_var2; // Create two nodes for the macro-kernel. cntl_t* trsm_cntl_bu_ke = bli_trsm_cntl_obj_create ( BLIS_MR, // needed for bli_thrinfo_rgrow() NULL, // variant function pointer not used NULL // no sub-node; this is the leaf of the tree. ); cntl_t* trsm_cntl_bp_bu = bli_trsm_cntl_obj_create ( BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow() macro_kernel_p, trsm_cntl_bu_ke ); // Create a node for packing matrix A. cntl_t* trsm_cntl_packa = bli_packm_cntl_obj_create ( bli_trsm_packa, bli_packm_blk_var1, BLIS_MR, BLIS_MR, TRUE, // do NOT invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK, trsm_cntl_bp_bu ); // Create a node for partitioning the m dimension by MC. cntl_t* trsm_cntl_op_bp = bli_trsm_cntl_obj_create ( BLIS_MC, bli_trsm_blk_var1, trsm_cntl_packa ); // Create a node for packing matrix B. cntl_t* trsm_cntl_packb = bli_packm_cntl_obj_create ( bli_trsm_packb, bli_packm_blk_var1, BLIS_MR, BLIS_NR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL, trsm_cntl_op_bp ); // Create a node for partitioning the k dimension by KC. cntl_t* trsm_cntl_mm_op = bli_trsm_cntl_obj_create ( BLIS_KC, bli_trsm_blk_var3, trsm_cntl_packb ); // Create a node for partitioning the n dimension by NC. cntl_t* trsm_cntl_vl_mm = bli_trsm_cntl_obj_create ( BLIS_NC, bli_trsm_blk_var2, trsm_cntl_mm_op ); return trsm_cntl_vl_mm; }
void bli_trsm3m_cntl_init() { // Create function pointer objects for each datatype-specific // gemmtrsm3m_l and gemmtrsm3m_u micro-kernel. gemmtrsm3m_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMMTRSM3M_L_UKERNEL, FALSE, BLIS_ZGEMMTRSM3M_L_UKERNEL, FALSE ); gemmtrsm3m_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CGEMMTRSM3M_U_UKERNEL, FALSE, BLIS_ZGEMMTRSM3M_U_UKERNEL, FALSE ); // Create function pointer objects for each datatype-specific // trsm3m_l and trsm3m_u micro-kernel. trsm3m_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CTRSM3M_L_UKERNEL, FALSE, BLIS_ZTRSM3M_L_UKERNEL, FALSE ); trsm3m_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, BLIS_CTRSM3M_U_UKERNEL, FALSE, BLIS_ZTRSM3M_U_UKERNEL, FALSE ); // Create control tree objects for packm operations (left side). trsm3m_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: n dim multiple must be mr to // support right and bottom-right edge cases gemm3m_mr, gemm3m_mr, TRUE, // invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_3M, BLIS_BUFFER_FOR_A_BLOCK ); trsm3m_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: m dim multiple must be mr since // B_pack is updated (ie: serves as C) in trsm gemm3m_mr, gemm3m_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_3M, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (right side). trsm3m_r_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nr, gemm3m_mr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_3M, BLIS_BUFFER_FOR_A_BLOCK ); trsm3m_r_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_mr, gemm3m_mr, TRUE, // invert diagonal FALSE, // reverse iteration if upper? TRUE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS_3M, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree object for lowest-level block-panel kernel. trsm3m_cntl_bp_ke = bli_trsm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, gemm3m_ukrs, gemmtrsm3m_l_ukrs, gemmtrsm3m_u_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (left side). trsm3m_l_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm3m_mc, gemm3m_ukrs, NULL, NULL, NULL, trsm3m_l_packa_cntl, trsm3m_l_packb_cntl, NULL, trsm3m_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (left side). trsm3m_l_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm3m_kc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_l_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (left side). trsm3m_l_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_l_cntl_mm_op, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (right side). trsm3m_r_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, gemm3m_mc, gemm3m_ukrs, NULL, NULL, NULL, trsm3m_r_packa_cntl, trsm3m_r_packb_cntl, NULL, trsm3m_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (right side). trsm3m_r_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, gemm3m_kc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_r_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (right side). trsm3m_r_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, gemm3m_nc, gemm3m_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, trsm3m_r_cntl_mm_op, NULL, NULL ); // Alias the "master" trsm control trees to shorter names. trsm3m_l_cntl = trsm3m_l_cntl_vl_mm; trsm3m_r_cntl = trsm3m_r_cntl_vl_mm; }