Пример #1
0
void bli_gemm_cntl_init_exp()
{
	// Builds the gemm control tree that packs A while streaming B and C.
	//
	// The nodes are created innermost-first, since each node receives the
	// node created just before it as one of its arguments.
	//
	// gemm_ukrs, gemm_kc, gemm_mc, gemm_nc and gemm_packa_cntl are read but
	// never assigned here; presumably they are initialized before this
	// function is called -- TODO confirm against the caller.

	// Innermost node: optimized unblocked variant 5, driven by gemm_ukrs.
	gemm_cntl_bp_ke5 = bli_gemm_cntl_obj_create(
		BLIS_UNB_OPT, BLIS_VARIANT5,
		NULL,
		gemm_ukrs,
		NULL, NULL, NULL,
		NULL, NULL, NULL );

	// Blocked variant 3 over gemm_kc. This is the only node in this tree
	// that carries a packm control object (gemm_packa_cntl).
	gemm_cntl_pm_bp = bli_gemm_cntl_obj_create(
		BLIS_BLOCKED, BLIS_VARIANT3,
		gemm_kc,
		NULL, NULL,
		gemm_packa_cntl,
		NULL, NULL,
		gemm_cntl_bp_ke5,
		NULL );

	// Blocked variant 1 over gemm_mc, wrapping the node above.
	gemm_cntl_mm_pm = bli_gemm_cntl_obj_create(
		BLIS_BLOCKED, BLIS_VARIANT1,
		gemm_mc,
		NULL, NULL, NULL, NULL, NULL,
		gemm_cntl_pm_bp,
		NULL );

	// Outermost node: blocked variant 2 over gemm_nc.
	gemm_cntl_vl_mm5 = bli_gemm_cntl_obj_create(
		BLIS_BLOCKED, BLIS_VARIANT2,
		gemm_nc,
		NULL, NULL, NULL, NULL, NULL,
		gemm_cntl_mm_pm,
		NULL );

	// Publish the root of the tree under its short name.
	gemm_cntl5 = gemm_cntl_vl_mm5;
}
Пример #2
0
void bli_gemm4mb_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm4mb_mc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MC_S/2, BLIS_MAXIMUM_MC_S/2,
	                      BLIS_DEFAULT_MC_D/2, BLIS_MAXIMUM_MC_D/2 );
	gemm4mb_nc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NC_S/2, BLIS_MAXIMUM_NC_S/2,
	                      BLIS_DEFAULT_NC_D/2, BLIS_MAXIMUM_NC_D/2 );
	gemm4mb_kc
	=
	bli_blksz_obj_create( 0,                   0,
	                      0,                   0,
	                      BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                      BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D );
	gemm4mb_mr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                      BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
	gemm4mb_nr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                      BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
	gemm4mb_kr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                      BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


	// Attach the register blksz_t objects as blocksize multiples to the cache
	// blksz_t objects.
	bli_blksz_obj_attach_mult_to( gemm4mb_mr, gemm4mb_mc );
	bli_blksz_obj_attach_mult_to( gemm4mb_nr, gemm4mb_nc );
	bli_blksz_obj_attach_mult_to( gemm4mb_kr, gemm4mb_kc );


	// The cache blocksizes that were scaled above need to be rounded down
	// to their respective nearest register blocksize multiples. Note that
	// this can only happen after the appropriate register blocksize is
	// actually attached as a multiple.
	bli_blksz_reduce_to_mult( gemm4mb_mc );
	bli_blksz_reduce_to_mult( gemm4mb_nc );


	// Attach the mr and nr blksz_t objects to each cache blksz_t object.
	// The primary example of why this is needed relates to nudging kc.
	// In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
	// since the multiple we target in nudging depends on whether the
	// structured matrix is on the left or the right.
	bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_mc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_nc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mb_mr, gemm4mb_nr, gemm4mb_kc );


	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm4mb_ukrs
	=
	bli_func_obj_create(
	    NULL,                  FALSE,
	    NULL,                  FALSE,
	    BLIS_CGEMM4MB_UKERNEL, BLIS_CGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS,
	    BLIS_ZGEMM4MB_UKERNEL, BLIS_ZGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS );


	// Create control tree objects for packm operations.
	gemm4mb_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mb_mr,
	                           gemm4mb_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS_4MI,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm4mb_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mb_kr,
	                           gemm4mb_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS_4MI,
	                           BLIS_BUFFER_FOR_B_PANEL );


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm4mb_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT3,
	                          NULL,
	                          gemm4mb_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm4mb_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mb_mc,
	                          NULL,
	                          NULL,
	                          gemm4mb_packa_cntl,
	                          gemm4mb_packb_cntl,
	                          NULL,
	                          gemm4mb_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm4mb_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mb_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mb_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm4mb_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mb_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mb_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm4mb_cntl = gemm4mb_cntl_vl_mm;

}
Пример #3
0
void bli_gemm_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm_mc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
	                      BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D,
	                      BLIS_DEFAULT_MC_C, BLIS_MAXIMUM_MC_C,
	                      BLIS_DEFAULT_MC_Z, BLIS_MAXIMUM_MC_Z );
	gemm_nc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
	                      BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D,
	                      BLIS_DEFAULT_NC_C, BLIS_MAXIMUM_NC_C,
	                      BLIS_DEFAULT_NC_Z, BLIS_MAXIMUM_NC_Z );
	gemm_kc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                      BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D,
	                      BLIS_DEFAULT_KC_C, BLIS_MAXIMUM_KC_C,
	                      BLIS_DEFAULT_KC_Z, BLIS_MAXIMUM_KC_Z );
	gemm_mr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                      BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D,
	                      BLIS_DEFAULT_MR_C, BLIS_PACKDIM_MR_C,
	                      BLIS_DEFAULT_MR_Z, BLIS_PACKDIM_MR_Z );
	gemm_nr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                      BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D,
	                      BLIS_DEFAULT_NR_C, BLIS_PACKDIM_NR_C,
	                      BLIS_DEFAULT_NR_Z, BLIS_PACKDIM_NR_Z );
	gemm_kr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                      BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D,
	                      BLIS_DEFAULT_KR_C, BLIS_PACKDIM_KR_C,
	                      BLIS_DEFAULT_KR_Z, BLIS_PACKDIM_KR_Z );


	// Create objects for micro-panel alignment (in bytes).
	gemm_upanel_a_align
	=
	bli_blksz_obj_create( BLIS_UPANEL_A_ALIGN_SIZE_S, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_D, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_C, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_Z, 0 );
	gemm_upanel_b_align
	=
	bli_blksz_obj_create( BLIS_UPANEL_B_ALIGN_SIZE_S, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_D, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_C, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_Z, 0 );


	// Attach the register blksz_t objects as sub-blocksizes to the cache
	// blksz_t objects.
	bli_blksz_obj_attach_to( gemm_mr, gemm_mc );
	bli_blksz_obj_attach_to( gemm_nr, gemm_nc );
	bli_blksz_obj_attach_to( gemm_kr, gemm_kc );


	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm_ukrs
	=
	bli_func_obj_create( BLIS_SGEMM_UKERNEL, BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_DGEMM_UKERNEL, BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_CGEMM_UKERNEL, BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_ZGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS );


	// Create control tree objects for packm operations.
	gemm_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_mr,
	                           gemm_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_kr,
	                           gemm_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL,
	                          gemm_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          gemm_ukrs,
	                          NULL,
	                          gemm_packa_cntl,
	                          gemm_packb_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          gemm_ukrs,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          gemm_ukrs,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm_cntl = gemm_cntl_vl_mm;
}
Пример #4
0
void bli_gemm_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
	                                BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
	                                BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
	                                BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );

	gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
	                                BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
	                                BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
	                                BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );

	gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
	                                BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
	                                BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
	                                BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );

	gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
	                                BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
	                                BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
	                                BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );

	gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
	                                BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
	                                BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
	                                BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );

	gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
	                                BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
	                                BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
	                                BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );


	// Create control tree objects for packm operations.
	gemm_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm_mr,
	                           gemm_kr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm_kr,
	                           gemm_nr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );

	// Create control tree objects for packm/unpackm operations on C.
	gemm_packc_cntl
	=
	bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_mr,
	                           gemm_nr,
	                           FALSE, // already dense; densify not necessary
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COLUMNS,
	                           BLIS_BUFFER_FOR_C_PANEL );

	gemm_unpackc_cntl
	=
	bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
	                             BLIS_VARIANT1,
	                             NULL ); // no blocksize needed


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          NULL,
	                          gemm_packa_cntl,
	                          gemm_packb_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm_cntl = gemm_cntl_vl_mm;

#if 0
	//
	// Create a control tree for packing A, and streaming B and C.
	//

	gemm_cntl_bp_ke5
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT5,
	                          NULL, NULL, NULL, NULL,
	                          NULL, NULL, NULL, NULL );
	gemm_cntl_pm_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          NULL,
	                          gemm_packa_cntl,
	                          NULL,
	                          //gemm_packc_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke5,
	                          //gemm_unpackc_cntl );
	                          NULL );

	gemm_cntl_mm_pm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_pm_bp,
	                          NULL );

	gemm_cntl_vl_mm5
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_pm,
	                          NULL );

	gemm_cntl_packa = gemm_cntl_vl_mm5;
#endif
}
Пример #5
0
void bli_gemm3m3_cntl_init()
{
    // Create blocksize objects for each dimension.
    // NOTE: the complex blocksizes for 3m3 are generally equal to their
    // corresponding real domain counterparts. However, we want to promote
    // similar cache footprints for the micro-panels of A and B (when
    // compared to executing in the real domain), and since the complex
    // micro-panels are three times as "fat" (due to storing real, imaginary
    // and real+imaginary parts), we reduce KC by a factor of 2 to
    // compensate. Ideally, we would reduce by a factor of 3, but that
    // could get messy vis-a-vis keeping KC a multiple of the register
    // blocksizes.
    gemm3m3_mc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
                                  BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D );
    gemm3m3_nc
        =
            bli_blksz_obj_create( 0,                   0,
                                  0,                   0,
                                  BLIS_DEFAULT_NC_S/3, BLIS_MAXIMUM_NC_S/3,
                                  BLIS_DEFAULT_NC_D/3, BLIS_MAXIMUM_NC_D/3 );
    gemm3m3_kc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
                                  BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D );
    gemm3m3_mr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
                                  BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
    gemm3m3_nr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
                                  BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
    gemm3m3_kr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
                                  BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


    // Attach the register blksz_t objects as blocksize multiples to the cache
    // blksz_t objects.
    bli_blksz_obj_attach_mult_to( gemm3m3_mr, gemm3m3_mc );
    bli_blksz_obj_attach_mult_to( gemm3m3_nr, gemm3m3_nc );
    bli_blksz_obj_attach_mult_to( gemm3m3_kr, gemm3m3_kc );


    // The cache blocksizes that were scaled above need to be rounded down
    // to their respective nearest register blocksize multiples. Note that
    // this can only happen after the appropriate register blocksize is
    // actually attached as a multiple.
    bli_blksz_reduce_to_mult( gemm3m3_nc );


    // Attach the mr and nr blksz_t objects to each cache blksz_t object.
    // The primary example of why this is needed relates to nudging kc.
    // In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
    // since the multiple we target in nudging depends on whether the
    // structured matrix is on the left or the right.
    bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_mc );
    bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_nc );
    bli_blksz_obj_attach_mr_nr_to( gemm3m3_mr, gemm3m3_nr, gemm3m3_kc );


    // Create function pointer object for each datatype-specific gemm
    // micro-kernel.
    gemm3m3_ukrs
        =
            bli_func_obj_create(
                NULL,                  FALSE,
                NULL,                  FALSE,
                BLIS_CGEMM3M3_UKERNEL, BLIS_CGEMM3M3_UKERNEL_PREFERS_CONTIG_ROWS,
                BLIS_ZGEMM3M3_UKERNEL, BLIS_ZGEMM3M3_UKERNEL_PREFERS_CONTIG_ROWS );


    // Create control tree objects for packm operations.
    gemm3m3_packb_cntl
        =
            bli_packm_cntl_obj_create( BLIS_BLOCKED,
                                       BLIS_VARIANT2,
                                       gemm3m3_kr,
                                       gemm3m3_nr,
                                       FALSE, // do NOT invert diagonal
                                       FALSE, // reverse iteration if upper?
                                       FALSE, // reverse iteration if lower?
                                       BLIS_PACKED_COL_PANELS_3MS,
                                       BLIS_BUFFER_FOR_B_PANEL );


    //
    // Create a control tree for packing A and B, and streaming C.
    //

    // Create control tree object for lowest-level block-panel kernel.
    gemm3m3_cntl_bp_ke
        =
            bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
                                      BLIS_VARIANT2,
                                      NULL,
                                      gemm3m3_ukrs,
                                      NULL, NULL, NULL,
                                      NULL, NULL, NULL );

    // Create control tree object for outer panel (to block-panel)
    // problem.
    gemm3m3_cntl_op_bp
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT4,
                                      gemm3m3_mc,
                                      NULL,
                                      NULL,
                                      NULL, // packm cntl nodes accessed directly from blk_var4
                                      gemm3m3_packb_cntl,
                                      NULL,
                                      gemm3m3_cntl_bp_ke,
                                      NULL );

    // Create control tree object for general problem via multiple
    // rank-k (outer panel) updates.
    gemm3m3_cntl_mm_op
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT3,
                                      gemm3m3_kc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm3m3_cntl_op_bp,
                                      NULL );

    // Create control tree object for very large problem via multiple
    // general problems.
    gemm3m3_cntl_vl_mm
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT2,
                                      gemm3m3_nc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm3m3_cntl_mm_op,
                                      NULL );

    // Alias the "master" gemm control tree to a shorter name.
    gemm3m3_cntl = gemm3m3_cntl_vl_mm;

}
Пример #6
0
void bli_gemm4m1_cntl_init()
{
    // Create blocksize objects for each dimension.
    // NOTE: the complex blocksizes for 4m1 are generally equal to their
    // corresponding real domain counterparts. However, we want to promote
    // similar cache footprints for the micro-panels of A and B (when
    // compared to executing in the real domain), and since the complex
    // micro-panels are twice as "fat" (due to storing real and imaginary
    // parts), we reduce KC by a factor of 2 to compensate.
    gemm4m1_mc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
                                  BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D );
    gemm4m1_nc
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
                                  BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D );
    gemm4m1_kc
        =
            bli_blksz_obj_create( 0,                   0,
                                  0,                   0,
                                  BLIS_DEFAULT_KC_S/2, BLIS_MAXIMUM_KC_S/2,
                                  BLIS_DEFAULT_KC_D/2, BLIS_MAXIMUM_KC_D/2 );
    gemm4m1_mr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
                                  BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
    gemm4m1_nr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
                                  BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
    gemm4m1_kr
        =
            bli_blksz_obj_create( 0,                 0,
                                  0,                 0,
                                  BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
                                  BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


    // Attach the register blksz_t objects as blocksize multiples to the cache
    // blksz_t objects.
    bli_blksz_obj_attach_mult_to( gemm4m1_mr, gemm4m1_mc );
    bli_blksz_obj_attach_mult_to( gemm4m1_nr, gemm4m1_nc );
    bli_blksz_obj_attach_mult_to( gemm4m1_kr, gemm4m1_kc );


    // Attach the mr and nr blksz_t objects to each cache blksz_t object.
    // The primary example of why this is needed relates to nudging kc.
    // In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
    // since the multiple we target in nudging depends on whether the
    // structured matrix is on the left or the right.
    bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_mc );
    bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_nc );
    bli_blksz_obj_attach_mr_nr_to( gemm4m1_mr, gemm4m1_nr, gemm4m1_kc );


    // Create function pointer object for each datatype-specific gemm
    // micro-kernel.
    gemm4m1_ukrs
        =
            bli_func_obj_create(
                NULL,                 FALSE,
                NULL,                 FALSE,
                BLIS_CGEMM4M1_UKERNEL, BLIS_CGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS,
                BLIS_ZGEMM4M1_UKERNEL, BLIS_ZGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS );


    // Create control tree objects for packm operations.
    gemm4m1_packa_cntl
        =
            bli_packm_cntl_obj_create( BLIS_BLOCKED,
                                       BLIS_VARIANT1,
                                       gemm4m1_mr,
                                       gemm4m1_kr,
                                       FALSE, // do NOT invert diagonal
                                       FALSE, // reverse iteration if upper?
                                       FALSE, // reverse iteration if lower?
                                       BLIS_PACKED_ROW_PANELS_4MI,
                                       BLIS_BUFFER_FOR_A_BLOCK );

    gemm4m1_packb_cntl
        =
            bli_packm_cntl_obj_create( BLIS_BLOCKED,
                                       BLIS_VARIANT1,
                                       gemm4m1_kr,
                                       gemm4m1_nr,
                                       FALSE, // do NOT invert diagonal
                                       FALSE, // reverse iteration if upper?
                                       FALSE, // reverse iteration if lower?
                                       BLIS_PACKED_COL_PANELS_4MI,
                                       BLIS_BUFFER_FOR_B_PANEL );


    //
    // Create a control tree for packing A and B, and streaming C.
    //

    // Create control tree object for lowest-level block-panel kernel.
    gemm4m1_cntl_bp_ke
        =
            bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
                                      BLIS_VARIANT2,
                                      NULL,
                                      gemm4m1_ukrs,
                                      NULL, NULL, NULL,
                                      NULL, NULL, NULL );

    // Create control tree object for outer panel (to block-panel)
    // problem.
    gemm4m1_cntl_op_bp
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT1,
                                      gemm4m1_mc,
                                      NULL,
                                      NULL,
                                      gemm4m1_packa_cntl,
                                      gemm4m1_packb_cntl,
                                      NULL,
                                      gemm4m1_cntl_bp_ke,
                                      NULL );

    // Create control tree object for general problem via multiple
    // rank-k (outer panel) updates.
    gemm4m1_cntl_mm_op
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT3,
                                      gemm4m1_kc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm4m1_cntl_op_bp,
                                      NULL );

    // Create control tree object for very large problem via multiple
    // general problems.
    gemm4m1_cntl_vl_mm
        =
            bli_gemm_cntl_obj_create( BLIS_BLOCKED,
                                      BLIS_VARIANT2,
                                      gemm4m1_nc,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      NULL,
                                      gemm4m1_cntl_mm_op,
                                      NULL );

    // Alias the "master" gemm control tree to a shorter name.
    gemm4m1_cntl = gemm4m1_cntl_vl_mm;

}
Пример #7
0
void bli_gemm4mh_cntl_init()
{
	// Create blocksize objects for each dimension.
	// NOTE: the complex blocksizes for 4mh are equal to their
	// corresponding real domain counterparts.
	gemm4mh_mc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
	                      BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D );
	gemm4mh_nc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
	                      BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D );
	gemm4mh_kc
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                      BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D );
	gemm4mh_mr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                      BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D );
	gemm4mh_nr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                      BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D );
	gemm4mh_kr
	=
	bli_blksz_obj_create( 0,                 0,
	                      0,                 0,
	                      BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                      BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D );


	// Attach the register blksz_t objects as blocksize multiples to the cache
	// blksz_t objects.
	bli_blksz_obj_attach_mult_to( gemm4mh_mr, gemm4mh_mc );
	bli_blksz_obj_attach_mult_to( gemm4mh_nr, gemm4mh_nc );
	bli_blksz_obj_attach_mult_to( gemm4mh_kr, gemm4mh_kc );


	// Attach the mr and nr blksz_t objects to each cache blksz_t object.
	// The primary example of why this is needed relates to nudging kc.
	// In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
	// since the multiple we target in nudging depends on whether the
	// structured matrix is on the left or the right.
	bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_mc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_nc );
	bli_blksz_obj_attach_mr_nr_to( gemm4mh_mr, gemm4mh_nr, gemm4mh_kc );


	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm4mh_ukrs
	=
	bli_func_obj_create(
	    NULL,                  FALSE,
	    NULL,                  FALSE,
	    BLIS_CGEMM4MH_UKERNEL, BLIS_CGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS,
	    BLIS_ZGEMM4MH_UKERNEL, BLIS_ZGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS );


	// Create control tree objects for packm operations (real only).
	gemm4mh_packa_cntl_ro
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_mr,
	                           gemm4mh_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS_RO,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm4mh_packb_cntl_ro
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_kr,
	                           gemm4mh_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS_RO,
	                           BLIS_BUFFER_FOR_B_PANEL );

	// Create control tree objects for packm operations (imag only).
	gemm4mh_packa_cntl_io
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_mr,
	                           gemm4mh_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS_IO,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm4mh_packb_cntl_io
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT2,
	                           gemm4mh_kr,
	                           gemm4mh_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS_IO,
	                           BLIS_BUFFER_FOR_B_PANEL );


	// Create control tree object for lowest-level block-panel kernel.
	gemm4mh_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL,
	                          gemm4mh_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	//
	// Create control tree for A.real * B.real.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (real x real)
	gemm4mh_cntl_op_bp_rr
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_ro,
	                          gemm4mh_packb_cntl_ro,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (real x real)
	gemm4mh_cntl_mm_op_rr
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_rr,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (real x real)
	gemm4mh_cntl_vl_mm_rr
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_rr,
	                          NULL );

	//
	// Create control tree for A.real * B.imag.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (real x imag)
	gemm4mh_cntl_op_bp_ri
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_ro,
	                          gemm4mh_packb_cntl_io,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (real x imag)
	gemm4mh_cntl_mm_op_ri
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_ri,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (real x imag)
	gemm4mh_cntl_vl_mm_ri
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_ri,
	                          NULL );

	//
	// Create control tree for A.imag * B.real.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (imag x real)
	gemm4mh_cntl_op_bp_ir
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_io,
	                          gemm4mh_packb_cntl_ro,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (imag x real)
	gemm4mh_cntl_mm_op_ir
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_ir,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (imag x real)
	gemm4mh_cntl_vl_mm_ir
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_ir,
	                          NULL );

	//
	// Create control tree for A.imag * B.imag.
	//

	// Create control tree object for outer panel (to block-panel)
	// problem. (imag x imag)
	gemm4mh_cntl_op_bp_ii
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm4mh_mc,
	                          NULL,
	                          NULL,
	                          gemm4mh_packa_cntl_io,
	                          gemm4mh_packb_cntl_io,
	                          NULL,
	                          gemm4mh_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates. (imag x imag)
	gemm4mh_cntl_mm_op_ii
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm4mh_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_op_bp_ii,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems. (imag x imag)
	gemm4mh_cntl_vl_mm_ii
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm4mh_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm4mh_cntl_mm_op_ii,
	                          NULL );


	// Alias the "master" gemm control tree to a shorter name.
	gemm4mh_cntl_rr = gemm4mh_cntl_vl_mm_rr;
	gemm4mh_cntl_ri = gemm4mh_cntl_vl_mm_ri;
	gemm4mh_cntl_ir = gemm4mh_cntl_vl_mm_ir;
	gemm4mh_cntl_ii = gemm4mh_cntl_vl_mm_ii;

}
Пример #8
0
cntl_t* bli_gemm_cntl_create( opid_t family )
{
	// Builds a gemm control tree for the given operation family and
	// returns its root node. Nodes are created innermost first, each one
	// receiving the previously created node as its sub-node, so the
	// resulting chain (root to leaf) is:
	//   NC loop -> KC loop -> pack B -> MC loop -> pack A
	//           -> macro-kernel -> leaf

	// Select the macro-kernel: herk and trmm have their own; every other
	// family uses the gemm macro-kernel.
	void* ker_fp;

	if ( family == BLIS_HERK )
	{
		ker_fp = bli_herk_x_ker_var2;
	}
	else if ( family == BLIS_TRMM )
	{
		ker_fp = bli_trmm_xx_ker_var2;
	}
	else
	{
		ker_fp = bli_gemm_ker_var2;
	}

	// The macro-kernel is represented by two nodes. The innermost one is
	// the leaf of the whole tree.
	cntl_t* leaf_node = bli_gemm_cntl_obj_create
	(
	  BLIS_MR, // needed for bli_thrinfo_rgrow()
	  NULL,    // variant function pointer not used
	  NULL     // no sub-node; this is the leaf of the tree.
	);

	cntl_t* macro_ker_node = bli_gemm_cntl_obj_create
	(
	  BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow()
	  ker_fp,
	  leaf_node
	);

	// Node that packs matrix A.
	cntl_t* pack_a_node = bli_packm_cntl_obj_create
	(
	  bli_gemm_packa,
	  bli_packm_blk_var1,
	  BLIS_MR,
	  BLIS_KR,
	  FALSE,   // do NOT invert diagonal
	  FALSE,   // reverse iteration if upper?
	  FALSE,   // reverse iteration if lower?
	  BLIS_PACKED_ROW_PANELS,
	  BLIS_BUFFER_FOR_A_BLOCK,
	  macro_ker_node
	);

	// Node that partitions the m dimension by MC.
	cntl_t* mc_loop_node = bli_gemm_cntl_obj_create
	(
	  BLIS_MC,
	  bli_gemm_blk_var1,
	  pack_a_node
	);

	// Node that packs matrix B.
	cntl_t* pack_b_node = bli_packm_cntl_obj_create
	(
	  bli_gemm_packb,
	  bli_packm_blk_var1,
	  BLIS_KR,
	  BLIS_NR,
	  FALSE,   // do NOT invert diagonal
	  FALSE,   // reverse iteration if upper?
	  FALSE,   // reverse iteration if lower?
	  BLIS_PACKED_COL_PANELS,
	  BLIS_BUFFER_FOR_B_PANEL,
	  mc_loop_node
	);

	// Node that partitions the k dimension by KC.
	cntl_t* kc_loop_node = bli_gemm_cntl_obj_create
	(
	  BLIS_KC,
	  bli_gemm_blk_var3,
	  pack_b_node
	);

	// Node that partitions the n dimension by NC; this is the root.
	cntl_t* nc_loop_node = bli_gemm_cntl_obj_create
	(
	  BLIS_NC,
	  bli_gemm_blk_var2,
	  kc_loop_node
	);

	return nc_loop_node;
}
Пример #9
0
void bli_gemm_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
	                                BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
	                                BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
	                                BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );

	gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
	                                BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
	                                BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
	                                BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );

	gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
	                                BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
	                                BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
	                                BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );

	gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
	                                BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
	                                BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
	                                BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );

	gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
	                                BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
	                                BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
	                                BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );

	gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, 0,
	                                BLIS_DEFAULT_KR_D, 0,
	                                BLIS_DEFAULT_KR_C, 0,
	                                BLIS_DEFAULT_KR_Z, 0 );

	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm_ukrs = bli_func_obj_create( BLIS_SGEMM_UKERNEL,
	                                 BLIS_DGEMM_UKERNEL,
	                                 BLIS_CGEMM_UKERNEL,
	                                 BLIS_ZGEMM_UKERNEL );


	// Create control tree objects for packm operations.
	gemm_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_mr,
	                           gemm_kr,
	                           TRUE,  // densify; used by hemm/symm
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_kr,
	                           gemm_nr,
	                           TRUE,  // densify; used by hemm/symm
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL,
	                          gemm_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          NULL,
	                          NULL,
	                          gemm_packa_cntl,
	                          gemm_packb_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm_cntl = gemm_cntl_vl_mm;

	//bli_gemm_cntl_init_exp();
}
Пример #10
0
void bli_gemm_cntl_init()
{
	// Create blocksize objects for each dimension.
	gemm_mc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S,
	                      BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D,
	                      BLIS_DEFAULT_MC_C, BLIS_MAXIMUM_MC_C,
	                      BLIS_DEFAULT_MC_Z, BLIS_MAXIMUM_MC_Z );
	gemm_nc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S,
	                      BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D,
	                      BLIS_DEFAULT_NC_C, BLIS_MAXIMUM_NC_C,
	                      BLIS_DEFAULT_NC_Z, BLIS_MAXIMUM_NC_Z );
	gemm_kc
	=
	bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_MAXIMUM_KC_S,
	                      BLIS_DEFAULT_KC_D, BLIS_MAXIMUM_KC_D,
	                      BLIS_DEFAULT_KC_C, BLIS_MAXIMUM_KC_C,
	                      BLIS_DEFAULT_KC_Z, BLIS_MAXIMUM_KC_Z );
	gemm_mr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S,
	                      BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D,
	                      BLIS_DEFAULT_MR_C, BLIS_PACKDIM_MR_C,
	                      BLIS_DEFAULT_MR_Z, BLIS_PACKDIM_MR_Z );
	gemm_nr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S,
	                      BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D,
	                      BLIS_DEFAULT_NR_C, BLIS_PACKDIM_NR_C,
	                      BLIS_DEFAULT_NR_Z, BLIS_PACKDIM_NR_Z );
	gemm_kr
	=
	bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_PACKDIM_KR_S,
	                      BLIS_DEFAULT_KR_D, BLIS_PACKDIM_KR_D,
	                      BLIS_DEFAULT_KR_C, BLIS_PACKDIM_KR_C,
	                      BLIS_DEFAULT_KR_Z, BLIS_PACKDIM_KR_Z );


	// Create objects for micro-panel alignment (in bytes).
	gemm_upanel_a_align
	=
	bli_blksz_obj_create( BLIS_UPANEL_A_ALIGN_SIZE_S, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_D, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_C, 0,
	                      BLIS_UPANEL_A_ALIGN_SIZE_Z, 0 );
	gemm_upanel_b_align
	=
	bli_blksz_obj_create( BLIS_UPANEL_B_ALIGN_SIZE_S, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_D, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_C, 0,
	                      BLIS_UPANEL_B_ALIGN_SIZE_Z, 0 );


	// Attach the register blksz_t objects as blocksize multiples to the cache
	// blksz_t objects.
	bli_blksz_obj_attach_mult_to( gemm_mr, gemm_mc );
	bli_blksz_obj_attach_mult_to( gemm_nr, gemm_nc );
	bli_blksz_obj_attach_mult_to( gemm_kr, gemm_kc );


	// Attach the mr and nr blksz_t objects to each cache blksz_t object.
	// The primary example of why this is needed relates to nudging kc.
	// In hemm, symm, trmm, or trmm3, we need to know both mr and nr,
	// since the multiple we target in nudging depends on whether the
	// structured matrix is on the left or the right.
	bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_mc );
	bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_nc );
	bli_blksz_obj_attach_mr_nr_to( gemm_mr, gemm_nr, gemm_kc );


	// Create function pointer object for each datatype-specific gemm
	// micro-kernel.
	gemm_ukrs
	=
	bli_func_obj_create( BLIS_SGEMM_UKERNEL, BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_DGEMM_UKERNEL, BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_CGEMM_UKERNEL, BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS,
	                     BLIS_ZGEMM_UKERNEL, BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS );


	// Create function pointer object for reference micro-kernels.
	gemm_ref_ukrs
	=
	bli_func_obj_create( BLIS_SGEMM_UKERNEL_REF, FALSE,
	                     BLIS_DGEMM_UKERNEL_REF, FALSE,
	                     BLIS_CGEMM_UKERNEL_REF, FALSE,
	                     BLIS_ZGEMM_UKERNEL_REF, FALSE );


	// Create control tree objects for packm operations.
	gemm_packa_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_mr,
	                           gemm_kr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_ROW_PANELS,
	                           BLIS_BUFFER_FOR_A_BLOCK );

	gemm_packb_cntl
	=
	bli_packm_cntl_obj_create( BLIS_BLOCKED,
	                           BLIS_VARIANT1,
	                           gemm_kr,
	                           gemm_nr,
	                           FALSE, // do NOT invert diagonal
	                           FALSE, // reverse iteration if upper?
	                           FALSE, // reverse iteration if lower?
	                           BLIS_PACKED_COL_PANELS,
	                           BLIS_BUFFER_FOR_B_PANEL );


	//
	// Create a control tree for packing A and B, and streaming C.
	//

	// Create control tree object for lowest-level block-panel kernel.
	gemm_cntl_bp_ke
	=
	bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
	                          BLIS_VARIANT2,
	                          NULL,
	                          gemm_ukrs,
	                          NULL, NULL, NULL,
	                          NULL, NULL, NULL );

	// Create control tree object for outer panel (to block-panel)
	// problem.
	gemm_cntl_op_bp
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT1,
	                          gemm_mc,
	                          NULL,
	                          NULL,
	                          gemm_packa_cntl,
	                          gemm_packb_cntl,
	                          NULL,
	                          gemm_cntl_bp_ke,
	                          NULL );

	// Create control tree object for general problem via multiple
	// rank-k (outer panel) updates.
	gemm_cntl_mm_op
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT3,
	                          gemm_kc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_op_bp,
	                          NULL );

	// Create control tree object for very large problem via multiple
	// general problems.
	gemm_cntl_vl_mm
	=
	bli_gemm_cntl_obj_create( BLIS_BLOCKED,
	                          BLIS_VARIANT2,
	                          gemm_nc,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          NULL,
	                          gemm_cntl_mm_op,
	                          NULL );

	// Alias the "master" gemm control tree to a shorter name.
	gemm_cntl = gemm_cntl_vl_mm;
}