/*
 * Build the control-tree nodes used by the hemm test for variant `var`.
 *
 *   var         variant index; added to FLA_UNB_VAR_OFFSET and
 *               FLA_BLK_VAR_OFFSET to form the two variant ids.
 *   b_alg_flat  value passed for all four arguments of
 *               FLA_Blocksize_create().
 *
 * The results are stored in hemm_cntl_bsize, hemm_cntl_unb and
 * hemm_cntl_blk, which are defined outside this function.
 */
void libfla_test_hemm_cntl_create( unsigned int var, dim_t b_alg_flat )
{
  const int unb_variant = FLA_UNB_VAR_OFFSET + var;
  const int blk_variant = FLA_BLK_VAR_OFFSET + var;

  hemm_cntl_bsize = FLA_Blocksize_create( b_alg_flat,
                                          b_alg_flat,
                                          b_alg_flat,
                                          b_alg_flat );

  /* Unblocked node: no blocksize and no sub-control arguments. */
  hemm_cntl_unb = FLA_Cntl_hemm_obj_create( FLA_FLAT,
                                            unb_variant,
                                            NULL,
                                            NULL,
                                            NULL,
                                            NULL,
                                            NULL );

  /* Blocked node: uses the blocksize created above and the BLAS-level
     scal/hemm/gemm control nodes as its sub-control arguments. */
  hemm_cntl_blk = FLA_Cntl_hemm_obj_create( FLA_FLAT,
                                            blk_variant,
                                            hemm_cntl_bsize,
                                            fla_scal_cntl_blas,
                                            fla_hemm_cntl_blas,
                                            fla_gemm_cntl_blas,
                                            fla_gemm_cntl_blas );
}
void FLA_Hemm_cntl_init() { // Set blocksizes with default values for conventional storage. fla_hemm_var1_bsize = FLA_Query_blocksizes( FLA_DIMENSION_MIN ); fla_hemm_var9_bsize = FLA_Query_blocksizes( FLA_DIMENSION_MIN ); // Create a control tree that assumes A and B are b x b blocks. fla_hemm_cntl_blas = FLA_Cntl_hemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL, NULL ); // Create a control tree that assumes A is a block and B is a panel. fla_hemm_cntl_bp = FLA_Cntl_hemm_obj_create( FLA_FLAT, FLA_BLOCKED_VARIANT9, fla_hemm_var9_bsize, fla_scal_cntl_blas, fla_hemm_cntl_blas, NULL, NULL ); // Create a control tree that assumes A is large and B is a panel. fla_hemm_cntl_mp = FLA_Cntl_hemm_obj_create( FLA_FLAT, FLA_BLOCKED_VARIANT1, fla_hemm_var1_bsize, fla_scal_cntl_blas, fla_hemm_cntl_blas, fla_gemm_cntl_blas, fla_gemm_cntl_blas ); // Create a control tree that assumes A and B are both large. fla_hemm_cntl_mm = FLA_Cntl_hemm_obj_create( FLA_FLAT, FLA_BLOCKED_VARIANT9, fla_hemm_var9_bsize, fla_scal_cntl_blas, fla_hemm_cntl_mp, NULL, NULL ); }
void time_Hemm_ll( int variant, int type, int nrepeats, int n, int nb_alg, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, double *dtime, double *diff, double *gflops ) { int irep; double dtime_old = 1.0e9; FLA_Obj C_old; fla_blocksize_t* bp; fla_gemm_t* cntl_gemm_blas; fla_hemm_t* cntl_hemm_blas; fla_hemm_t* cntl_hemm_var; if ( type == FLA_ALG_UNBLOCKED && n > 300 ) { *diff = 0.0; *gflops = 0.0; return; } bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg ); cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL ); cntl_hemm_blas = FLA_Cntl_hemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL ); cntl_hemm_var = FLA_Cntl_hemm_obj_create( FLA_FLAT, variant, bp, cntl_hemm_blas, cntl_gemm_blas, cntl_gemm_blas ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old ); FLA_Copy_external( C, C_old ); for ( irep = 0 ; irep < nrepeats; irep++ ) { FLA_Copy_external( C_old, C ); *dtime = FLA_Clock(); switch( variant ){ case 0: // Time reference implementation REF_Hemm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_ONE, A, B, FLA_ONE, C ); break; case 1:{ // Time variant 1 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var1( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 2:{ // Time variant 2 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var2( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 3:{ // Time variant 3 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var3( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 4:{ // Time variant 4 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var4( FLA_ONE, A, B, FLA_ONE, C ); break; case 
FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var4( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 5:{ // Time variant 5 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var5( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var5( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 6:{ // Time variant 6 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var6( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var6( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 7:{ // Time variant 7 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var7( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var7( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 8:{ // Time variant 8 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var8( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var8( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 9:{ // Time variant 9 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var9( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var9( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } case 10:{ // Time variant 10 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Hemm_ll_unb_var10( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Hemm_ll_blk_var10( FLA_ONE, A, B, FLA_ONE, C, cntl_hemm_var ); break; default: printf("trouble\n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } FLA_Cntl_obj_free( cntl_hemm_var ); FLA_Cntl_obj_free( cntl_hemm_blas ); FLA_Cntl_obj_free( cntl_gemm_blas ); FLA_Blocksize_free( bp ); if ( variant == 0 ) { FLA_Copy_external( C, C_ref ); *diff = 0.0; } else { *diff = 
FLA_Max_elemwise_diff( C, C_ref ); } *gflops = 2.0 * FLA_Obj_length( C ) * FLA_Obj_width( C ) * FLA_Obj_width( A ) / dtime_old / 1e9; *dtime = dtime_old; FLA_Copy_external( C_old, C ); FLA_Obj_free( &C_old ); }