void time_Gemm_hn( int variant, int type, int nrepeats, int n, int nb_alg, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, double *dtime, double *diff, double *gflops ) { int irep; double dtime_old = 1.0e9; FLA_Obj C_old; fla_blocksize_t* bp; fla_gemm_t* cntl_gemm_blas; fla_gemm_t* cntl_gemm_var; bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg ); cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL ); cntl_gemm_var = FLA_Cntl_gemm_obj_create( FLA_FLAT, variant, bp, cntl_gemm_blas ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old ); FLA_Copy_external( C, C_old ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( C_old, C ); *dtime = FLA_Clock(); switch( variant ){ // Time reference implementation case 0: REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ONE, C ); break; // Time variant 1 case 1:{ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Gemm_hn_unb_var1( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Gemm_hn_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); break; default: printf("trouble\n"); } break; } // Time variant 2 case 2:{ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Gemm_hn_unb_var2( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Gemm_hn_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); break; default: printf("trouble\n"); } break; } // Time variant 3 case 3:{ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Gemm_hn_unb_var3( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Gemm_hn_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); break; default: printf("trouble\n"); } break; } // Time variant 4 case 4:{ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Gemm_hn_unb_var4( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Gemm_hn_blk_var4( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); break; default: printf("trouble\n"); } break; } // Time variant 5 case 5:{ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Gemm_hn_unb_var5( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Gemm_hn_blk_var5( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); break; default: printf("trouble\n"); } break; } // Time variant 6 case 6:{ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Gemm_hn_unb_var6( FLA_ONE, A, B, FLA_ONE, C ); break; case FLA_ALG_BLOCKED: FLA_Gemm_hn_blk_var6( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); break; default: printf("trouble\n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } FLA_Cntl_obj_free( cntl_gemm_var ); FLA_Cntl_obj_free( cntl_gemm_blas ); FLA_Blocksize_free( bp ); if ( variant == 0 ) { FLA_Copy_external( C, C_ref ); *diff = 0.0; } else { *diff = FLA_Max_elemwise_diff( C, C_ref ); } *gflops = 2.0 * FLA_Obj_length( C ) * FLA_Obj_width( C ) * FLA_Obj_width( A ) / dtime_old / 1.0e9; *dtime = dtime_old; FLA_Copy_external( C_old, C ); FLA_Obj_free( &C_old ); }
FLA_Error FLA_Gemm_hn( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl ) { FLA_Error r_val = FLA_SUCCESS; if ( FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM ) { r_val = FLA_Gemm_hn_task( alpha, A, B, beta, C, cntl ); } else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 ) { r_val = FLA_Gemm_hn_blk_var1( alpha, A, B, beta, C, cntl ); } #ifdef FLA_ENABLE_NON_CRITICAL_CODE else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 ) { r_val = FLA_Gemm_hn_blk_var2( alpha, A, B, beta, C, cntl ); } #endif else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 ) { r_val = FLA_Gemm_hn_blk_var3( alpha, A, B, beta, C, cntl ); } #ifdef FLA_ENABLE_NON_CRITICAL_CODE else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT4 ) { r_val = FLA_Gemm_hn_blk_var4( alpha, A, B, beta, C, cntl ); } #endif else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT5 ) { r_val = FLA_Gemm_hn_blk_var5( alpha, A, B, beta, C, cntl ); } #ifdef FLA_ENABLE_NON_CRITICAL_CODE else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT6 ) { r_val = FLA_Gemm_hn_blk_var6( alpha, A, B, beta, C, cntl ); } #endif #ifdef FLA_ENABLE_NON_CRITICAL_CODE else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 ) { r_val = FLA_Gemm_hn_unb_var1( alpha, A, B, beta, C ); } else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 ) { r_val = FLA_Gemm_hn_unb_var2( alpha, A, B, beta, C ); } else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT3 ) { r_val = FLA_Gemm_hn_unb_var3( alpha, A, B, beta, C ); } else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT4 ) { r_val = FLA_Gemm_hn_unb_var4( alpha, A, B, beta, C ); } else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT5 ) { r_val = FLA_Gemm_hn_unb_var5( alpha, A, B, beta, C ); } else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT6 ) { r_val = FLA_Gemm_hn_unb_var6( alpha, A, B, beta, C ); } #endif else { r_val = FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ); } return r_val; }