/*
 * Finalizer for the flash_uddateut control state.
 *
 * Passes flash_uddateut_cntl_leaf and flash_uddateut_cntl to
 * FLA_Cntl_obj_free(), then flash_uddateut_var2_bsize to
 * FLA_Blocksize_free().  The variables are declared outside this function.
 */
void FLASH_UDdate_UT_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_uddateut_cntl_leaf );
  FLA_Cntl_obj_free( flash_uddateut_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_uddateut_var2_bsize );
}
/*
 * Finalizer for the flash_ttmm control state.
 *
 * Passes flash_ttmm_cntl_leaf and flash_ttmm_cntl to FLA_Cntl_obj_free(),
 * then flash_ttmm_bsize to FLA_Blocksize_free().  The variables are declared
 * outside this function.
 */
void FLASH_Ttmm_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_ttmm_cntl_leaf );
  FLA_Cntl_obj_free( flash_ttmm_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_ttmm_bsize );
}
/*
 * Finalizer for the flash_lu_piv control state.
 *
 * Passes flash_lu_piv_cntl_leaf and flash_lu_piv_cntl to
 * FLA_Cntl_obj_free(), then flash_lu_piv_bsize to FLA_Blocksize_free().
 * The variables are declared outside this function.
 */
void FLASH_LU_piv_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_lu_piv_cntl_leaf );
  FLA_Cntl_obj_free( flash_lu_piv_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_lu_piv_bsize );
}
/*
 * Finalizer for the flash_lqut control state.
 *
 * Passes flash_lqut_cntl_leaf and flash_lqut_cntl to FLA_Cntl_obj_free(),
 * then flash_lqut_var3_bsize to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLASH_LQ_UT_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_lqut_cntl_leaf );
  FLA_Cntl_obj_free( flash_lqut_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_lqut_var3_bsize );
}
/*
 * Test-suite cleanup for the symm control state.
 *
 * Passes symm_cntl_bsize to FLA_Blocksize_free() first, then symm_cntl_unb
 * and symm_cntl_blk to FLA_Cntl_obj_free().  The variables are declared
 * outside this function.
 */
void libfla_test_symm_cntl_free( void )
{
  /* Blocksize object. */
  FLA_Blocksize_free( symm_cntl_bsize );

  /* Control objects. */
  FLA_Cntl_obj_free( symm_cntl_unb );
  FLA_Cntl_obj_free( symm_cntl_blk );
}
/*
 * Finalizer for the fla_caqr2ut control state.
 *
 * Passes fla_caqr2ut_cntl_unb and fla_caqr2ut_cntl_leaf to
 * FLA_Cntl_obj_free(), then fla_caqr2ut_var1_bsize to FLA_Blocksize_free().
 * The variables are declared outside this function.
 */
void FLA_CAQR2_UT_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( fla_caqr2ut_cntl_unb );
  FLA_Cntl_obj_free( fla_caqr2ut_cntl_leaf );

  /* Blocksize object. */
  FLA_Blocksize_free( fla_caqr2ut_var1_bsize );
}
/*
 * Finalizer for the flash_qr2ut control state.
 *
 * Passes flash_qr2ut_cntl_leaf and flash_qr2ut_cntl to FLA_Cntl_obj_free(),
 * then flash_qr2ut_var2_bsize to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLASH_QR2_UT_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_qr2ut_cntl_leaf );
  FLA_Cntl_obj_free( flash_qr2ut_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_qr2ut_var2_bsize );
}
/*
 * Finalizer for the flash_eig_gest control state.
 *
 * Passes flash_eig_gest_cntl_leaf and flash_eig_gest_cntl to
 * FLA_Cntl_obj_free(), then flash_eig_gest_bsize to FLA_Blocksize_free().
 * The variables are declared outside this function.
 */
void FLASH_Eig_gest_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_eig_gest_cntl_leaf );
  FLA_Cntl_obj_free( flash_eig_gest_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_eig_gest_bsize );
}
/*
 * Finalizer for the flash_copyr control state.
 *
 * Passes flash_copyr_cntl_blas and flash_copyr_cntl to FLA_Cntl_obj_free(),
 * then flash_copyr_bsize to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLASH_Copyr_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_copyr_cntl_blas );
  FLA_Cntl_obj_free( flash_copyr_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_copyr_bsize );
}
/*
 * Finalizer for the flash_sylv control state.
 *
 * Passes flash_sylv_cntl_leaf, flash_sylv_cntl_mb and flash_sylv_cntl to
 * FLA_Cntl_obj_free(), then flash_sylv_bsize to FLA_Blocksize_free().  The
 * variables are declared outside this function.
 */
void FLASH_Sylv_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_sylv_cntl_leaf );
  FLA_Cntl_obj_free( flash_sylv_cntl_mb );
  FLA_Cntl_obj_free( flash_sylv_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_sylv_bsize );
}
/*
 * Test-suite cleanup for the eig_gest control state.
 *
 * Passes eig_gest_cntl_bsize to FLA_Blocksize_free() first, then
 * eig_gest_cntl_unb, eig_gest_cntl_opt and eig_gest_cntl_blk to
 * FLA_Cntl_obj_free().  The variables are declared outside this function.
 */
void libfla_test_eig_gest_cntl_free( void )
{
  /* Blocksize object. */
  FLA_Blocksize_free( eig_gest_cntl_bsize );

  /* Control objects. */
  FLA_Cntl_obj_free( eig_gest_cntl_unb );
  FLA_Cntl_obj_free( eig_gest_cntl_opt );
  FLA_Cntl_obj_free( eig_gest_cntl_blk );
}
/*
 * Finalizer for the flash_appiv control state.
 *
 * Passes flash_appiv_cntl_leaf, flash_appiv_cntl_bp and flash_appiv_cntl to
 * FLA_Cntl_obj_free(), then flash_appiv_bsize to FLA_Blocksize_free().  The
 * variables are declared outside this function.
 */
void FLASH_Apply_pivots_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_appiv_cntl_leaf );
  FLA_Cntl_obj_free( flash_appiv_cntl_bp );
  FLA_Cntl_obj_free( flash_appiv_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_appiv_bsize );
}
/*
 * Finalizer for the flash_scalr control state.
 *
 * Passes flash_scalr_cntl_blas and flash_scalr_cntl to FLA_Cntl_obj_free(),
 * then flash_scalr_bsize to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLASH_Scalr_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_scalr_cntl_blas );
  FLA_Cntl_obj_free( flash_scalr_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_scalr_bsize );
}
/*
 * Test-suite cleanup for the lu_piv control state.
 *
 * Passes lu_piv_cntl_bsize to FLA_Blocksize_free() first, then
 * lu_piv_cntl_unb, lu_piv_cntl_opt and lu_piv_cntl_blk to
 * FLA_Cntl_obj_free().  The variables are declared outside this function.
 */
void libfla_test_lu_piv_cntl_free( void )
{
  /* Blocksize object. */
  FLA_Blocksize_free( lu_piv_cntl_bsize );

  /* Control objects. */
  FLA_Cntl_obj_free( lu_piv_cntl_unb );
  FLA_Cntl_obj_free( lu_piv_cntl_opt );
  FLA_Cntl_obj_free( lu_piv_cntl_blk );
}
/*
 * Finalizer for the flash_trsv control state.
 *
 * Passes flash_trsv_cntl_blas and flash_trsv_cntl to FLA_Cntl_obj_free(),
 * then flash_trsv_bsize to FLA_Blocksize_free().  The variables are declared
 * outside this function.
 */
void FLASH_Trsv_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_trsv_cntl_blas );
  FLA_Cntl_obj_free( flash_trsv_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_trsv_bsize );
}
/*
 * Finalizer for the flash_apqut control state.
 *
 * Passes flash_apqut_cntl_leaf, flash_apqut_cntl and flash_apqut_cntl_blas
 * to FLA_Cntl_obj_free(), then flash_apqut_var1_bsize and
 * flash_apqut_var2_bsize to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLASH_Apply_Q_UT_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_apqut_cntl_leaf );
  FLA_Cntl_obj_free( flash_apqut_cntl );
  FLA_Cntl_obj_free( flash_apqut_cntl_blas );

  /* Blocksize objects. */
  FLA_Blocksize_free( flash_apqut_var1_bsize );
  FLA_Blocksize_free( flash_apqut_var2_bsize );
}
/*
 * Finalizer for the flash_apcaq2ut control state.
 *
 * Passes flash_apcaq2ut_cntl_leaf, flash_apcaq2ut_cntl_mid and
 * flash_apcaq2ut_cntl to FLA_Cntl_obj_free(), then
 * flash_apcaq2ut_var2_bsize and flash_apcaq2ut_var3_bsize to
 * FLA_Blocksize_free().  The variables are declared outside this function.
 */
void FLASH_Apply_CAQ2_UT_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_apcaq2ut_cntl_leaf );
  FLA_Cntl_obj_free( flash_apcaq2ut_cntl_mid );
  FLA_Cntl_obj_free( flash_apcaq2ut_cntl );

  /* Blocksize objects. */
  FLA_Blocksize_free( flash_apcaq2ut_var2_bsize );
  FLA_Blocksize_free( flash_apcaq2ut_var3_bsize );
}
/*
 * Finalizer for the flash_syrk control state.
 *
 * Passes flash_syrk_cntl_blas, flash_syrk_cntl_ip, flash_syrk_cntl_op and
 * flash_syrk_cntl_mm to FLA_Cntl_obj_free(), then flash_syrk_bsize to
 * FLA_Blocksize_free().  The variables are declared outside this function.
 */
void FLASH_Syrk_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_syrk_cntl_blas );
  FLA_Cntl_obj_free( flash_syrk_cntl_ip );
  FLA_Cntl_obj_free( flash_syrk_cntl_op );
  FLA_Cntl_obj_free( flash_syrk_cntl_mm );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_syrk_bsize );
}
/*
 * Finalizer for the flash_trsm control state.
 *
 * Passes flash_trsm_cntl_blas, flash_trsm_cntl_bp, flash_trsm_cntl_mp and
 * flash_trsm_cntl_mm to FLA_Cntl_obj_free(), then flash_trsm_bsize to
 * FLA_Blocksize_free().  The variables are declared outside this function.
 */
void FLASH_Trsm_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( flash_trsm_cntl_blas );
  FLA_Cntl_obj_free( flash_trsm_cntl_bp );
  FLA_Cntl_obj_free( flash_trsm_cntl_mp );
  FLA_Cntl_obj_free( flash_trsm_cntl_mm );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_trsm_bsize );
}
/*
 * Finalizer for the fla_lu_piv control state.
 *
 * Passes fla_lu_piv_cntl, fla_lu_piv_cntl2, fla_lu_piv_cntl_in and
 * fla_lu_piv_cntl_leaf to FLA_Cntl_obj_free(), then fla_lu_piv_var5_bsize
 * and fla_lu_piv_var5_bsize_in to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLA_LU_piv_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( fla_lu_piv_cntl );
  FLA_Cntl_obj_free( fla_lu_piv_cntl2 );
  FLA_Cntl_obj_free( fla_lu_piv_cntl_in );
  FLA_Cntl_obj_free( fla_lu_piv_cntl_leaf );

  /* Blocksize objects. */
  FLA_Blocksize_free( fla_lu_piv_var5_bsize );
  FLA_Blocksize_free( fla_lu_piv_var5_bsize_in );
}
/*
 * Test-suite cleanup for the qrut control state.
 *
 * Passes qrut_cntl_bsize to FLA_Blocksize_free() first, then
 * apqut_cntl_blk, qrut_cntl_unb, qrut_cntl_opt, qrut_cntl_blk and
 * qrut_cntl_blk_sub to FLA_Cntl_obj_free().  The variables are declared
 * outside this function.
 */
void libfla_test_qrut_cntl_free( void )
{
  /* Blocksize object. */
  FLA_Blocksize_free( qrut_cntl_bsize );

  /* Control objects. */
  FLA_Cntl_obj_free( apqut_cntl_blk );
  FLA_Cntl_obj_free( qrut_cntl_unb );
  FLA_Cntl_obj_free( qrut_cntl_opt );
  FLA_Cntl_obj_free( qrut_cntl_blk );
  FLA_Cntl_obj_free( qrut_cntl_blk_sub );
}
/*
 * Finalizer for the fla_trsm control state.
 *
 * Passes fla_trsm_cntl_blas, fla_trsm_cntl_bp, fla_trsm_cntl_mp and
 * fla_trsm_cntl_mm to FLA_Cntl_obj_free(), then fla_trsm_var2_bsize and
 * fla_trsm_var3_bsize to FLA_Blocksize_free().  The variables are declared
 * outside this function.
 */
void FLA_Trsm_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( fla_trsm_cntl_blas );
  FLA_Cntl_obj_free( fla_trsm_cntl_bp );
  FLA_Cntl_obj_free( fla_trsm_cntl_mp );
  FLA_Cntl_obj_free( fla_trsm_cntl_mm );

  /* Blocksize objects. */
  FLA_Blocksize_free( fla_trsm_var2_bsize );
  FLA_Blocksize_free( fla_trsm_var3_bsize );
}
/*
 * Finalizer for the fla_hemm control state.
 *
 * Passes fla_hemm_cntl_blas, fla_hemm_cntl_bp, fla_hemm_cntl_mp and
 * fla_hemm_cntl_mm to FLA_Cntl_obj_free(), then fla_hemm_var1_bsize and
 * fla_hemm_var9_bsize to FLA_Blocksize_free().  The variables are declared
 * outside this function.
 */
void FLA_Hemm_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( fla_hemm_cntl_blas );
  FLA_Cntl_obj_free( fla_hemm_cntl_bp );
  FLA_Cntl_obj_free( fla_hemm_cntl_mp );
  FLA_Cntl_obj_free( fla_hemm_cntl_mm );

  /* Blocksize objects. */
  FLA_Blocksize_free( fla_hemm_var1_bsize );
  FLA_Blocksize_free( fla_hemm_var9_bsize );
}
/*
 * Finalizer for the fla_her2k control state.
 *
 * Passes fla_her2k_cntl_blas, fla_her2k_cntl_ip, fla_her2k_cntl_op and
 * fla_her2k_cntl_mm to FLA_Cntl_obj_free(), then fla_her2k_var3_bsize and
 * fla_her2k_var9_bsize to FLA_Blocksize_free().  The variables are declared
 * outside this function.
 */
void FLA_Her2k_cntl_finalize()
{
  /* Control objects. */
  FLA_Cntl_obj_free( fla_her2k_cntl_blas );
  FLA_Cntl_obj_free( fla_her2k_cntl_ip );
  FLA_Cntl_obj_free( fla_her2k_cntl_op );
  FLA_Cntl_obj_free( fla_her2k_cntl_mm );

  /* Blocksize objects. */
  FLA_Blocksize_free( fla_her2k_var3_bsize );
  FLA_Blocksize_free( fla_her2k_var9_bsize );
}
/*
 * Finalizer for the fla_apqut control state.
 *
 * Passes fla_apqut_cntl_leaf to FLA_Cntl_obj_free(), then
 * fla_apqut_var1_bsize and fla_apqut_var2_bsize to FLA_Blocksize_free().
 * The variables are declared outside this function.
 */
void FLA_Apply_Q_UT_cntl_finalize()
{
  /* Control object. */
  FLA_Cntl_obj_free( fla_apqut_cntl_leaf );

  /*
   * NOTE(review): the free of fla_apqut_cntl is deliberately disabled in the
   * original; the reason is not visible from this function -- confirm
   * against the matching initializer before re-enabling.
   *
   * FLA_Cntl_obj_free( fla_apqut_cntl );
   */

  /* Blocksize objects. */
  FLA_Blocksize_free( fla_apqut_var1_bsize );
  FLA_Blocksize_free( fla_apqut_var2_bsize );
}
/*
 * Finalizer for the flash_uddateutinc control state.
 *
 * Passes flash_uddateutinc_cntl to FLA_Cntl_obj_free(), then
 * flash_uddateutinc_var1_bsize to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLASH_UDdate_UT_inc_cntl_finalize()
{
  /* Control object. */
  FLA_Cntl_obj_free( flash_uddateutinc_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_uddateutinc_var1_bsize );
}
/*
 * Time one Her2k "ln" implementation and report its best run.
 *
 * variant   0 runs REF_Her2k (lower triangular, no transpose); 1..10 select
 *           FLA_Her2k_ln_{unb,blk}_var<variant>.  Any other value runs
 *           nothing inside the timed region.
 * type      FLA_ALG_UNBLOCKED or FLA_ALG_BLOCKED for variants 1..10; any
 *           other value prints "trouble".
 * nrepeats  number of timed repetitions; the minimum time is kept.
 * n         not referenced in this function.
 * nb_alg    value used for all four arguments of FLA_Blocksize_create().
 * A, B, C   operands; C is restored to its entry contents before returning.
 * C_ref     overwritten with the computed C when variant == 0, otherwise
 *           compared against C.
 * dtime     out: best time over all repetitions (1.0e9 if the loop never
 *           runs).
 * diff      out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 * gflops    out: 2 * length(C) * width(C) * width(A) / best time / 1e9.
 */
void time_Her2k_ln( int variant, int type, int nrepeats, int n, int nb_alg,
                    FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
                    double *dtime, double *diff, double *gflops )
{
  double           best_time = 1.0e9;
  int              rep;
  FLA_Obj          C_saved;
  fla_blocksize_t* blocksize;
  fla_gemm_t*      gemm_blas_cntl;
  fla_her2k_t*     her2k_blas_cntl;
  fla_her2k_t*     her2k_var_cntl;

  /* Build the control objects handed to the blocked variants. */
  blocksize       = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  gemm_blas_cntl  = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM,
                                              NULL, NULL );
  her2k_blas_cntl = FLA_Cntl_her2k_obj_create( FLA_FLAT, FLA_SUBPROBLEM,
                                               NULL, NULL, NULL, NULL );
  her2k_var_cntl  = FLA_Cntl_her2k_obj_create( FLA_FLAT, variant, blocksize,
                                               her2k_blas_cntl,
                                               gemm_blas_cntl,
                                               gemm_blas_cntl );

  /* Keep a copy of C so every repetition starts from the same data. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_saved );
  FLA_Copy_external( C, C_saved );

  for ( rep = 0; rep < nrepeats; rep++ ) {
    FLA_Copy_external( C_saved, C );

    /* *dtime holds the start timestamp until the run finishes. */
    *dtime = FLA_Clock();

    switch ( variant ) {
      case 0: /* Time reference implementation */
        REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                   FLA_ONE, A, B, FLA_ONE, C );
        break;

      case 1: /* Time variant 1 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var1( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var1( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 2: /* Time variant 2 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var2( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var2( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 3: /* Time variant 3 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var3( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var3( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 4: /* Time variant 4 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var4( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var4( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 5: /* Time variant 5 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var5( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var5( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 6: /* Time variant 6 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var6( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var6( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 7: /* Time variant 7 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var7( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var7( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 8: /* Time variant 8 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var8( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var8( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 9: /* Time variant 9 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var9( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var9( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 10: /* Time variant 10 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Her2k_ln_unb_var10( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Her2k_ln_blk_var10( FLA_ONE, A, B, FLA_ONE, C, her2k_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  FLA_Cntl_obj_free( her2k_var_cntl );
  FLA_Cntl_obj_free( her2k_blas_cntl );
  FLA_Cntl_obj_free( gemm_blas_cntl );
  FLA_Blocksize_free( blocksize );

  if ( variant != 0 ) {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  } else {
    /* The reference run defines C_ref for later comparisons. */
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 * FLA_Obj_length( C ) * FLA_Obj_width( C ) *
            FLA_Obj_width( A ) / best_time / 1e9;
  *dtime  = best_time;

  /* Hand C back with its original contents and drop the saved copy. */
  FLA_Copy_external( C_saved, C );
  FLA_Obj_free( &C_saved );
}
/*
 * Finalizer for the flash_caqrutinc control state.
 *
 * Passes flash_caqrutinc_cntl to FLA_Cntl_obj_free(), then
 * flash_caqrutinc_var1_bsize to FLA_Blocksize_free().  The variables are
 * declared outside this function.
 */
void FLASH_CAQR_UT_inc_cntl_finalize()
{
  /* Control object. */
  FLA_Cntl_obj_free( flash_caqrutinc_cntl );

  /* Blocksize object. */
  FLA_Blocksize_free( flash_caqrutinc_var1_bsize );
}
/*
 * Time one Sylv "nn" implementation and report its best run.
 *
 * variant    0 runs REF_Sylv_nn; 1..18 select
 *            FLA_Sylv_nn_{opt,blk}_var<variant>.  Any other value runs
 *            nothing inside the timed region.
 * type       FLA_ALG_UNB_OPT or FLA_ALG_BLOCKED for variants 1..18; any
 *            other value prints "trouble".
 * n_repeats  number of timed repetitions; the minimum time is kept.
 * m, n       problem dimensions, used only for the flop count.
 * nb_alg     value used for all four arguments of FLA_Blocksize_create().
 * isgn, A, B, C, scale
 *            passed through to the implementation being timed; C is
 *            restored to its entry contents before returning.
 * C_ref      overwritten with the computed C when variant == 0, otherwise
 *            compared against C.
 * dtime      out: best time over all repetitions (1.0e9 if the loop never
 *            runs).
 * diff       out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 * gflops     out: ( m*m*n + n*n*m ) / best time / 1e9, multiplied by 4 when
 *            FLA_Obj_is_complex( C ) is true.
 */
void time_Sylv_nn( int variant, int type, int n_repeats, int m, int n,
                   int nb_alg, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C,
                   FLA_Obj C_ref, FLA_Obj scale,
                   double *dtime, double *diff, double *gflops )
{
  int              irep;
  double           dtime_old = 1.0e9;
  FLA_Obj          C_old;
  fla_blocksize_t* bp;
  fla_sylv_t*      cntl_sylv_var;
  fla_sylv_t*      cntl_sylv_unb;
  fla_gemm_t*      cntl_gemm_blas;

  /*
   * Disabled guard, kept for reference:
   *
   * if( type == FLA_ALG_UNBLOCKED && n > 400 ) { *gflops = 0.0; *diff = 0.0; return; }
   */

  /* Build the control objects handed to the blocked variants. */
  bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  cntl_sylv_unb = FLA_Cntl_sylv_obj_create( FLA_FLAT, FLA_UNB_OPT_VARIANT1,
                                            NULL,
                                            NULL, NULL, NULL,
                                            NULL, NULL, NULL, NULL,
                                            NULL, NULL, NULL, NULL );
  cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM,
                                             NULL, NULL, NULL );
  cntl_sylv_var = FLA_Cntl_sylv_obj_create( FLA_FLAT, variant, bp,
                                            cntl_sylv_unb, cntl_sylv_unb,
                                            cntl_sylv_unb,
                                            cntl_gemm_blas, cntl_gemm_blas,
                                            cntl_gemm_blas, cntl_gemm_blas,
                                            cntl_gemm_blas, cntl_gemm_blas,
                                            cntl_gemm_blas, cntl_gemm_blas );

  /* Keep a copy of C so every repetition starts from the same data. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLA_Copy_external( C, C_old );

  for ( irep = 0; irep < n_repeats; irep++ ) {
    FLA_Copy_external( C_old, C );

    /* *dtime holds the start timestamp until the run finishes. */
    *dtime = FLA_Clock();

    switch ( variant ) {
      case 0: /* Time reference implementation */
        REF_Sylv_nn( isgn, A, B, C, scale );
        break;

      case 1: /* Time variant 1 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var1( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 2: /* Time variant 2 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var2( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var2( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 3: /* Time variant 3 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var3( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var3( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 4: /* Time variant 4 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var4( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var4( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 5: /* Time variant 5 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var5( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var5( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 6: /* Time variant 6 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var6( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var6( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 7: /* Time variant 7 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var7( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var7( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 8: /* Time variant 8 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var8( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var8( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 9: /* Time variant 9 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var9( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var9( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 10: /* Time variant 10 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var10( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var10( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 11: /* Time variant 11 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var11( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var11( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 12: /* Time variant 12 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var12( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var12( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 13: /* Time variant 13 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var13( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var13( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 14: /* Time variant 14 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var14( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var14( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 15: /* Time variant 15 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var15( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var15( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 16: /* Time variant 16 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var16( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var16( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 17: /* Time variant 17 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var17( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var17( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;

      case 18: /* Time variant 18 */
        if ( type == FLA_ALG_UNB_OPT ) {
          FLA_Sylv_nn_opt_var18( isgn, A, B, C, scale );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Sylv_nn_blk_var18( isgn, A, B, C, scale, cntl_sylv_var );
        } else {
          printf("trouble\n");
        }
        break;
    }

    *dtime    = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  FLA_Cntl_obj_free( cntl_sylv_var );
  FLA_Cntl_obj_free( cntl_sylv_unb );
  FLA_Cntl_obj_free( cntl_gemm_blas );
  FLA_Blocksize_free( bp );

  if ( variant == 0 ) {
    /* The reference run defines C_ref for later comparisons. */
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  } else {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  }

  /*
   * Evaluate the flop count in double: as an int expression, m*m*n + n*n*m
   * overflows (undefined behaviour) once the products exceed INT_MAX, which
   * already happens around m = n = 1100.
   */
  *gflops = ( ( double ) m * m * n + ( double ) n * n * m ) / dtime_old / 1e9;

  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  /* Hand C back with its original contents and drop the saved copy. */
  FLA_Copy_external( C_old, C );
  FLA_Obj_free( &C_old );
}
/*
 * Time one Gemm "hh" implementation and report its best run.
 *
 * variant   0 runs REF_Gemm with FLA_CONJ_TRANSPOSE on both operands; 1..6
 *           select FLA_Gemm_hh_{unb,blk}_var<variant>.  Any other value
 *           runs nothing inside the timed region.
 * type      FLA_ALG_UNBLOCKED or FLA_ALG_BLOCKED for variants 1..6; any
 *           other value prints "trouble".
 * nrepeats  number of timed repetitions; the minimum time is kept.
 * n         not referenced in this function.
 * nb_alg    value used for all four arguments of FLA_Blocksize_create().
 * A, B, C   operands; C is restored to its entry contents before returning.
 * C_ref     overwritten with the computed C when variant == 0, otherwise
 *           compared against C.
 * dtime     out: best time over all repetitions (1.0e9 if the loop never
 *           runs).
 * diff      out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 * gflops    out: 2 * length(C) * width(C) * width(A) / best time / 1e9,
 *           multiplied by 4 when FLA_Obj_is_complex( C ) is true.
 */
void time_Gemm_hh( int variant, int type, int nrepeats, int n, int nb_alg,
                   FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
                   double *dtime, double *diff, double *gflops )
{
  double           best_time = 1.0e9;
  int              rep;
  FLA_Obj          C_saved;
  fla_blocksize_t* blocksize;
  fla_gemm_t*      gemm_blas_cntl;
  fla_gemm_t*      gemm_var_cntl;

  /* Build the control objects handed to the blocked variants. */
  blocksize      = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  gemm_blas_cntl = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM,
                                             NULL, NULL );
  gemm_var_cntl  = FLA_Cntl_gemm_obj_create( FLA_FLAT, variant, blocksize,
                                             gemm_blas_cntl );

  /* Keep a copy of C so every repetition starts from the same data. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_saved );
  FLA_Copy_external( C, C_saved );

  for ( rep = 0; rep < nrepeats; rep++ ) {
    FLA_Copy_external( C_saved, C );

    /* *dtime holds the start timestamp until the run finishes. */
    *dtime = FLA_Clock();

    switch ( variant ) {
      case 0: /* Time reference implementation */
        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                  FLA_ONE, A, B, FLA_ONE, C );
        break;

      case 1: /* Time variant 1 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Gemm_hh_unb_var1( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Gemm_hh_blk_var1( FLA_ONE, A, B, FLA_ONE, C, gemm_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 2: /* Time variant 2 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Gemm_hh_unb_var2( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Gemm_hh_blk_var2( FLA_ONE, A, B, FLA_ONE, C, gemm_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 3: /* Time variant 3 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Gemm_hh_unb_var3( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Gemm_hh_blk_var3( FLA_ONE, A, B, FLA_ONE, C, gemm_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 4: /* Time variant 4 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Gemm_hh_unb_var4( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Gemm_hh_blk_var4( FLA_ONE, A, B, FLA_ONE, C, gemm_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 5: /* Time variant 5 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Gemm_hh_unb_var5( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Gemm_hh_blk_var5( FLA_ONE, A, B, FLA_ONE, C, gemm_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;

      case 6: /* Time variant 6 */
        if ( type == FLA_ALG_UNBLOCKED ) {
          FLA_Gemm_hh_unb_var6( FLA_ONE, A, B, FLA_ONE, C );
        } else if ( type == FLA_ALG_BLOCKED ) {
          FLA_Gemm_hh_blk_var6( FLA_ONE, A, B, FLA_ONE, C, gemm_var_cntl );
        } else {
          printf("trouble\n");
        }
        break;
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  FLA_Cntl_obj_free( gemm_var_cntl );
  FLA_Cntl_obj_free( gemm_blas_cntl );
  FLA_Blocksize_free( blocksize );

  if ( variant != 0 ) {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  } else {
    /* The reference run defines C_ref for later comparisons. */
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 * FLA_Obj_length( C ) * FLA_Obj_width( C ) *
            FLA_Obj_width( A ) / best_time / 1.0e9;

  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = best_time;

  /* Hand C back with its original contents and drop the saved copy. */
  FLA_Copy_external( C_saved, C );
  FLA_Obj_free( &C_saved );
}