/*
 * time_Syrk_ln: time one implementation of the Syrk "ln" operation and
 * report the best (smallest) elapsed time over nrepeats runs.
 *
 * Before every repetition C is restored from a private snapshot so that
 * each run starts from the same contents. Every routine is invoked with
 * FLA_ONE for both scalar arguments.
 *
 *   variant  - 0 selects REF_Syrk_ln; 1..6 select FLA_Syrk_ln_*_var<variant>.
 *              Any other value prints "trouble" and times nothing.
 *   type     - FLA_ALG_UNBLOCKED or FLA_ALG_BLOCKED (consulted for 1..6 only).
 *   nrepeats - number of timed repetitions.
 *   n, B     - not used by this routine.
 *   nb_alg   - value passed (four times) to FLA_Blocksize_create; the
 *              resulting blocksize object is attached to the control tree
 *              handed to the blocked variants.
 *   A, C     - operands passed to the Syrk routine. C holds its original
 *              contents again when the function returns.
 *   Cref     - variant 0: overwritten with the computed C.
 *              otherwise: the computed C is compared against it.
 *   dtime    - out: smallest FLA_Clock() difference over all repetitions
 *              (0.0 when nrepeats < 1).
 *   diff     - out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, Cref ).
 *   gflops   - out: length(C) * width(C) * width(A) / dtime / 1e9
 *              (0.0 when nrepeats < 1).
 */
void time_Syrk_ln( int variant, int type, int nrepeats, int n, int nb_alg,
                   FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
                   double *dtime, double *diff, double *gflops )
{
    int              irep;
    double           t_start;
    double           t_elapsed;
    double           t_best = 0.0;   // defined even if the loop never runs
    FLA_Obj          C_old;
    fla_blocksize_t *bp;
    fla_gemm_t      *cntl_gemm_blas;
    fla_syrk_t      *cntl_syrk_blas;
    fla_syrk_t      *cntl_syrk_var;

    // Part of the common driver signature; intentionally unused here.
    (void) n;
    (void) B;

    // Control tree consumed by the blocked variants.
    bp             = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
    cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
    cntl_syrk_blas = FLA_Cntl_syrk_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL );
    cntl_syrk_var  = FLA_Cntl_syrk_obj_create( FLA_FLAT, variant, bp,
                                               cntl_syrk_blas, cntl_gemm_blas );

    // Snapshot of C so every repetition sees identical input.
    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
    FLA_Copy_external( C, C_old );

    for ( irep = 0; irep < nrepeats; irep++ )
    {
        FLA_Copy_external( C_old, C );

        t_start = FLA_Clock();

        switch ( variant )
        {
        case 0:
            // Time reference implementation
            REF_Syrk_ln( FLA_ONE, A, FLA_ONE, C );
            break;

        case 1:
            // Time variant 1
            switch ( type )
            {
            case FLA_ALG_UNBLOCKED:
                FLA_Syrk_ln_unb_var1( FLA_ONE, A, FLA_ONE, C );
                break;
            case FLA_ALG_BLOCKED:
                FLA_Syrk_ln_blk_var1( FLA_ONE, A, FLA_ONE, C, cntl_syrk_var );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 2:
            // Time variant 2
            switch ( type )
            {
            case FLA_ALG_UNBLOCKED:
                FLA_Syrk_ln_unb_var2( FLA_ONE, A, FLA_ONE, C );
                break;
            case FLA_ALG_BLOCKED:
                FLA_Syrk_ln_blk_var2( FLA_ONE, A, FLA_ONE, C, cntl_syrk_var );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 3:
            // Time variant 3
            switch ( type )
            {
            case FLA_ALG_UNBLOCKED:
                FLA_Syrk_ln_unb_var3( FLA_ONE, A, FLA_ONE, C );
                break;
            case FLA_ALG_BLOCKED:
                FLA_Syrk_ln_blk_var3( FLA_ONE, A, FLA_ONE, C, cntl_syrk_var );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 4:
            // Time variant 4
            switch ( type )
            {
            case FLA_ALG_UNBLOCKED:
                FLA_Syrk_ln_unb_var4( FLA_ONE, A, FLA_ONE, C );
                break;
            case FLA_ALG_BLOCKED:
                FLA_Syrk_ln_blk_var4( FLA_ONE, A, FLA_ONE, C, cntl_syrk_var );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 5:
            // Time variant 5
            switch ( type )
            {
            case FLA_ALG_UNBLOCKED:
                FLA_Syrk_ln_unb_var5( FLA_ONE, A, FLA_ONE, C );
                break;
            case FLA_ALG_BLOCKED:
                FLA_Syrk_ln_blk_var5( FLA_ONE, A, FLA_ONE, C, cntl_syrk_var );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 6:
            // Time variant 6
            switch ( type )
            {
            case FLA_ALG_UNBLOCKED:
                FLA_Syrk_ln_unb_var6( FLA_ONE, A, FLA_ONE, C );
                break;
            case FLA_ALG_BLOCKED:
                FLA_Syrk_ln_blk_var6( FLA_ONE, A, FLA_ONE, C, cntl_syrk_var );
                break;
            default:
                printf("trouble\n");
            }
            break;

        default:
            // Unknown variant: report it instead of silently timing nothing.
            printf("trouble\n");
            break;
        }

        t_elapsed = FLA_Clock() - t_start;

        // Keep the fastest repetition seen so far.
        if ( irep == 0 || t_elapsed < t_best )
            t_best = t_elapsed;
    }

    FLA_Cntl_obj_free( cntl_syrk_var );
    FLA_Cntl_obj_free( cntl_syrk_blas );
    FLA_Cntl_obj_free( cntl_gemm_blas );
    FLA_Blocksize_free( bp );

    if ( variant == 0 )
    {
        // The reference run defines the expected result for later variants.
        FLA_Copy_external( C, Cref );
        *diff = 0.0;
    }
    else
    {
        *diff = FLA_Max_elemwise_diff( C, Cref );
    }

    if ( nrepeats > 0 )
    {
        *gflops = 1.0 *
                  FLA_Obj_length( C ) *
                  FLA_Obj_width( C ) *
                  FLA_Obj_width( A ) /
                  t_best / 1.0e9;
    }
    else
    {
        // Nothing was timed; avoid dividing by a meaningless time.
        *gflops = 0.0;
    }

    *dtime = t_best;

    // Leave C as the caller passed it in.
    FLA_Copy_external( C_old, C );
    FLA_Obj_free( &C_old );
}
/*
 * time_Syrk_ln: time the reference Syrk "ln" implementation and report the
 * best (smallest) elapsed time over nrepeats runs.
 *
 * Before every repetition C is restored from a private snapshot so that
 * each run starts from the same contents.
 *
 *   variant  - 0 selects REF_Syrk_ln( FLA_ONE, A, FLA_ONE, C ); any other
 *              value prints "trouble" and times nothing.
 *   type, n, nb_alg, B
 *            - not used by this routine.
 *   nrepeats - number of timed repetitions.
 *   A, C     - operands passed to the Syrk routine. C holds its original
 *              contents again when the function returns.
 *   Cref     - variant 0: overwritten with the computed C.
 *              otherwise: C is compared against it.
 *   dtime    - out: smallest FLA_Clock() difference over all repetitions
 *              (0.0 when nrepeats < 1).
 *   diff     - out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, Cref ).
 *   gflops   - out: length(A) * length(A) * width(A) / dtime / 1e9
 *              (0.0 when nrepeats < 1).
 */
void time_Syrk_ln( int variant, int type, int nrepeats, int n, int nb_alg,
                   FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
                   double *dtime, double *diff, double *gflops )
{
    int     irep;
    double  t_start;
    double  t_elapsed;
    double  t_best = 0.0;   // defined even if the loop never runs
    FLA_Obj Cold;

    // Part of the common driver signature; intentionally unused here.
    (void) type;
    (void) n;
    (void) nb_alg;
    (void) B;

    // Snapshot of C so every repetition sees identical input.
    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold );
    FLA_Copy_external( C, Cold );

    for ( irep = 0; irep < nrepeats; irep++ )
    {
        FLA_Copy_external( Cold, C );

        t_start = FLA_Clock();

        switch ( variant )
        {
        case 0:
            // Time reference implementation
            REF_Syrk_ln( FLA_ONE, A, FLA_ONE, C );
            break;
        default:
            printf("trouble\n");
            break;
        }

        t_elapsed = FLA_Clock() - t_start;

        // Keep the fastest repetition seen so far.
        if ( irep == 0 || t_elapsed < t_best )
            t_best = t_elapsed;
    }

    if ( variant == 0 )
    {
        // The reference run defines the expected result for later comparisons.
        FLA_Copy_external( C, Cref );
        *diff = 0.0;
    }
    else
    {
        *diff = FLA_Max_elemwise_diff( C, Cref );
    }

    if ( nrepeats > 0 )
    {
        *gflops = 1.0 *
                  FLA_Obj_length( A ) *
                  FLA_Obj_length( A ) *
                  FLA_Obj_width( A ) /
                  t_best / 1e9;
    }
    else
    {
        // Nothing was timed; avoid dividing by a meaningless time.
        *gflops = 0.0;
    }

    *dtime = t_best;

    // Leave C as the caller passed it in.
    FLA_Copy_external( Cold, C );
    FLA_Obj_free( &Cold );
}
/*
 * time_Syrk_ln: time one OpenMP implementation of the Syrk "ln" operation
 * (or the reference implementation) and report the best (smallest) elapsed
 * time over nrepeats runs.
 *
 * Before every repetition C is restored from a private snapshot so that
 * each run starts from the same contents.
 *
 *   variant  - 0 selects REF_Syrk_ln( FLA_ONE, A, FLA_ONE, C ); 1..5 select
 *              FLA_Syrk_ln_omp*_var<variant>( A, C ). Any other value prints
 *              "trouble" and times nothing.
 *   type     - which OpenMP flavour to call (consulted for 1..5 only):
 *                variants 1, 3, 4: FLA_ALG_OPENMP_1TASK, _2TASKS, _2LOOPS
 *                variant  2      : the three above plus _2LOOPSPLUS
 *                variant  5      : FLA_ALG_OPENMP_1TASK only
 *              Unsupported combinations print "trouble".
 *   nrepeats - number of timed repetitions.
 *   n, nb_alg, B
 *            - not used by this routine.
 *   A, C     - operands passed to the Syrk routine. C holds its original
 *              contents again when the function returns.
 *   C_ref    - variant 0: overwritten with the computed C.
 *              otherwise: the computed C is compared against it.
 *   dtime    - out: smallest FLA_Clock() difference over all repetitions
 *              (0.0 when nrepeats < 1).
 *   diff     - out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 *   gflops   - out: length(A) * length(A) * width(A) / dtime / 1e9
 *              (0.0 when nrepeats < 1).
 */
void time_Syrk_ln( int variant, int type, int nrepeats, int n, int nb_alg,
                   FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
                   double *dtime, double *diff, double *gflops )
{
    int     irep;
    double  t_start;
    double  t_elapsed;
    double  t_best = 0.0;   // defined even if the loop never runs
    FLA_Obj C_old;

    // Part of the common driver signature; intentionally unused here.
    (void) n;
    (void) nb_alg;
    (void) B;

    // Snapshot of C so every repetition sees identical input.
    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
    FLA_Copy_external( C, C_old );

    for ( irep = 0; irep < nrepeats; irep++ )
    {
        FLA_Copy_external( C_old, C );

        t_start = FLA_Clock();

        switch ( variant )
        {
        case 0:
            // Time reference implementation
            REF_Syrk_ln( FLA_ONE, A, FLA_ONE, C );
            break;

        case 1:
            // Time variant 1
            switch ( type )
            {
            case FLA_ALG_OPENMP_1TASK:
                FLA_Syrk_ln_omp1t_var1( A, C );
                break;
            case FLA_ALG_OPENMP_2TASKS:
                FLA_Syrk_ln_omp2t_var1( A, C );
                break;
            case FLA_ALG_OPENMP_2LOOPS:
                FLA_Syrk_ln_omp2l_var1( A, C );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 2:
            // Time variant 2
            switch ( type )
            {
            case FLA_ALG_OPENMP_1TASK:
                FLA_Syrk_ln_omp1t_var2( A, C );
                break;
            case FLA_ALG_OPENMP_2TASKS:
                FLA_Syrk_ln_omp2t_var2( A, C );
                break;
            case FLA_ALG_OPENMP_2LOOPS:
                FLA_Syrk_ln_omp2l_var2( A, C );
                break;
            case FLA_ALG_OPENMP_2LOOPSPLUS:
                FLA_Syrk_ln_omp2x_var2( A, C );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 3:
            // Time variant 3
            switch ( type )
            {
            case FLA_ALG_OPENMP_1TASK:
                FLA_Syrk_ln_omp1t_var3( A, C );
                break;
            case FLA_ALG_OPENMP_2TASKS:
                FLA_Syrk_ln_omp2t_var3( A, C );
                break;
            case FLA_ALG_OPENMP_2LOOPS:
                FLA_Syrk_ln_omp2l_var3( A, C );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 4:
            // Time variant 4
            switch ( type )
            {
            case FLA_ALG_OPENMP_1TASK:
                FLA_Syrk_ln_omp1t_var4( A, C );
                break;
            case FLA_ALG_OPENMP_2TASKS:
                FLA_Syrk_ln_omp2t_var4( A, C );
                break;
            case FLA_ALG_OPENMP_2LOOPS:
                FLA_Syrk_ln_omp2l_var4( A, C );
                break;
            default:
                printf("trouble\n");
            }
            break;

        case 5:
            // Time variant 5
            switch ( type )
            {
            case FLA_ALG_OPENMP_1TASK:
                FLA_Syrk_ln_omp1t_var5( A, C );
                break;
            default:
                printf("trouble\n");
            }
            break;

        default:
            // Unknown variant: report it instead of silently timing nothing.
            printf("trouble\n");
            break;
        }

        t_elapsed = FLA_Clock() - t_start;

        // Keep the fastest repetition seen so far.
        if ( irep == 0 || t_elapsed < t_best )
            t_best = t_elapsed;
    }

    if ( variant == 0 )
    {
        // The reference run defines the expected result for later variants.
        FLA_Copy_external( C, C_ref );
        *diff = 0.0;
    }
    else
    {
        *diff = FLA_Max_elemwise_diff( C, C_ref );
    }

    if ( nrepeats > 0 )
    {
        *gflops = 1.0 *
                  FLA_Obj_length( A ) *
                  FLA_Obj_length( A ) *
                  FLA_Obj_width( A ) /
                  t_best / 1e9;
    }
    else
    {
        // Nothing was timed; avoid dividing by a meaningless time.
        *gflops = 0.0;
    }

    *dtime = t_best;

    // Leave C as the caller passed it in.
    FLA_Copy_external( C_old, C );
    FLA_Obj_free( &C_old );
}