/*
 * Time one "apply Q" variant on B and report the best time over nrepeats
 * runs, a GFLOPS estimate, and a difference against B_ref.
 *
 *   param_combo  only combination 0 is handled; for any other value the
 *                timed region is empty.
 *   type         FLA_ALG_REFERENCE calls REF_Apply_Q on flat copies of A
 *                and B; FLA_ALG_FRONT calls FLASH_Apply_Q_UT on A, T, W, B.
 *                Any other value prints "trouble".
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m, n         not used by this routine.
 *   dtime        out: minimum time measured with FLA_Clock() (stays at
 *                1.0e9 when the loop body never runs).
 *   diff         out: 0.0 for the reference run, otherwise
 *                FLASH_Max_elemwise_diff( B, B_ref ).
 *   gflops       out: 2 * length(A) * width(A) * width(B) / dtime / 1e9,
 *                multiplied by 4 when FLA_Obj_is_complex( A ) is true.
 *
 * After timing, a triangular solve with A is applied to the result before it
 * is stored into B_ref (reference) or compared against B_ref (FLASH).
 * B is reloaded from a saved copy before every repetition and on exit.
 *
 * NOTE(review): the reference call passes FLA_TRANSPOSE while the FLASH call
 * passes FLA_CONJ_TRANSPOSE -- confirm this asymmetry is intended.
 */
void time_Apply_Q( int param_combo, int type, int nrepeats, int m, int n,
                   FLA_Obj A, FLA_Obj B, FLA_Obj B_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W,
                   double *dtime, double *diff, double *gflops )
{
  double  best_time = 1.0e9;
  int     rep;
  FLA_Obj B_save, A_flat, B_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &B_save );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );

  FLASH_Copy( B, B_save );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( B_save, B );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( B, B_flat );

    *dtime = FLA_Clock();

    if ( param_combo == 0 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Apply_Q( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A_flat, t, B_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                          A, T, W, B );
      }
      else
      {
        printf("trouble\n");
      }
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                FLA_ONE, A, B );
    *diff = FLASH_Max_elemwise_diff( B, B_ref );
  }
  else
  {
    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                       FLA_ONE, A_flat, B_flat );
    FLASH_Obj_hierarchify( B_flat, B_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 *
            FLASH_Obj_scalar_length( A ) *
            FLASH_Obj_scalar_width( A ) *
            FLASH_Obj_scalar_width( B ) /
            best_time /
            1.0e9;

  if ( FLA_Obj_is_complex( A ) )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  FLASH_Copy( B_save, B );

  FLASH_Obj_free( &B_save );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &B_flat );
}
/*
 * Time a matrix copy from A into C and report the best time over nrepeats
 * runs, a GFLOPS figure, and a difference against C_ref.
 *
 *   param_combo  only combination 0 is handled; for any other value the
 *                timed region is empty.
 *   type         FLA_ALG_REFERENCE calls REF_Copy on flat copies of A and C;
 *                FLA_ALG_FRONT calls FLASH_Copy( A, C ). Any other value
 *                prints "trouble".
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m, n         dimensions used only in the GFLOPS formula.
 *   dtime        out: minimum time measured with FLA_Clock().
 *   diff         out: 0.0 for the reference run (whose flat result is
 *                stored into C_ref), otherwise
 *                FLASH_Max_elemwise_diff( C, C_ref ).
 *   gflops       out: 2 * m * n / dtime / 1e9.
 *
 * C is reloaded from a saved copy before every repetition and on exit.
 */
void time_Copy( int param_combo, int type, int nrepeats, int m, int n,
                FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  double  best_time = 1.0e9;
  int     rep;
  FLA_Obj C_old, A_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_old );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( C_old, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    if ( param_combo == 0 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Copy( A_flat, C_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Copy( A, C );
      }
      else
      {
        printf("trouble\n");
      }
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 * m * n / best_time / 1.0e9;
  *dtime  = best_time;

  FLASH_Copy( C_old, C );

  FLASH_Obj_free( &C_old );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
/*
 * Time a triangular matrix-matrix multiply (Trmm) on C and report the best
 * time over nrepeats runs, a GFLOPS estimate, and a difference against C_ref.
 *
 *   param_combo  selects side / uplo / trans (diag is always
 *                FLA_NONUNIT_DIAG, the scalar is always FLA_TWO):
 *                   0..2   LEFT,  LOWER, {CONJ_TRANSPOSE, NO_TRANSPOSE, TRANSPOSE}
 *                   3..5   LEFT,  UPPER, same trans order
 *                   6..8   RIGHT, LOWER, same trans order
 *                   9..11  RIGHT, UPPER, same trans order
 *                For any other value the timed region is empty.
 *   type         FLA_ALG_REFERENCE calls REF_Trmm on flat copies of A and C;
 *                FLA_ALG_FRONT calls FLASH_Trmm on A and C. Any other value
 *                prints "trouble" (only for param_combo in 0..11).
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m, n         not used by this routine.
 *   dtime        out: minimum time measured with FLA_Clock().
 *   diff         out: 0.0 for the reference run (whose flat result is
 *                stored into C_ref), otherwise
 *                FLASH_Max_elemwise_diff( C, C_ref ).
 *   gflops       out: length(C) * width(C) * width(A) / dtime / 1e9,
 *                multiplied by 4 for the conjugate-transpose combinations
 *                (0, 3, 6, 9).
 *
 * C is reloaded from a saved copy before every repetition and on exit.
 */
void time_Trmm( int param_combo, int type, int nrepeats, int m, int n,
                FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  double  best_time = 1.0e9;
  int     rep;
  FLA_Obj C_old, A_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_old );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( C_old, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    if ( type == FLA_ALG_REFERENCE )
    {
      switch ( param_combo )
      {
        case  0: REF_Trmm( FLA_LEFT,  FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  1: REF_Trmm( FLA_LEFT,  FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  2: REF_Trmm( FLA_LEFT,  FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  3: REF_Trmm( FLA_LEFT,  FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  4: REF_Trmm( FLA_LEFT,  FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  5: REF_Trmm( FLA_LEFT,  FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  6: REF_Trmm( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  7: REF_Trmm( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  8: REF_Trmm( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case  9: REF_Trmm( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case 10: REF_Trmm( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        case 11: REF_Trmm( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat ); break;
        default: break;
      }
    }
    else if ( type == FLA_ALG_FRONT )
    {
      switch ( param_combo )
      {
        case  0: FLASH_Trmm( FLA_LEFT,  FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  1: FLASH_Trmm( FLA_LEFT,  FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  2: FLASH_Trmm( FLA_LEFT,  FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  3: FLASH_Trmm( FLA_LEFT,  FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  4: FLASH_Trmm( FLA_LEFT,  FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  5: FLASH_Trmm( FLA_LEFT,  FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  6: FLASH_Trmm( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  7: FLASH_Trmm( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  8: FLASH_Trmm( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case  9: FLASH_Trmm( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case 10: FLASH_Trmm( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,   FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        case 11: FLASH_Trmm( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,      FLA_NONUNIT_DIAG, FLA_TWO, A, C ); break;
        default: break;
      }
    }
    else if ( 0 <= param_combo && param_combo <= 11 )
    {
      /* Unknown algorithm type for a known parameter combination. */
      printf("trouble\n");
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_width( C ) *
            FLASH_Obj_scalar_width( A ) /
            best_time /
            1.0e9;

  /* The conjugate-transpose combinations get the 4x multiplier. */
  switch ( param_combo )
  {
    case 0:
    case 3:
    case 6:
    case 9:
      *gflops *= 4.0;
      break;
    default:
      break;
  }

  *dtime = best_time;

  FLASH_Copy( C_old, C );

  FLASH_Obj_free( &C_old );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
/*
 * Time FLASH_Lyap on C and report the best time over nrepeats runs, a
 * GFLOPS estimate, and a residual norm.
 *
 *   param_combo  0 -> FLA_NO_TRANSPOSE, 1 -> FLA_CONJ_TRANSPOSE. For any
 *                other value the timed region is empty.
 *   type         only FLA_ALG_FRONT is timed; no reference implementation
 *                is wired in, so any other type prints "trouble" once per
 *                repetition.
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m            problem size used in the GFLOPS formula.
 *   isgn, A, scale  forwarded to FLASH_Lyap; isgn also scales the residual.
 *   dtime        out: minimum time measured with FLA_Clock().
 *   diff         out: 1-norm (FLASH_Norm1) of the residual W built below.
 *   gflops       out: (2/3) * m^3 / dtime / 1e9, multiplied by 4 when
 *                FLA_Obj_is_complex( C ) is true.
 *
 * The pair ( param_combo == 0, FLA_ALG_FRONT ) is skipped outright: all three
 * outputs are set to 0.0 and nothing is run.
 *
 * C is reloaded from a saved copy before every repetition and on exit.
 */
void time_Lyap( int param_combo, int type, int nrepeats, int m,
                FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale,
                double *dtime, double *diff, double *gflops )
{
  int     irep;
  double  dtime_old = 1.0e9;
  FLA_Obj C_save, norm;

  if ( param_combo == 0 && type == FLA_ALG_FRONT )
  {
    /* Skipped configuration: give every output a defined value so the
       caller never reads an unset *dtime. */
    *dtime  = 0.0;
    *gflops = 0.0;
    *diff   = 0.0;
    return;
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_save );
  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( C ), 1, 1, 0, 0, &norm );

  FLASH_Copy( C, C_save );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( C_save, C );

    *dtime = FLA_Clock();

    switch ( param_combo )
    {
      case 0:
        switch ( type )
        {
          case FLA_ALG_FRONT:
            FLASH_Lyap( FLA_NO_TRANSPOSE, isgn, A, C, scale );
            break;
          default:
            printf("trouble\n");
        }
        break;

      case 1:
        switch ( type )
        {
          case FLA_ALG_FRONT:
            FLASH_Lyap( FLA_CONJ_TRANSPOSE, isgn, A, C, scale );
            break;
          default:
            printf("trouble\n");
        }
        break;

      default:
        break;
    }

    *dtime    = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  /* Residual check: with X the Hermitianized copy of the current C, form
       param_combo 0:  W = A * X   + X * A^H
       param_combo 1:  W = A^H * X + X * A
     then scale W by isgn, subtract the saved right-hand side C_save, and
     report the 1-norm of what is left. */
  {
    FLA_Obj X, W;

    FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &X );
    FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &W );

    FLASH_Copy( C, X );
    FLASH_Hermitianize( FLA_UPPER_TRIANGULAR, X );

    if ( param_combo == 0 )
    {
      FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A, X, FLA_ZERO, W );
      FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );
    }
    else if ( param_combo == 1 )
    {
      FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, X, FLA_ZERO, W );
      FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );
    }

    FLASH_Scal( isgn, W );
    FLASH_Axpy( FLA_MINUS_ONE, C_save, W );
    FLASH_Norm1( W, norm );
    FLA_Obj_extract_real_scalar( norm, diff );

    FLASH_Obj_free( &X );
    FLASH_Obj_free( &W );
  }

  /* Cube m in double precision: m * m * m evaluated in int overflows
     (undefined behaviour) once m exceeds 1290 with a 32-bit int. */
  *gflops = ( 2.0 / 3.0 ) * ( ( double ) m * m * m ) / dtime_old / 1e9;

  if ( FLA_Obj_is_complex( C ) )
  {
    *gflops *= 4.0;
  }

  *dtime = dtime_old;

  FLASH_Copy( C_save, C );

  FLASH_Obj_free( &C_save );
  FLA_Obj_free( &norm );
}
int main( int argc, char *argv[] ) { int m_input, n_input, m, n, rs, cs, i, datatype; int blocksize[3]; int depth; double buffer[64]; double buffer2[64]; FLA_Obj Af, Ah, Bh; FLA_Init(); fprintf( stdout, "%c Enter hierarchy depth:", '%' ); scanf( "%d", &depth ); fprintf( stdout, "%c %d\n", '%', depth ); for ( i = 0; i < depth; ++i ) { fprintf( stdout, "%c Enter blocksize %d:", '%', i ); scanf( "%d", &blocksize[i] ); fprintf( stdout, "%c %d\n", '%', blocksize[i] ); } fprintf( stdout, "%c enter m n: ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); datatype = FLA_DOUBLE; m = m_input; n = n_input; rs = 1; cs = m_input; for( i = 0; i < 64; i++ ) buffer[i] = ( double ) i; for( i = 0; i < 64; i++ ) buffer2[i] = ( double ) 0; //FLASH_Obj_create( datatype, m, n, depth, blocksize, &Ah ); FLASH_Obj_create_without_buffer( datatype, m, n, depth, blocksize, &Ah ); FLASH_Obj_attach_buffer( buffer, rs, cs, &Ah ); //FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, Ah, &Af ); //FLASH_Obj_create_hier_conf_to_flat( FLA_NO_TRANSPOSE, Af, depth, blocksize, &Bh ); //FLASH_Obj_create_flat_copy_of_hier( Ah, &Af ); //FLASH_Obj_create_hier_copy_of_flat( Af, depth, blocksize, &Bh ); FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, Ah, &Bh ); //FLASH_Axpy( FLA_TWO, Ah, Bh ); FLASH_Copy( Ah, Bh ); //FLA_Obj_create_without_buffer( datatype, 4, 4, &Af ); //FLA_Obj_attach_buffer( buffer2, 4, &Af ); //FLASH_Axpy_flat_to_hier( FLA_TWO, Af, 1, 1, Ah ); //FLASH_Axpy_hier_to_flat( FLA_TWO, 1, 1, Ah, Af ); //FLASH_Axpy_buffer_to_hier( FLA_ONE, 4, 4, buffer, 4, 1, 1, Ah ); //FLASH_Axpy_hier_to_buffer( FLA_ONE, 2, 2, Ah, 4, 4, buffer2, 4 ); //fprintf( stderr, "T: Am An = %d %d\n", FLASH_Obj_scalar_length( Ah ), // FLASH_Obj_scalar_width( Ah ) ); //FLASH_Random_matrix( Ah ); //fprintf( stderr, "depth = %d\n", FLASH_Obj_depth( Ah ) );; /* { int depth; int b_m[4]; int b_n[4]; depth = FLASH_Obj_blocksizes( Bh, b_m, b_n ); fprintf( stderr, "depth = %d\n", 
depth );; fprintf( stderr, "b_m[0] = %d\n", b_m[0] );; fprintf( stderr, "b_n[0] = %d\n", b_n[0] );; } */ FLASH_Obj_show( "", Ah, "%11.4e", "" ); FLASH_Obj_show( "", Bh, "%11.4e", "" ); //FLA_Obj_show( "", Af, "%11.4e", "" ); //FLASH_print_struct( Ah ); //fprintf( stderr, "max_diff = %e\n", FLASH_Max_elemwise_diff( Ah, Bh ) );; //FLASH_Obj_free_without_buffer( &Ah ); //FLASH_Obj_free( &Af ); //FLA_Obj_free( &Af ); FLA_Finalize(); return 0; }
/*
 * Time a symmetric rank-k update (Syrk) on C and report the best time over
 * nrepeats runs, a GFLOPS estimate, and a difference against C_ref.
 *
 *   param_combo  selects uplo / trans (alpha is FLA_ONE, beta is FLA_ZERO):
 *                   0  LOWER, NO_TRANSPOSE      1  LOWER, TRANSPOSE
 *                   2  UPPER, NO_TRANSPOSE      3  UPPER, TRANSPOSE
 *                For any other value the timed region is empty.
 *   type         FLA_ALG_REFERENCE calls REF_Syrk on flat copies of A and C;
 *                FLA_ALG_FRONT calls FLASH_Syrk on A and C. Any other value
 *                prints "trouble" (only for param_combo in 0..3).
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m, k         dimensions used only in the GFLOPS formula.
 *   B            not used by this routine.
 *   dtime        out: minimum time measured with FLA_Clock().
 *   diff         out: 0.0 for the reference run (whose flat result is
 *                stored into C_ref), otherwise
 *                FLASH_Max_elemwise_diff( C, C_ref ).
 *   gflops       out: m * m * k / dtime / 1e9, multiplied by 4 when
 *                FLA_Obj_is_complex( C ) is true.
 *
 * C is reloaded from a saved copy before every repetition and on exit.
 */
void time_Syrk( int param_combo, int type, int nrepeats, int m, int k,
                FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  double  best_time = 1.0e9;
  int     rep;
  FLA_Obj C_old, A_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_old );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( C_old, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    if ( type == FLA_ALG_REFERENCE )
    {
      switch ( param_combo )
      {
        case 0: REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
        case 1: REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
        case 2: REF_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
        case 3: REF_Syrk( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
        default: break;
      }
    }
    else if ( type == FLA_ALG_FRONT )
    {
      switch ( param_combo )
      {
        case 0: FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C ); break;
        case 1: FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A, FLA_ZERO, C ); break;
        case 2: FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C ); break;
        case 3: FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A, FLA_ZERO, C ); break;
        default: break;
      }
    }
    else if ( 0 <= param_combo && param_combo <= 3 )
    {
      /* Unknown algorithm type for a known parameter combination. */
      printf("trouble\n");
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 * m * m * k / best_time / 1.0e9;

  if ( FLA_Obj_is_complex( C ) )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  FLASH_Copy( C_old, C );

  FLASH_Obj_free( &C_old );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
/*
 * Time an in-place SPD inversion (SPDinv) of C and report the best time over
 * nrepeats runs, a GFLOPS estimate, and a difference against C_ref.
 *
 *   param_combo  0 -> FLA_LOWER_TRIANGULAR, 1 -> FLA_UPPER_TRIANGULAR.
 *                For any other value the timed region is empty.
 *   type         FLA_ALG_REFERENCE calls REF_SPDinv on a flat copy of C;
 *                FLA_ALG_FRONT calls FLASH_SPDinv on C. Any other value
 *                prints "trouble" (only for param_combo 0 or 1).
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m            not used by this routine.
 *   dtime        out: minimum time measured with FLA_Clock().
 *   diff         out: 0.0 for the reference run (whose flat result is
 *                stored into C_ref), otherwise
 *                FLASH_Max_elemwise_diff( C, C_ref ).
 *   gflops       out: length(C)^3 / dtime / 1e9.
 *
 * C is reloaded from a saved copy before every repetition and on exit.
 */
void time_SPDinv( int param_combo, int type, int nrepeats, int m,
                  FLA_Obj C, FLA_Obj C_ref,
                  double *dtime, double *diff, double *gflops )
{
  double  best_time = 1.0e9;
  int     rep;
  FLA_Obj C_old, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_old );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( C_old, C );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    if ( type == FLA_ALG_REFERENCE )
    {
      if ( param_combo == 0 )
      {
        REF_SPDinv( FLA_LOWER_TRIANGULAR, C_flat );
      }
      else if ( param_combo == 1 )
      {
        REF_SPDinv( FLA_UPPER_TRIANGULAR, C_flat );
      }
    }
    else if ( type == FLA_ALG_FRONT )
    {
      if ( param_combo == 0 )
      {
        FLASH_SPDinv( FLA_LOWER_TRIANGULAR, C );
      }
      else if ( param_combo == 1 )
      {
        FLASH_SPDinv( FLA_UPPER_TRIANGULAR, C );
      }
    }
    else if ( param_combo == 0 || param_combo == 1 )
    {
      /* Unknown algorithm type for a known parameter combination. */
      printf("trouble\n");
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_length( C ) /
            best_time /
            1e9;

  *dtime = best_time;

  FLASH_Copy( C_old, C );

  FLASH_Obj_free( &C_old );
  FLASH_Obj_free( &C_flat );
}
/*
 * Time an in-place triangular inversion (Trinv) of A and report the best
 * time over nrepeats runs, a GFLOPS estimate, and a difference against A_ref.
 *
 *   param_combo  0 -> FLA_LOWER_TRIANGULAR, 1 -> FLA_UPPER_TRIANGULAR.
 *                For any other value the timed region is empty.
 *   type         FLA_ALG_REFERENCE calls REF_Trinv on a flat copy of A;
 *                FLA_ALG_FRONT calls FLASH_Trinv on A. Any other value
 *                prints "trouble" (only for param_combo 0 or 1).
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m            problem size used in the GFLOPS formula.
 *   diag         forwarded unchanged to the Trinv call.
 *   dtime        out: minimum time measured with FLA_Clock().
 *   diff         out: 0.0 for the reference run (whose flat result is
 *                stored into A_ref), otherwise
 *                FLASH_Max_elemwise_diff( A, A_ref ).
 *   gflops       out: (1/4) * m^3 / dtime / 1e9.
 *
 * A is reloaded from a saved copy before every repetition and on exit.
 */
void time_Trinv( int param_combo, int type, int nrepeats, int m, FLA_Diag diag,
                 FLA_Obj A, FLA_Obj A_ref,
                 double *dtime, double *diff, double *gflops )
{
  double  best_time = 1.0e9;
  int     rep;
  FLA_Obj A_old, A_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );

  FLASH_Copy( A, A_old );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( A_old, A );
    FLASH_Obj_flatten( A, A_flat );

    *dtime = FLA_Clock();

    if ( type == FLA_ALG_REFERENCE )
    {
      if ( param_combo == 0 )
      {
        REF_Trinv( FLA_LOWER_TRIANGULAR, diag, A_flat );
      }
      else if ( param_combo == 1 )
      {
        REF_Trinv( FLA_UPPER_TRIANGULAR, diag, A_flat );
      }
    }
    else if ( type == FLA_ALG_FRONT )
    {
      if ( param_combo == 0 )
      {
        FLASH_Trinv( FLA_LOWER_TRIANGULAR, diag, A );
      }
      else if ( param_combo == 1 )
      {
        FLASH_Trinv( FLA_UPPER_TRIANGULAR, diag, A );
      }
    }
    else if ( param_combo == 0 || param_combo == 1 )
    {
      /* Unknown algorithm type for a known parameter combination. */
      printf("trouble\n");
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( A, A_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( A_flat, A_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 / 4.0 * m * m * m / best_time / 1e9;
  *dtime  = best_time;

  FLASH_Copy( A_old, A );

  FLASH_Obj_free( &A_old );
  FLASH_Obj_free( &A_flat );
}
/*
 * Time a general matrix-matrix multiply (Gemm) into C and report the best
 * time over nrepeats runs, a GFLOPS estimate, and a difference against C_ref.
 *
 *   param_combo  selects transA / transB (alpha is FLA_ONE, beta is FLA_ZERO):
 *                   0  CONJ_TRANSPOSE, CONJ_TRANSPOSE
 *                   1  CONJ_TRANSPOSE, NO_TRANSPOSE
 *                   2  CONJ_TRANSPOSE, TRANSPOSE
 *                   3  NO_TRANSPOSE,   CONJ_TRANSPOSE
 *                   4  NO_TRANSPOSE,   NO_TRANSPOSE
 *                   5  NO_TRANSPOSE,   TRANSPOSE
 *                   6  TRANSPOSE,      CONJ_TRANSPOSE
 *                   7  TRANSPOSE,      NO_TRANSPOSE
 *                   8  TRANSPOSE,      TRANSPOSE
 *                For any other value the timed region is empty.
 *   type         FLA_ALG_REFERENCE calls REF_Gemm on flat copies of A, B, C;
 *                FLA_ALG_FRONT calls FLASH_Gemm on A, B, C. Any other value
 *                prints "trouble" (only for param_combo in 0..8).
 *   nrepeats     number of timed repetitions; the minimum time is kept.
 *   m, k, n      dimensions used only in the GFLOPS formula.
 *   dtime        out: minimum time measured with FLA_Clock().
 *   diff         out: 0.0 for the reference run (whose flat result is
 *                stored into C_ref), otherwise
 *                FLASH_Max_elemwise_diff( C, C_ref ).
 *   gflops       out: 2 * m * k * n / dtime / 1e9, multiplied by 4 for
 *                combinations 0, 1, 2, 3 and 6.
 *
 * C is reloaded from a saved copy before every repetition and on exit.
 */
void time_Gemm( int param_combo, int type, int nrepeats, int m, int k, int n,
                FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  double  best_time = 1.0e9;
  int     rep;
  FLA_Obj C_old, A_flat, B_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_old );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    /* Start every repetition from the same input data. */
    FLASH_Copy( C_old, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( B, B_flat );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    if ( type == FLA_ALG_REFERENCE )
    {
      switch ( param_combo )
      {
        case 0: REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 1: REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 2: REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE,      FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 3: REF_Gemm( FLA_NO_TRANSPOSE,   FLA_CONJ_TRANSPOSE, FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 4: REF_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE,   FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 5: REF_Gemm( FLA_NO_TRANSPOSE,   FLA_TRANSPOSE,      FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 6: REF_Gemm( FLA_TRANSPOSE,      FLA_CONJ_TRANSPOSE, FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 7: REF_Gemm( FLA_TRANSPOSE,      FLA_NO_TRANSPOSE,   FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        case 8: REF_Gemm( FLA_TRANSPOSE,      FLA_TRANSPOSE,      FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        default: break;
      }
    }
    else if ( type == FLA_ALG_FRONT )
    {
      switch ( param_combo )
      {
        case 0: FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 1: FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 2: FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE,      FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 3: FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 4: FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE,   FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 5: FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_TRANSPOSE,      FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 6: FLASH_Gemm( FLA_TRANSPOSE,      FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 7: FLASH_Gemm( FLA_TRANSPOSE,      FLA_NO_TRANSPOSE,   FLA_ONE, A, B, FLA_ZERO, C ); break;
        case 8: FLASH_Gemm( FLA_TRANSPOSE,      FLA_TRANSPOSE,      FLA_ONE, A, B, FLA_ZERO, C ); break;
        default: break;
      }
    }
    else if ( 0 <= param_combo && param_combo <= 8 )
    {
      /* Unknown algorithm type for a known parameter combination. */
      printf("trouble\n");
    }

    *dtime    = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 * m * k * n / best_time / 1.0e9;

  /* Combinations that get the 4x multiplier. */
  switch ( param_combo )
  {
    case 0:
    case 1:
    case 2:
    case 3:
    case 6:
      *gflops *= 4.0;
      break;
    default:
      break;
  }

  *dtime = best_time;

  FLASH_Copy( C_old, C );

  FLASH_Obj_free( &C_old );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &B_flat );
  FLASH_Obj_free( &C_flat );
}