FLA_Error FLASH_Gemv( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Error r_val; FLA_Bool enable_supermatrix; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Gemv_check( transa, alpha, A, x, beta, y ); // Find the status of SuperMatrix. enable_supermatrix = FLASH_Queue_get_enabled(); // Temporarily disable SuperMatrix. FLASH_Queue_disable(); // Execute tasks. r_val = FLA_Gemv_internal( transa, alpha, A, x, beta, y, flash_gemv_cntl_fm_rp ); // Restore SuperMatrix to its previous status. if ( enable_supermatrix ) FLASH_Queue_enable(); return r_val; }
FLA_Error FLASH_Axpy( FLA_Obj alpha, FLA_Obj A, FLA_Obj B ) { FLA_Error r_val; FLA_Bool enable_supermatrix; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Axpy_check( alpha, A, B ); // Find the status of SuperMatrix. enable_supermatrix = FLASH_Queue_get_enabled(); // Temporarily disable SuperMatrix. FLASH_Queue_disable(); // Execute tasks. r_val = FLA_Axpy_internal( alpha, A, B, flash_axpy_cntl ); // Restore SuperMatrix to its previous status. if ( enable_supermatrix ) FLASH_Queue_enable(); return r_val; }
FLA_Error FLASH_Trsv( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x ) { FLA_Error r_val; FLA_Bool enable_supermatrix; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Trsv_check( uplo, trans, diag, A, x ); // Find the status of SuperMatrix. enable_supermatrix = FLASH_Queue_get_enabled(); // Temporarily disable SuperMatrix. FLASH_Queue_disable(); // Execute tasks. r_val = FLA_Trsv_internal( uplo, trans, diag, A, x, flash_trsv_cntl ); // Restore SuperMatrix to its previous status. if ( enable_supermatrix ) FLASH_Queue_enable(); return r_val; }
int main(int argc, char *argv[]) { int datatype, m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t nb_alg; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, A_ref; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A_ref ); if ( pc_str[param_combo][0] == 'l' ) FLASH_Random_spd_matrix( FLA_LOWER_TRIANGULAR, A ); else FLASH_Random_spd_matrix( FLA_UPPER_TRIANGULAR, A ); FLASH_Copy( A, A_ref ); fprintf( stdout, "data_chol_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_REFERENCE, n_repeats, m, A, A_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_FRONT, n_repeats, m, A, A_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &A_ref ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_chol\\_%s', 'fla\\_chol\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME chol front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc chol_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, precision, nb_alg, bm, bn, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, A_save, A_flat, B, B_ref, T, T_flat, W, t; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //precision = FLA_SINGLE_PRECISION; precision = FLA_DOUBLE_PRECISION; FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // Determine datatype based on trans argument. if ( pc_str[param_combo][1] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } bm = nb_alg / 4; bn = nb_alg; // If multiplying Q on the left, A is m x m; ...on the right, A is n x n. if ( pc_str[param_combo][0] == 'l' ) { FLA_Obj_create( datatype, nb_alg, nb_alg, &A_flat ); FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A_save ); FLA_Obj_create( datatype, bm, bn, &T_flat ); FLASH_Obj_create_ext( datatype, bm, bn, 1, &bm, &bn, &T ); FLASH_Obj_create_ext( datatype, bm, n, 1, &bm, &bn, &W ); } else { FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &A ); } FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B ); FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B_ref ); FLA_Obj_create( datatype, nb_alg, 1, &t ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); fprintf( stdout, "data_applyq_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); FLASH_Copy( A, A_save ); FLASH_Obj_flatten( A, A_flat ); FLA_QR_blk_external( A_flat, t ); FLASH_Obj_hierarchify( A_flat, A ); time_Apply_Q( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, B, B_ref, t, T, W, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); FLASH_Copy( A_save, A ); FLASH_Obj_flatten( A, A_flat ); FLA_QR_UT( A_flat, t, T_flat ); FLASH_Obj_hierarchify( A_flat, A ); FLASH_Obj_hierarchify( T_flat, T ); time_Apply_Q( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, B, B_ref, t, T, W, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLA_Obj_free( &A_flat ); FLASH_Obj_free( &B ); FLASH_Obj_free( &B_ref ); FLA_Obj_free( &t ); FLASH_Obj_free( &T ); FLA_Obj_free( &T_flat ); FLASH_Obj_free( &W ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_applyq\\_%s', 'fla\\_applyq\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME applyq front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc applyq_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, m_input, n_input, m, n, min_m_n, p_first, p_last, p_inc, pp, pivot_combo, n_repeats, i, n_pivot_combos = N_PIVOT_COMBOS; dim_t nb_alg, nb_flash; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj C, p, b, b_ref, b_norm; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter algorithmic blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &nb_flash ); fprintf( stdout, "%c %u\n", '%', nb_flash ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_disable(); for ( pp = p_first, i = 1; pp <= p_last; pp += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = pp / abs(m_input); if( n < 0 ) n = pp / abs(n_input); min_m_n = min( m, n ); for ( pivot_combo = 0; pivot_combo < n_pivot_combos; pivot_combo++ ){ FLA_Obj_create( datatype, m, n, &C ); FLA_Obj_create( FLA_INT, min_m_n, 1, &p ); FLA_Obj_create( datatype, m, 1, &b ); FLA_Obj_create( datatype, m, 1, &b_ref ); FLA_Obj_create( datatype, 1, 1, &b_norm ); FLA_Random_matrix( C ); FLA_Random_matrix( b ); FLA_Copy_external( b, b_ref ); fprintf( stdout, "data_lu_%s( %d, 1:5 ) = [ %d ", pc_str[pivot_combo], i, pp ); fflush( stdout ); time_LU( pivot_combo, FLA_ALG_REFERENCE, n_repeats, m, n, nb_alg, nb_flash, C, p, b, b_ref, b_norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_LU( pivot_combo, FLA_ALG_FRONT, n_repeats, m, n, nb_alg, nb_flash, C, p, b, b_ref, b_norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &C ); FLA_Obj_free( &p ); FLA_Obj_free( &b ); FLA_Obj_free( &b_ref ); FLA_Obj_free( &b_norm ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_pivot_combos; i++ ) { fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_pivot_combos; i++ ) fprintf( stdout, "'ref\\_lu\\_%s', 'fla\\_lu\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME LU front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc lu_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, nb_alg, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, j, n_param_combos = N_PARAM_COMBOS; int sign; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref, scale, isgn, norm; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c Enter sign (-1 or 1):", '%' ); scanf( "%d", &sign ); fprintf( stdout, "%c %d\n", '%', sign ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } if ( 0 < sign ) isgn = FLA_ONE; else isgn = FLA_MINUS_ONE; //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) { FLA_Obj_create( FLA_DOUBLE, 1, 1, &scale ); FLA_Obj_create( FLA_DOUBLE, 1, 1, &norm ); } else if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) { FLA_Obj_create( FLA_FLOAT, 1, 1, &scale ); FLA_Obj_create( FLA_FLOAT, 1, 1, &norm ); } FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &B ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C_ref ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); FLASH_Random_matrix( C ); FLASH_Norm1( A, norm ); FLASH_Obj_shift_diagonal( FLA_NO_CONJUGATE, norm, A ); FLASH_Norm1( B, norm ); if ( FLA_Obj_is( isgn, FLA_MINUS_ONE ) ) FLA_Negate( norm ); FLASH_Obj_shift_diagonal( FLA_NO_CONJUGATE, norm, B ); FLASH_Copy( C, C_ref ); fprintf( stdout, "data_sylv_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Sylv( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Sylv( param_combo, FLA_ALG_FRONT, n_repeats, m, n, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &B ); FLASH_Obj_free( &C ); FLASH_Obj_free( &C_ref ); } fprintf( stdout, "\n" ); } FLA_Obj_free( &scale ); FLA_Obj_free( &norm ); fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_sylv_%s( :,1 ), data_sylv_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_sylv_%s( :,1 ), data_sylv_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_sylv\\_%s', 'fla\\_sylv\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME sylv front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc sylv_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); }
int main(int argc, char *argv[]) { int datatype, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t nb_flash, nb_alg; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A_flat, T_flat, W_flat, B_flat; FLA_Obj A, TW, W1, B, B_ref; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter algorithmic blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &nb_flash ); fprintf( stdout, "%c %u\n", '%', nb_flash ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ) { FLA_Obj_create( datatype, m, m, &A_flat ); FLA_Obj_create( datatype, nb_alg, m, &T_flat ); FLA_Obj_create( datatype, nb_alg, n, &W_flat ); FLA_Obj_create( datatype, m, n, &B_flat ); FLA_Random_matrix( A_flat ); FLA_Random_matrix( B_flat ); FLASH_Obj_create_ext( datatype, m, n, 1, &nb_flash, &nb_flash, &B ); FLASH_Obj_create_ext( datatype, m, n, 1, &nb_flash, &nb_flash, &B_ref ); FLASH_QR_UT_inc_create_hier_matrices( A_flat, 1, &nb_flash, nb_alg, &A, &TW ); FLASH_Apply_Q_UT_inc_create_workspace( TW, B, &W1 ); FLASH_Obj_hierarchify( B_flat, B ); fprintf( stdout, "data_qrutinc_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Apply_Q_UT_inc( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, TW, W1, B, B_ref, A_flat, T_flat, W_flat, B_flat, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Apply_Q_UT_inc( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, TW, W1, B, B_ref, A_flat, T_flat, W_flat, B_flat, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &A_flat ); FLA_Obj_free( &T_flat ); FLA_Obj_free( &W_flat ); FLA_Obj_free( &B_flat ); FLASH_Obj_free( &A ); FLASH_Obj_free( &TW ); FLASH_Obj_free( &W1 ); FLASH_Obj_free( &B ); FLASH_Obj_free( &B_ref ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_qrutinc_%s( :,1 ), data_qrutinc_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_qrutinc_%s( :,1 ), data_qrutinc_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_qrutinc\\_%s', 'fla\\_qrutinc\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME qrutinc front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc qrutinc_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }