/*
 * MEX gateway that forwards a Matlab call to FLA_Axpyt_external().
 *
 * The first NRHS right-hand-side arguments are converted into NINT integer
 * attributes and NOBJ FLAME objects.  When exactly one extra argument is
 * supplied (nrhs == NRHS+1), the elapsed time of the FLAME call is written
 * through the double pointer obtained from that argument.
 *
 * nlhs and plhs are not used by this gateway.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  int      attr[NINT];
  FLA_Obj  obj[NOBJ];
  double  *dtime;
  int      want_timing;

  FLA_Init();

  /* Validate the argument count before touching prhs. */
  FLA_M2C_CheckNumArgs(NRHS, nrhs);

  /* Translate the Matlab arguments into FLAME C arguments. */
  FLA_M2C_ConvertArgs(NRHS, prhs, NINT, attr, obj);

  /* A trailing extra argument requests timing information. */
  want_timing = (nrhs == NRHS+1);

  if (want_timing) {
    dtime  = FLA_M2C_ConvertDoublePtr(prhs[NRHS]);
    *dtime = FLA_Clock();
  }

  /* The actual FLAME operation. */
  FLA_Axpyt_external(attr[0], obj[0], obj[1], obj[2]);

  if (want_timing) {
    *dtime = FLA_Clock() - *dtime;
  }

  FLA_Finalize();
}
int main(int argc, char* argv[]){ dim_t order; dim_t nA; dim_t permutation[FLA_MAX_ORDER]; FLA_Init(); //Parse input if(parse_input(argc, argv, &order, &nA, permutation) == FLA_FAILURE){ Usage(); FLA_Finalize(); return 0; } if(check_errors(order, nA, permutation) == FLA_FAILURE){ Usage(); FLA_Finalize(); return 0; } test_permute_tensor(order, nA, permutation); FLA_Finalize(); return 0; }
int phonopy_pinv_libflame(double *matrix, double *eigvals, const int size, const double cutoff) { FLA_Obj A, B, l; /* FLA_Obj C; */ double *inv_eigvals; int i; inv_eigvals = (double*)malloc(sizeof(double) * size); FLA_Init(); FLA_Obj_create_without_buffer(FLA_DOUBLE, size, size, &A); FLA_Obj_attach_buffer(matrix, 0, 0, &A); FLA_Obj_create_without_buffer(FLA_DOUBLE, size, 1, &l); FLA_Obj_attach_buffer(eigvals, 0, 0, &l); /* Eigensolver */ FLA_Obj_create_copy_of(FLA_NO_TRANSPOSE, A, &B); FLA_Hevd(FLA_EVD_WITH_VECTORS, FLA_LOWER_TRIANGULAR, B, l); /* SVD */ /* FLA_Obj_create(FLA_DOUBLE, size, size, 0, 0, &B); */ /* use U */ /* FLA_Svd(FLA_SVD_VECTORS_ALL, FLA_SVD_VECTORS_NONE, A, l, B, C); */ /* use V */ /* FLA_Svd(FLA_SVD_VECTORS_NONE, FLA_SVD_VECTORS_ALL, A, l, C, B); */ FLA_Obj_free_without_buffer(&l); for (i = 0; i < size; i++) { if (eigvals[i] < cutoff) { inv_eigvals[i] = 0; } else { inv_eigvals[i] = 1.0 / sqrt(eigvals[i]); } } FLA_Obj_create_without_buffer(FLA_DOUBLE, size, 1, &l); FLA_Obj_attach_buffer(inv_eigvals, 0, 0, &l); FLA_Apply_diag_matrix(FLA_RIGHT, FLA_NO_CONJUGATE, l, B); FLA_Syrk(FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, B, FLA_ZERO, A); FLA_Symmetrize(FLA_LOWER_TRIANGULAR, A); FLA_Obj_free_without_buffer(&A); FLA_Obj_free_without_buffer(&l); FLA_Obj_free(&B); FLA_Finalize(); free(inv_eigvals); return 0; }
/*
 * Initialize libflame only if it has not been initialized yet.
 *
 * On return, *init_result is FLA_SUCCESS when this call performed the
 * initialization, and FLA_FAILURE when FLA_Initialized() already reported
 * true (in which case FLA_Init() is not called).
 */
void FLA_Init_safe( FLA_Error* init_result )
{
  if ( !FLA_Initialized() )
  {
    FLA_Init();
    *init_result = FLA_SUCCESS;
    return;
  }

  /* Already initialized: report that nothing was done here. */
  *init_result = FLA_FAILURE;
}
int main(int argc, char* argv[]){ dim_t order; TLA_sym sym; dim_t n[FLA_MAX_ORDER]; dim_t b[FLA_MAX_ORDER]; dim_t permutation[FLA_MAX_ORDER]; FLA_Obj T; FLA_Init(); //Parse inputs if(parse_input(argc, argv, &order, &sym, n, b, permutation) == FLA_FAILURE){ Usage(); FLA_Finalize(); return 0; } //Error check if(check_errors(order, sym, n, b, permutation) == FLA_FAILURE){ Usage(); FLA_Finalize(); return 0; } //Perform test create_psym_tensor(order, sym, n, b, &T); FLA_Obj_print_matlab("T", T); test_permute_tensor(permutation, T); FLA_Obj_blocked_psym_tensor_free_buffer(&T); FLA_Obj_free_without_buffer(&T); FLA_Finalize(); return 0; }
/*
 * MEX gateway that converts a Matlab array into a tensor object, prints it,
 * and converts it straight back into the first output.
 *
 * Exactly one input argument is required: pargin[0] is read unconditionally
 * below, so calls with no input are rejected up front (as are calls with
 * more than one input).
 *
 * NOTE(review): the object A produced by TLA_mxa_to_tensor() is not released
 * before FLA_Finalize(); whether it owns memory that must be freed cannot be
 * determined from this function -- confirm against TLA_mxa_to_tensor().
 */
void mexFunction(int nargout, mxArray * pargout[], int nargin, const mxArray * pargin[]){
	if (nargin < 1)
	{
		mexErrMsgTxt("Not enough input arguments.");
	}
	if (nargin > 1)
	{
		mexErrMsgTxt("Too many input arguments.");
	}

	FLA_Init();

	//Parse input
	FLA_Obj A;

	mexPrintf("Creating tensor\n");
	TLA_mxa_to_tensor(pargin[0], &A);

	FLA_Obj_print_matlab("A", A);

	//Pass output
	mexPrintf("Passing C back\n");
	TLA_tensor_to_mxa(A, &(pargout[0]));

	//Shut the library down
	mexPrintf("finalizing\n");
	FLA_Finalize();
}
/*
 * Interactive timing driver for the Apply_Q front end.
 *
 * Reads the repeat count, FLASH blocksize, problem-size range and the m/n
 * bindings from stdin, echoes them as '%'-prefixed lines, then for each
 * problem size and each parameter combination in pc_str[] times the
 * reference and front-end variants through time_Apply_Q().  Output on stdout
 * is a Matlab script: one data_applyq_<combo> row per problem size, followed
 * by plotting commands.
 *
 * NOTE(review): scanf() return values are not checked, so malformed input
 * leaves the corresponding variables uninitialized.
 * NOTE(review): m_dim_tag/n_dim_tag hold 10 bytes and m_dim_desc/n_dim_desc
 * 14 bytes; sprintf() with a large enough m_input/n_input would overrun them.
 */
int main(int argc, char *argv[])
{
  int
    datatype,
    precision,
    nb_alg, bm, bn,
    m_input, n_input,
    m, n,
    p_first, p_last, p_inc,
    p,
    n_repeats,
    param_combo,
    i,
    n_param_combos = N_PARAM_COMBOS;

  /* Matlab plot colours / markers, indexed by parameter combination. */
  char *colors = "brkgmcbrkgmcbrkgmc";
  char *ticks = "o+*xso+*xso+*xso+*xs";
  char m_dim_desc[14];
  char n_dim_desc[14];
  char m_dim_tag[10];
  char n_dim_tag[10];

  /* Upper bound of the y axis in the generated plot. */
  double max_gflops=6.0;

  double
    dtime,
    gflops,
    diff;

  FLA_Obj
    A, A_save, A_flat, B, B_ref, T, T_flat, W, t;

  FLA_Init( );

  /* Prompt for each parameter and echo it back as a Matlab comment line. */
  fprintf( stdout, "%c number of repeats: ", '%' );
  scanf( "%d", &n_repeats );
  fprintf( stdout, "%c %d\n", '%', n_repeats );

  fprintf( stdout, "%c enter FLASH blocksize: ", '%' );
  scanf( "%d", &nb_alg );
  fprintf( stdout, "%c %d\n", '%', nb_alg );

  fprintf( stdout, "%c enter problem size first, last, inc: ", '%' );
  scanf( "%d%d%d", &p_first, &p_last, &p_inc );
  fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );

  fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' );
  scanf( "%d%d", &m_input, &n_input );
  fprintf( stdout, "%c %d %d\n", '%', m_input, n_input );

  fprintf( stdout, "\nclear all;\n\n" );

  /* Human-readable description and file-name tag for the m dimension.
     A value of 0 matches none of the branches and leaves both unset. */
  if ( m_input > 0 ) {
    sprintf( m_dim_desc, "m = %d", m_input );
    sprintf( m_dim_tag, "m%dc", m_input);
  }
  else if( m_input < -1 ) {
    sprintf( m_dim_desc, "m = p/%d", -m_input );
    sprintf( m_dim_tag, "m%dp", -m_input );
  }
  else if( m_input == -1 ) {
    sprintf( m_dim_desc, "m = p" );
    sprintf( m_dim_tag, "m%dp", 1 );
  }

  /* Same for the n dimension. */
  if ( n_input > 0 ) {
    sprintf( n_dim_desc, "n = %d", n_input );
    sprintf( n_dim_tag, "n%dc", n_input);
  }
  else if( n_input < -1 ) {
    sprintf( n_dim_desc, "n = p/%d", -n_input );
    sprintf( n_dim_tag, "n%dp", -n_input );
  }
  else if( n_input == -1 ) {
    sprintf( n_dim_desc, "n = p" );
    sprintf( n_dim_tag, "n%dp", 1 );
  }

  //precision = FLA_SINGLE_PRECISION;
  precision = FLA_DOUBLE_PRECISION;

  FLASH_Queue_disable();

  /* i is the 1-based row index used in the emitted Matlab arrays. */
  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
  {
    /* Negative inputs bind the dimension to p divided by |input|. */
    m = m_input;
    n = n_input;

    if( m < 0 ) m = p / abs(m_input);
    if( n < 0 ) n = p / abs(n_input);

    for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){

      // Determine datatype based on trans argument.
      if ( pc_str[param_combo][1] == 'c' )
      {
        if ( precision == FLA_SINGLE_PRECISION )
          datatype = FLA_COMPLEX;
        else
          datatype = FLA_DOUBLE_COMPLEX;
      }
      else
      {
        if ( precision == FLA_SINGLE_PRECISION )
          datatype = FLA_FLOAT;
        else
          datatype = FLA_DOUBLE;
      }

      bm = nb_alg / 4;
      bn = nb_alg;

      // If multiplying Q on the left, A is m x m; ...on the right, A is n x n.
      // NOTE(review): the code below actually sizes the left-side A as
      // nb_alg x nb_alg, not m x m -- confirm which is intended.
      if ( pc_str[param_combo][0] == 'l' )
      {
        FLA_Obj_create( datatype, nb_alg, nb_alg, &A_flat );
        FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A );
        FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A_save );

        FLA_Obj_create( datatype, bm, bn, &T_flat );
        FLASH_Obj_create_ext( datatype, bm, bn, 1, &bm, &bn, &T );
        FLASH_Obj_create_ext( datatype, bm, n, 1, &bm, &bn, &W );
      }
      else
      {
        /* NOTE(review): only A is created on this path, yet A_save, A_flat,
           T, T_flat and W are all used and (except A_save) freed further
           down regardless of the branch taken -- verify that pc_str[] never
           selects this path, or create the missing objects here. */
        FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &A );
      }

      FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B );
      FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B_ref );

      FLA_Obj_create( datatype, nb_alg, 1, &t );

      FLASH_Random_matrix( A );
      FLASH_Random_matrix( B );

      /* Start of the Matlab row for this (combo, problem size). */
      fprintf( stdout, "data_applyq_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p );
      fflush( stdout );

      /* Reference run: keep a copy of A, factor a flat copy with the
         external QR, and push the result back into the hierarchy. */
      FLASH_Copy( A, A_save );

      FLASH_Obj_flatten( A, A_flat );
      FLA_QR_blk_external( A_flat, t );
      FLASH_Obj_hierarchify( A_flat, A );

      time_Apply_Q( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,
                    A, B, B_ref, t, T, W, &dtime, &diff, &gflops );

      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
      fflush( stdout );

      /* Front-end run: restore A, factor with FLA_QR_UT, and push both the
         factored matrix and T back into their hierarchical objects. */
      FLASH_Copy( A_save, A );

      FLASH_Obj_flatten( A, A_flat );
      FLA_QR_UT( A_flat, t, T_flat );
      FLASH_Obj_hierarchify( A_flat, A );
      FLASH_Obj_hierarchify( T_flat, T );

      time_Apply_Q( param_combo, FLA_ALG_FRONT, n_repeats, m, n,
                    A, B, B_ref, t, T, W, &dtime, &diff, &gflops );

      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
      fflush( stdout );

      fprintf( stdout, " ]; \n" );
      fflush( stdout );

      /* NOTE(review): A_save is never freed in this function. */
      FLASH_Obj_free( &A );
      FLA_Obj_free( &A_flat );
      FLASH_Obj_free( &B );
      FLASH_Obj_free( &B_ref );
      FLA_Obj_free( &t );
      FLASH_Obj_free( &T );
      FLA_Obj_free( &T_flat );
      FLASH_Obj_free( &W );
    }

    fprintf( stdout, "\n" );
  }

  /* Emit the Matlab plotting commands: columns 2 and 4 of each data array
     are plotted against column 1. */
  fprintf( stdout, "figure;\n" );

  fprintf( stdout, "hold on;\n" );

  for ( i = 0; i < n_param_combos; i++ ) {
    fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 2 ), '%c:%c' ); \n",
             pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
    fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 4 ), '%c-.%c' ); \n",
             pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
  }

  fprintf( stdout, "legend( ... \n" );

  for ( i = 0; i < n_param_combos; i++ )
    fprintf( stdout, "'ref\\_applyq\\_%s', 'fla\\_applyq\\_%s', ... \n", pc_str[i], pc_str[i] );

  fprintf( stdout, "'Location', 'SouthEast' ); \n" );

  fprintf( stdout, "xlabel( 'problem size p' );\n" );
  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
  fprintf( stdout, "title( 'FLAME applyq front-end performance (%s, %s)' );\n",
           m_dim_desc, n_dim_desc );
  fprintf( stdout, "print -depsc applyq_front_%s_%s.eps\n", m_dim_tag, n_dim_tag );
  fprintf( stdout, "hold off;\n");
  fflush( stdout );

  FLA_Finalize( );

  return 0;
}
int main( int argc, char *argv[] ) { int i, j, n_threads, n_repeats, n_trials, increment, begin, sorting, caching, work_stealing, data_affinity; dim_t size, nb_alg; FLA_Datatype datatype = FLA_DOUBLE; FLA_Inv inv = FLA_NO_INVERSE; FLA_Uplo uplo = FLA_LOWER_TRIANGULAR; FLA_Obj A, B, x, b, b_norm, AH, BH; double length, b_norm_value = 0.0, dtime, *dtimes, *flops; #ifndef FLA_ENABLE_WINDOWS_BUILD char output_file_m[100]; FILE *fpp; #endif fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d\n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n", '%', n_threads ); fprintf( stdout, "%c Enter SuperMatrix parameters: sorting, caching, work stealing, data affinity: ", '%' ); scanf( "%d%d%d%d", &sorting, &caching, &work_stealing, &data_affinity ); fprintf( stdout, "%c %s %s %s %s\n\n", '%', ( sorting ? "TRUE" : "FALSE" ), ( caching ? "TRUE" : "FALSE" ), ( work_stealing ? "TRUE" : "FALSE" ), ( data_affinity ? ( data_affinity == 1 ? 
"FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC" : "FLASH_QUEUE_AFFINITY_OTHER" ) : "FLASH_QUEUE_AFFINITY_NONE" ) ); #ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, "%s_%u = [\n", OUTPUT_FILE, nb_alg ); #else sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%\n" ); fprintf( fpp, "%% | Matrix Size | FLASH |\n" ); fprintf( fpp, "%% | n x n | GFlops |\n" ); fprintf( fpp, "%% -----------------------------\n" ); fprintf( fpp, "%s_%u = [\n", OUTPUT_FILE, nb_alg ); #endif FLA_Init(); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); FLASH_Queue_set_num_threads( n_threads ); FLASH_Queue_set_sorting( sorting ); FLASH_Queue_set_caching( caching ); FLASH_Queue_set_work_stealing( work_stealing ); FLASH_Queue_set_data_affinity( data_affinity ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); FLA_Obj_create( datatype, size, size, 0, 0, &B ); FLA_Obj_create( datatype, size, 1, 0, 0, &x ); FLA_Obj_create( datatype, size, 1, 0, 0, &b ); FLA_Obj_create( datatype, 1, 1, 0, 0, &b_norm ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( x ); FLA_Random_matrix( b ); FLA_Symmetrize( uplo, A ); FLA_Symmetrize( uplo, B ); length = ( double ) FLA_Obj_length( B ); FLA_Add_to_diag( &length, B ); FLA_Symv_external( uplo, FLA_ONE, B, x, FLA_ZERO, b ); FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH ); FLASH_Obj_create_hier_copy_of_flat( B, 1, &nb_alg, &BH ); FLASH_Chol( uplo, BH ); dtime = FLA_Clock(); FLASH_Eig_gest( inv, uplo, AH, BH ); dtime = FLA_Clock() - dtime; dtimes[j] = dtime; FLASH_Obj_free( &AH ); FLASH_Obj_free( &BH ); } dtime = dtimes[0]; for ( j = 1; j < n_repeats; j++ ) dtime = min( dtime, dtimes[j] ); flops[i] = 1.0 * size * size * size / dtime / 1e9; #ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, " %d %6.3f 
%le\n", size, flops[i], b_norm_value ); #else fprintf( fpp, " %d %6.3f\n", size, flops[i] ); fprintf( stdout, "Time: %e | GFlops: %6.3f\n", dtime, flops[i] ); fprintf( stdout, "Matrix size: %u x %u | nb_alg: %u\n", size, size, nb_alg ); fprintf( stdout, "Norm of difference: %le\n\n", b_norm_value ); #endif FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &x ); FLA_Obj_free( &b ); FLA_Obj_free( &b_norm ); } #ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, "];\n\n" ); #else fprintf( fpp, "];\n" ); fflush( fpp ); fclose( fpp ); #endif FLA_free( dtimes ); FLA_free( flops ); FLA_Finalize(); return 0; }
int main(int argc, char *argv[]) { int datatype, m_input, k_input, m, k, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char k_dim_desc[14]; char m_dim_tag[10]; char k_dim_tag[10]; dim_t nb_alg, n_threads; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m k (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &k_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, k_input ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n", '%', n_threads ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( k_input > 0 ) { sprintf( k_dim_desc, "k = %d", k_input ); sprintf( k_dim_tag, "k%dc", k_input); } else if( k_input < -1 ) { sprintf( k_dim_desc, "k = p/%d", -k_input ); sprintf( k_dim_tag, "k%dp", -k_input ); } else if( k_input == -1 ) { sprintf( k_dim_desc, "k = p" ); sprintf( k_dim_tag, "k%dp", 1 ); } //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_set_num_threads( n_threads ); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k 
= k_input; if( m < 0 ) m = p / abs(m_input); if( k < 0 ) k = p / abs(k_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // If transposing A, switch dimensions. if ( pc_str[param_combo][1] == 'n' ) { FLASH_Obj_create( datatype, m, k, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, m, k, 1, &nb_alg, &B ); } else { FLASH_Obj_create( datatype, k, m, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, k, m, 1, &nb_alg, &B ); } FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &C ); FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &C_ref ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); FLASH_Random_matrix( C ); fprintf( stdout, "data_her2k_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Her2k( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Her2k( param_combo, FLA_ALG_FRONT, n_repeats, m, k, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &B ); FLASH_Obj_free( &C ); FLASH_Obj_free( &C_ref ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_her2k_%s( :,1 ), data_her2k_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_her2k_%s( :,1 ), data_her2k_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_her2k\\_%s', 'fla\\_her2k\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' 
);\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME her2k front-end performance (%s, %s)' );\n", m_dim_desc, k_dim_desc ); fprintf( stdout, "print -depsc her2k_front_%s_%s.eps\n", m_dim_tag, k_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, j, n_param_combos = N_PARAM_COMBOS; int sign; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, C, C_ref, scale, isgn, norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter sign (-1 or 1):", '%' ); scanf( "%d", &sign ); fprintf( stdout, "%c %d\n", '%', sign ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( 0 < sign ) isgn = FLA_ONE; else isgn = FLA_MINUS_ONE; //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, 0, 0, &C ); FLA_Obj_create( datatype, m, m, 0, 0, &C_ref ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &scale ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); 
FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Norm1( A, norm ); FLA_Shift_diag( FLA_NO_CONJUGATE, norm, A ); FLA_Random_matrix( C ); FLA_Hermitianize( FLA_UPPER_TRIANGULAR, C ); fprintf( stdout, "data_lyap_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Lyap( param_combo, FLA_ALG_REFERENCE, n_repeats, m, isgn, A, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Lyap( param_combo, FLA_ALG_FRONT, n_repeats, m, isgn, A, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); FLA_Obj_free( &scale ); FLA_Obj_free( &norm ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_lyap_%s( :,1 ), data_lyap_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_lyap_%s( :,1 ), data_lyap_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_lyap\\_%s', 'fla\\_lyap\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME lyap front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc lyap_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t nb_alg; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, A_ref; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A_ref ); if ( pc_str[param_combo][0] == 'l' ) FLASH_Random_spd_matrix( FLA_LOWER_TRIANGULAR, A ); else FLASH_Random_spd_matrix( FLA_UPPER_TRIANGULAR, A ); FLASH_Copy( A, A_ref ); fprintf( stdout, "data_chol_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_REFERENCE, n_repeats, m, A, 
A_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_FRONT, n_repeats, m, A, A_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &A_ref ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_chol\\_%s', 'fla\\_chol\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME chol front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc chol_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, m_input, m, p_first, p_last, p_inc, p, nb_alg, variant, n_repeats, i, j, n_variants = N_VARIANTS; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, b, b_orig, norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size:", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / f2c_abs(m_input); FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig ); /* FLA_Obj_create( datatype, m, m, m, 1, &A ); FLA_Obj_create( datatype, m, 1, 1, 1, &b ); FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig ); */ if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Random_matrix( b ); FLA_Copy_external( b, 
b_orig ); /* time_Trinv_un( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops ); fflush( stdout ); */ for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Trinv_un( variant, FLA_ALG_UNBLOCKED, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Trinv_un( variant, FLA_ALG_UNB_OPT, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Trinv_un( variant, FLA_ALG_BLOCKED, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); } FLA_Obj_free( &A ); FLA_Obj_free( &b ); FLA_Obj_free( &b_orig ); FLA_Obj_free( &norm ); fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" ); for ( i = 1; i <= n_variants; i++ ){ fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n", variant, variant, colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); fprintf( stdout, "'Reference', ... \n" ); for ( i = 1; i <= n_variants; i++ ) fprintf( stdout, "'FLAME var%d', ... \n", i ); fprintf( stdout, "'Location', 'SouthWest' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME trinv\\_u performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc trinv_l_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); }
int main(int argc, char *argv[]) { int m_input, m, p_first, p_last, p_inc, p, k_accum, b_alg, n_iter_max, variant, n_repeats, i, n_variants = 2; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff1, diff2; FLA_Datatype datatype, dt_real; FLA_Obj A, l, Q, Ql, TT, r, d, e, A_orig, G, R, W2, de, alpha; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter n_iter_max (per eigenvalue): ", '%' ); scanf( "%d", &n_iter_max ); fprintf( stdout, "%c %d\n", '%', n_iter_max ); fprintf( stdout, "%c enter number of sets of Givens rotations to accumulate:", '%' ); scanf( "%d", &k_accum ); fprintf( stdout, "%c %d\n", '%', k_accum ); fprintf( stdout, "%c enter blocking size for application of G:", '%' ); scanf( "%d", &b_alg ); fprintf( stdout, "%c %d\n", '%', b_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, 0, 0, &A_orig ); FLA_Obj_create( datatype, m, m, 0, 0, &Q ); FLA_Obj_create( datatype, m, 
m, 0, 0, &Ql ); FLA_Obj_create( datatype, m, 1, 0, 0, &r ); FLA_Obj_create( datatype, m, m, 0, 0, &W2 ); FLA_Obj_create( datatype, m-1, k_accum, 0, 0, &G ); dt_real = FLA_Obj_datatype_proj_to_real( A ); FLA_Obj_create( dt_real, m, 1, 0, 0, &l ); FLA_Obj_create( dt_real, m, 1, 0, 0, &d ); FLA_Obj_create( dt_real, m-1, 1, 0, 0, &e ); FLA_Obj_create( dt_real, m, m, 0, 0, &R ); FLA_Obj_create( dt_real, 1, 1, 0, 0, &alpha ); *FLA_DOUBLE_PTR( alpha ) = 1.0 / ( sqrt( sqrt( (double) m ) ) ); FLA_Random_unitary_matrix( Q ); //FLA_Fill_with_uniform_dist( FLA_ONE, l ); //FLA_Fill_with_inverse_dist( FLA_ONE, l ); FLA_Fill_with_geometric_dist( alpha, l ); { FLA_Copy( Q, Ql ); FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, l, Ql ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, Ql, Q, FLA_ZERO, A ); FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Copy( A, A_orig ); } FLA_Set( FLA_ZERO, l ); FLA_Set( FLA_ZERO, Q ); FLA_Tridiag_UT_create_T( A, &TT ); FLA_Tridiag_UT( FLA_LOWER_TRIANGULAR, A, TT ); FLA_Tridiag_UT_realify( FLA_LOWER_TRIANGULAR, A, r ); FLA_Tridiag_UT_extract_diagonals( FLA_LOWER_TRIANGULAR, A, d, e ); FLA_Tridiag_UT_form_Q( FLA_LOWER_TRIANGULAR, A, TT ); FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A ); FLA_Obj_free( &TT ); time_Tevd_v( 0, FLA_ALG_REFERENCE, n_repeats, m, k_accum, b_alg, n_iter_max, A_orig, d, e, G, R, W2, A, l, &dtime, &diff1, &diff2, &gflops ); fprintf( stdout, "data_REFq( %d, 1:3 ) = [ %d %6.3lf %9.2e %6.2le %6.2le ]; \n", i, p, gflops, dtime, diff1, diff2 ); fflush( stdout ); for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:3 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Tevd_v( variant, FLA_ALG_UNB_OPT, n_repeats, m, k_accum, b_alg, n_iter_max, A_orig, d, e, G, R, W2, A, l, &dtime, &diff1, &diff2, &gflops ); fprintf( stdout, "%6.3lf %9.2e %6.2le %6.2le ", gflops, dtime, diff1, diff2 ); fflush( stdout ); fprintf( stdout, "];\n" ); fflush( stdout ); } 
fprintf( stdout, "\n" ); FLA_Obj_free( &A ); FLA_Obj_free( &A_orig ); FLA_Obj_free( &Q ); FLA_Obj_free( &Ql ); FLA_Obj_free( &G ); FLA_Obj_free( &W2 ); FLA_Obj_free( &r ); FLA_Obj_free( &l ); FLA_Obj_free( &d ); FLA_Obj_free( &e ); FLA_Obj_free( &R ); FLA_Obj_free( &alpha ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" ); fprintf( stdout, "hold on;\n" ); for ( i = 1; i <= n_variants; i++ ) { fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n", i, i, colors[ i-1 ], ticks[ i-1 ] ); fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n", i, i, colors[ i-1 ], ticks[ i-1 ] ); } fprintf( stdout, "legend( ... \n" ); fprintf( stdout, "'Reference', ... \n" ); for ( i = 1; i < n_variants; i++ ) fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i ); fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME Hevd_lv performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc tridiag_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, n_threads, m_input, m, n_input, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t b_flash, b_alg; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, TW, b, x; FLA_Obj A_flat, b_flat, x_flat; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &b_flash ); fprintf( stdout, "%c %u\n", '%', b_flash ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n", '%', n_threads ); fprintf( stdout, "\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_set_num_threads( n_threads ); //FLASH_Queue_set_verbose_output( TRUE ); //FLA_Check_error_level_set( FLA_NO_ERROR_CHECKING ); //FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if ( m < 0 ) m = p * abs(m_input); if ( n < 0 ) n = p * abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ) { 
FLA_Obj_create( datatype, m, n, 0, 0, &A_flat ); FLA_Obj_create( datatype, n, 1, 0, 0, &x_flat ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_flat ); FLA_Random_matrix( A_flat ); FLA_Random_matrix( b_flat ); FLASH_QR_UT_create_hier_matrices( A_flat, 1, &b_flash, &A, &TW ); FLASH_Obj_create_hier_copy_of_flat( b_flat, 1, &b_flash, &b ); FLASH_Obj_create_hier_copy_of_flat( x_flat, 1, &b_flash, &x ); fprintf( stdout, "data_qrut_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_QR_UT( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, TW, b, x, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &A_flat ); FLA_Obj_free( &b_flat ); FLA_Obj_free( &x_flat ); FLASH_Obj_free( &A ); FLASH_Obj_free( &TW ); FLASH_Obj_free( &b ); FLASH_Obj_free( &x ); } } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_qrut_%s( :,1 ), data_qrut_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_qrut_%s( :,1 ), data_qrut_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_qrut\\_%s', 'fla\\_qrut\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME qrut front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc qrut_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; FLA_Uplo uplo; FLA_Diag diag; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, b, b_orig, norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ //FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, m, 1, &A ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig ); if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Param_map_netlib_to_flame_uplo( &pc_str[param_combo][0], &uplo ); FLA_Param_map_netlib_to_flame_diag( &pc_str[param_combo][1], &diag ); FLA_Random_tri_matrix( uplo, diag, A ); FLA_Random_matrix( b ); FLA_Copy_external( b, b_orig 
); fprintf( stdout, "data_trinv_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); /* time_Trinv( param_combo, FLA_ALG_REFERENCE, n_repeats, m, uplo, diag, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); */ time_Trinv( param_combo, FLA_ALG_FRONT, n_repeats, m, uplo, diag, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &b ); FLA_Obj_free( &b_orig ); FLA_Obj_free( &norm ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_trinv_%s( :,1 ), data_trinv_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_trinv_%s( :,1 ), data_trinv_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_trinv\\_%s', 'fla\\_trinv\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthWest' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME trinv front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc trinv_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main( int argc, char *argv[] ) { int m_input, n_input, m, n, rs, cs, i, datatype; int blocksize[3]; int depth; double buffer[64]; double buffer2[64]; FLA_Obj Af, Ah, Bh; FLA_Init(); fprintf( stdout, "%c Enter hierarchy depth:", '%' ); scanf( "%d", &depth ); fprintf( stdout, "%c %d\n", '%', depth ); for ( i = 0; i < depth; ++i ) { fprintf( stdout, "%c Enter blocksize %d:", '%', i ); scanf( "%d", &blocksize[i] ); fprintf( stdout, "%c %d\n", '%', blocksize[i] ); } fprintf( stdout, "%c enter m n: ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); datatype = FLA_DOUBLE; m = m_input; n = n_input; rs = 1; cs = m_input; for( i = 0; i < 64; i++ ) buffer[i] = ( double ) i; for( i = 0; i < 64; i++ ) buffer2[i] = ( double ) 0; //FLASH_Obj_create( datatype, m, n, depth, blocksize, &Ah ); FLASH_Obj_create_without_buffer( datatype, m, n, depth, blocksize, &Ah ); FLASH_Obj_attach_buffer( buffer, rs, cs, &Ah ); //FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, Ah, &Af ); //FLASH_Obj_create_hier_conf_to_flat( FLA_NO_TRANSPOSE, Af, depth, blocksize, &Bh ); //FLASH_Obj_create_flat_copy_of_hier( Ah, &Af ); //FLASH_Obj_create_hier_copy_of_flat( Af, depth, blocksize, &Bh ); FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, Ah, &Bh ); //FLASH_Axpy( FLA_TWO, Ah, Bh ); FLASH_Copy( Ah, Bh ); //FLA_Obj_create_without_buffer( datatype, 4, 4, &Af ); //FLA_Obj_attach_buffer( buffer2, 4, &Af ); //FLASH_Axpy_flat_to_hier( FLA_TWO, Af, 1, 1, Ah ); //FLASH_Axpy_hier_to_flat( FLA_TWO, 1, 1, Ah, Af ); //FLASH_Axpy_buffer_to_hier( FLA_ONE, 4, 4, buffer, 4, 1, 1, Ah ); //FLASH_Axpy_hier_to_buffer( FLA_ONE, 2, 2, Ah, 4, 4, buffer2, 4 ); //fprintf( stderr, "T: Am An = %d %d\n", FLASH_Obj_scalar_length( Ah ), // FLASH_Obj_scalar_width( Ah ) ); //FLASH_Random_matrix( Ah ); //fprintf( stderr, "depth = %d\n", FLASH_Obj_depth( Ah ) );; /* { int depth; int b_m[4]; int b_n[4]; depth = FLASH_Obj_blocksizes( Bh, b_m, b_n ); fprintf( stderr, "depth = %d\n", 
depth );; fprintf( stderr, "b_m[0] = %d\n", b_m[0] );; fprintf( stderr, "b_n[0] = %d\n", b_n[0] );; } */ FLASH_Obj_show( "", Ah, "%11.4e", "" ); FLASH_Obj_show( "", Bh, "%11.4e", "" ); //FLA_Obj_show( "", Af, "%11.4e", "" ); //FLASH_print_struct( Ah ); //fprintf( stderr, "max_diff = %e\n", FLASH_Max_elemwise_diff( Ah, Bh ) );; //FLASH_Obj_free_without_buffer( &Ah ); //FLASH_Obj_free( &Af ); //FLA_Obj_free( &Af ); FLA_Finalize(); return 0; }
int main(int argc, char *argv[]) { int m_input, m, p_first, p_last, p_inc, p, b_alg, variant, n_repeats, i, j, datatype, n_variants = 5; int blocksize[16]; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, Y, B, norm; FLA_Inv inv = FLA_NO_INVERSE; FLA_Uplo uplo = FLA_UPPER_TRIANGULAR; /* Initialize FLAME */ FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size:", '%' ); scanf( "%d", &b_alg ); fprintf( stdout, "%c %d\n", '%', b_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / f2c_abs(m_input); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, 0, 0, &Y ); FLA_Obj_create( datatype, m, m, 0, 0, &B ); FLA_Random_spd_matrix( uplo, A ); FLA_Hermitianize( uplo, A ); FLA_Random_spd_matrix( uplo, B ); FLA_Chol( uplo, B ); /* time_Eig_gest_nu( 0, FLA_ALG_REFERENCE, n_repeats, p, b_alg, inv, uplo, A, B, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops ); fflush( stdout ); */ for 
( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Eig_gest_nu( variant, FLA_ALG_UNBLOCKED, n_repeats, p, b_alg, inv, uplo, A, Y, B, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Eig_gest_nu( variant, FLA_ALG_UNB_OPT, n_repeats, p, b_alg, inv, uplo, A, Y, B, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Eig_gest_nu( variant, FLA_ALG_BLOCKED, n_repeats, p, b_alg, inv, uplo, A, Y, B, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); } FLA_Obj_free( &A ); FLA_Obj_free( &Y ); FLA_Obj_free( &B ); fprintf( stdout, "\n" ); } /* // Print the MATLAB commands to plot the data // Delete all existing figures fprintf( stdout, "figure;\n" ); // Plot the performance of the reference implementation fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" ); // Indicate that you want to add to the existing plot fprintf( stdout, "hold on;\n" ); // Plot the data for the other numbers of threads for ( i = 1; i <= n_variants; i++ ){ fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n", i, i, colors[ i-1 ], ticks[ i-1 ] ); } fprintf( stdout, "legend( ... \n" ); fprintf( stdout, "'Reference', ... \n" ); for ( i = 1; i <= n_variants; i++ ) fprintf( stdout, "'FLAME var%d', ... \n", i ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME chol\\_l performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc chol_l_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int m, n, k, nfirst, nlast, ninc, i, irep, nrepeats, nb_alg, check;; double dtime, dtime_best, gflops, max_gflops, diff, d_n; FLA_Obj A, B, C, Cref, Cold; /* Initialize FLAME */ FLA_Init( ); /* Every time trial is repeated "repeat" times */ printf( "%% number of repeats:" ); scanf( "%d", &nrepeats ); printf( "%% %d\n", nrepeats ); /* Enter the max GFLOPS attainable */ printf( "%% enter max GFLOPS:" ); scanf( "%lf", &max_gflops ); printf( "%% %lf\n", max_gflops ); /* Enter the algorithmic block size */ printf( "%% enter nb_alg:" ); scanf( "%d", &nb_alg ); printf( "%% %d\n", nb_alg ); /* Timing trials for matrix sizes n=nfirst to nlast in increments of ninc will be performed */ printf( "%% enter nfirst, nlast, ninc:" ); scanf( "%d%d%d", &nfirst, &nlast, &ninc ); printf( "%% %d %d %d\n", nfirst, nlast, ninc ); i = 1; for ( n=nfirst; n<= nlast; n+=ninc ){ /* Allocate space for the matrices */ FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &B ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &C ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cref ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cold ); /* Generate random matrices L and B */ FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( Cold ); gflops = 2.0 * n * n * n * 1.0e-09; /* Time FLA_Symm */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, Cref ); dtime = FLA_Clock(); FLA_Symm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_ONE, A, B, FLA_ONE, Cref ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } printf( "data_FLAME( %d, 1:2 ) = [ %d %le ];\n", i, n, gflops / dtime_best ); fflush( stdout ); /* Time the your implementations */ #if TEST_UNB_VAR1==TRUE /* Variant 1 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var1( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var1( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR1==TRUE /* Variant 1 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var1( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var1( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_UNB_VAR2==TRUE /* Variant 2 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var2( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var2( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR2==TRUE /* Variant 2 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var2( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var2( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_UNB_VAR3==TRUE /* Variant 3 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var3( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var3( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR3==TRUE /* Variant 3 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var3( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var3( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_UNB_VAR4==TRUE /* Variant 4 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var4( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var4( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR4==TRUE /* Variant 4 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var4( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var4( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_UNB_VAR5==TRUE /* Variant 5 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var5( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var5( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR5==TRUE /* Variant 5 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var5( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var5( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_UNB_VAR6==TRUE /* Variant 6 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var6( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var6( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR6==TRUE /* Variant 6 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var6( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var6( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_UNB_VAR7==TRUE /* Variant 7 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var7( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var7( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR7==TRUE /* Variant 4 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var7( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var7( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_UNB_VAR8==TRUE /* Variant 8 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var8( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var8( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif #if TEST_BLK_VAR8==TRUE /* Variant 4 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var8( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var8( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); #endif FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &Cref ); FLA_Obj_free( &Cold ); printf( "\n" ); i++; } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ printf( "close all\n" ); /* Plot the performance of FLAME */ printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), 'k--' ); \n" ); /* Indicate that you want to add to the existing plot */ printf( "hold on\n" ); /* Plot the performance of the reference implementation */ // printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), 'k-' ); \n" ); /* Plot the performance of your implementations */ #if TEST_UNB_VAR1==TRUE printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), 'r-.' ); \n" ); #endif #if TEST_UNB_VAR2==TRUE printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), 'g-.' ); \n" ); #endif #if TEST_UNB_VAR3==TRUE printf( "plot( data_unb_var3( :,1 ), data_unb_var3( :, 2 ), 'b-.' ); \n" ); #endif #if TEST_UNB_VAR4==TRUE printf( "plot( data_unb_var4( :,1 ), data_unb_var4( :, 2 ), 'm-.' ); \n" ); #endif #if TEST_UNB_VAR5==TRUE printf( "plot( data_unb_var5( :,1 ), data_unb_var5( :, 2 ), 'c-.' ); \n" ); #endif #if TEST_UNB_VAR6==TRUE printf( "plot( data_unb_var6( :,1 ), data_unb_var6( :, 2 ), 'y-.' ); \n" ); #endif #if TEST_UNB_VAR7==TRUE printf( "plot( data_unb_var7( :,1 ), data_unb_var7( :, 2 ), 'k-.' 
); \n" ); #endif #if TEST_UNB_VAR8==TRUE printf( "plot( data_unb_var8( :,1 ), data_unb_var8( :, 2 ), 'm:' ); \n" ); #endif #if TEST_BLK_VAR1==TRUE printf( "plot( data_blk_var1( :,1 ), data_blk_var1( :, 2 ), 'r--' ); \n" ); #endif #if TEST_BLK_VAR2==TRUE printf( "plot( data_blk_var2( :,1 ), data_blk_var2( :, 2 ), 'g--' ); \n" ); #endif #if TEST_BLK_VAR3==TRUE printf( "plot( data_blk_var3( :,1 ), data_blk_var3( :, 2 ), 'b--' ); \n" ); #endif #if TEST_BLK_VAR4==TRUE printf( "plot( data_blk_var4( :,1 ), data_blk_var4( :, 2 ), 'm--' ); \n" ); #endif #if TEST_BLK_VAR5==TRUE printf( "plot( data_blk_var5( :,1 ), data_blk_var5( :, 2 ), 'c--' ); \n" ); #endif #if TEST_BLK_VAR6==TRUE printf( "plot( data_blk_var6( :,1 ), data_blk_var6( :, 2 ), 'y--' ); \n" ); #endif #if TEST_BLK_VAR7==TRUE printf( "plot( data_blk_var7( :,1 ), data_blk_var7( :, 2 ), 'k--' ); \n" ); #endif #if TEST_BLK_VAR8==TRUE printf( "plot( data_blk_var8( :,1 ), data_blk_var8( :, 2 ), 'm-' ); \n" ); #endif printf( "hold on \n"); printf( "xlabel( 'matrix dimension m=n' );\n"); printf( "ylabel( 'GFLOPS/sec.' 
);\n"); // printf( "axis( [ 0 %d 0 %3.1f ] ); \n", nlast, max_gflops ); printf( "legend( 'FLA Trsm', ...\n"); #if TEST_UNB_VAR1==TRUE printf( " 'unb var1', ...\n"); #endif #if TEST_UNB_VAR2==TRUE printf( " 'unb var2', ...\n"); #endif #if TEST_UNB_VAR3==TRUE printf( " 'unb var3', ...\n"); #endif #if TEST_UNB_VAR4==TRUE printf( " 'unb var4', ...\n"); #endif #if TEST_UNB_VAR5==TRUE printf( " 'unb var5', ...\n"); #endif #if TEST_UNB_VAR6==TRUE printf( " 'unb var6', ...\n"); #endif #if TEST_UNB_VAR7==TRUE printf( " 'unb var7', ...\n"); #endif #if TEST_UNB_VAR8==TRUE printf( " 'unb var8', ...\n"); #endif #if TEST_BLK_VAR1==TRUE printf( " 'blk var1', ...\n"); #endif #if TEST_BLK_VAR2==TRUE printf( " 'blk var2', ...\n"); #endif #if TEST_BLK_VAR3==TRUE printf( " 'blk var3', ...\n"); #endif #if TEST_BLK_VAR4==TRUE printf( " 'blk var4', ...\n"); #endif #if TEST_BLK_VAR5==TRUE printf( " 'blk var5', ...\n"); #endif #if TEST_BLK_VAR6==TRUE printf( " 'blk var6', ...\n"); #endif #if TEST_BLK_VAR7==TRUE printf( " 'blk var7', ...\n"); #endif #if TEST_BLK_VAR8==TRUE printf( " 'blk var8', ...\n"); #endif printf( " 2 );\n"); FLA_Finalize( ); }
int main(int argc, char *argv[]) { int datatype, nb_alg, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, j, n_param_combos = N_PARAM_COMBOS; int sign; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref, scale, isgn, norm; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c Enter sign (-1 or 1):", '%' ); scanf( "%d", &sign ); fprintf( stdout, "%c %d\n", '%', sign ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } if ( 0 < sign ) isgn = FLA_ONE; else isgn = FLA_MINUS_ONE; //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; if ( datatype == FLA_DOUBLE || datatype == 
FLA_DOUBLE_COMPLEX ) { FLA_Obj_create( FLA_DOUBLE, 1, 1, &scale ); FLA_Obj_create( FLA_DOUBLE, 1, 1, &norm ); } else if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) { FLA_Obj_create( FLA_FLOAT, 1, 1, &scale ); FLA_Obj_create( FLA_FLOAT, 1, 1, &norm ); } FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &B ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C_ref ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); FLASH_Random_matrix( C ); FLASH_Norm1( A, norm ); FLASH_Obj_shift_diagonal( FLA_NO_CONJUGATE, norm, A ); FLASH_Norm1( B, norm ); if ( FLA_Obj_is( isgn, FLA_MINUS_ONE ) ) FLA_Negate( norm ); FLASH_Obj_shift_diagonal( FLA_NO_CONJUGATE, norm, B ); FLASH_Copy( C, C_ref ); fprintf( stdout, "data_sylv_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Sylv( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Sylv( param_combo, FLA_ALG_FRONT, n_repeats, m, n, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &B ); FLASH_Obj_free( &C ); FLASH_Obj_free( &C_ref ); } fprintf( stdout, "\n" ); } FLA_Obj_free( &scale ); FLA_Obj_free( &norm ); fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_sylv_%s( :,1 ), data_sylv_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_sylv_%s( 
:,1 ), data_sylv_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_sylv\\_%s', 'fla\\_sylv\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME sylv front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc sylv_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); }
/*
 * Thin wrapper that forwards to FLA_Init(); takes no arguments and returns
 * nothing. Presumably the Fortran-77-callable entry point, going by the
 * F77_ prefix -- confirm against the binding layer.
 */
void F77_fla_init()
{
  FLA_Init();
}
int main(int argc, char *argv[]) { int m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t b_flash; dim_t n_threads; FLA_Datatype datatype; FLA_Uplo uplo; FLA_Inv inv; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, norm; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &b_flash ); fprintf( stdout, "%c %u\n", '%', b_flash ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n", '%', n_threads ); fprintf( stdout, "\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_set_num_threads( n_threads ); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ if ( pc_str[param_combo][0] == 'i' ) inv = FLA_INVERSE; else inv = FLA_NO_INVERSE; if ( pc_str[param_combo][1] == 'l' ) uplo = FLA_LOWER_TRIANGULAR; else uplo = FLA_UPPER_TRIANGULAR; FLASH_Obj_create( datatype, m, m, 1, 
&b_flash, &A ); FLASH_Obj_create( datatype, m, m, 1, &b_flash, &B ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); FLASH_Random_spd_matrix( uplo, A ); FLASH_Hermitianize( uplo, A ); FLASH_Random_spd_matrix( uplo, B ); FLASH_Chol( uplo, B ); fprintf( stdout, "data_eig_gest_%s( %d, 1:3 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Eig_gest( param_combo, FLA_ALG_FRONT, n_repeats, m, inv, uplo, A, B, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &B ); FLA_Obj_free( &norm ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_eig_gest_%s( :,1 ), data_eig_gest_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_eig_gest_%s( :,1 ), data_eig_gest_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_eig_gest\\_%s', 'fla\\_eig_gest\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME eig_gest front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc eig_gest_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize(); return 0; }
int main(int argc, char *argv[]) { int m_input, k_input, n_input, m, n, k, p_first, p_last, p_inc, p, nb_alg, n_repeats, variant, n_threads, n_thread_experiments, i, j; int n_threads_exp[64]; char *colors = "brkgmckkk"; char *ticks = "o+*xso+*x"; char m_dim_desc[14]; char k_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[5]; char k_dim_tag[5]; char n_dim_tag[5]; char nth_str[32]; double max_gflops=6.0; double dtime, gflops, diff, d_n; FLA_Obj A, B, C, C_ref; /* Initialize FLAME */ FLA_Init( ); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size:", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m k n (-1 means bind to problem size: ", '%' ); scanf( "%d%d%d", &m_input, &k_input, &n_input ); fprintf( stdout, "%c %d %d %d\n", '%', m_input, k_input, n_input ); fprintf( stdout, "%c enter number of thread experiments: ", '%' ); scanf( "%d", &n_thread_experiments ); fprintf( stdout, "%c %d\n", '%', n_thread_experiments ); fprintf( stdout, "%c enter number of threads for each experiment (separated by spaces): ", '%' ); for( i = 0; i < n_thread_experiments; ++i ) scanf( "%d", &n_threads_exp[i] ); fprintf( stdout, "%c", '%' ); for( i = 0; i < n_thread_experiments; ++i ) fprintf( stdout, " %d", n_threads_exp[i] ); /* Delete all existing data structures */ fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( k_input > 0 ) { sprintf( k_dim_desc, 
"k = %d", k_input ); sprintf( k_dim_tag, "k%dc", k_input); } else if( k_input < -1 ) { sprintf( k_dim_desc, "k = p/%d", -k_input ); sprintf( k_dim_tag, "k%dp", -k_input ); } else if( k_input == -1 ) { sprintf( k_dim_desc, "k = p" ); sprintf( k_dim_tag, "k%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } m = p_last; k = p_last; n = p_last; sprintf( nth_str, "OMP_NUM_THREADS=%d", n_threads_exp[ n_thread_experiments-1 ] ); putenv( nth_str ); blas_cpu_number = n_threads_exp[ n_thread_experiments-1 ]; blas_thread_init(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( k < 0 ) k = p / abs(k_input); if( n < 0 ) n = p / abs(n_input); FLA_Obj_create( FLA_DOUBLE, m, k, &A ); FLA_Obj_create( FLA_DOUBLE, k, n, &B ); FLA_Obj_create( FLA_DOUBLE, m, n, &C ); FLA_Obj_create( FLA_DOUBLE, m, n, &C_ref ); /* Generate random matrices A, C */ if( p > 4000 ){ FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); } blas_cpu_number = 1; //time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, // A, B, C, C_ref, &dtime, &diff, &gflops ); //fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops ); //fflush( stdout ); for ( j = 0; j < n_thread_experiments; j++ ){ n_threads = n_threads_exp[j]; blas_cpu_number = n_threads; fprintf( stdout, "data_nth%d( %d, 1:3 ) = [ %d ", n_threads, i, p ); fflush( stdout ); time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); } fprintf( stdout, "\n" ); FLA_Obj_free( &A ); 
FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ fprintf( stdout, "figure;\n" ); /* Indicate that you want to add to the existing plot */ fprintf( stdout, "hold on;\n" ); /* Plot the data for the other numbers of threads */ for ( i = 0; i < n_thread_experiments; i++ ){ fprintf( stdout, "plot( data_nth%d( :,1 ), data_nth%d( :, 2 ), '%c:%c' ); \n", n_threads_exp[ i ], n_threads_exp[ i ], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_thread_experiments-1; i++ ) fprintf( stdout, "'%d threads', ... \n", n_threads_exp[ i ] ); fprintf( stdout, "'%d threads', 'Location', 'Best' ); \n", n_threads_exp[ n_thread_experiments-1 ] ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, n_threads_exp[n_thread_experiments-1] * max_gflops ); fprintf( stdout, "title( 'Goto BLAS dgemm performance (%s, %s, %s)' );\n", m_dim_desc, k_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc gemm_nn_goto_p_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); }
int main(int argc, char *argv[]) { int datatype, m_input, n_input, m, n, min_m_n, p_first, p_last, p_inc, pp, pivot_combo, n_repeats, i, n_pivot_combos = N_PIVOT_COMBOS; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj C, b, b_orig, b_norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( pp = p_first, i = 1; pp <= p_last; pp += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = pp / abs(m_input); if( n < 0 ) n = pp / abs(n_input); min_m_n = min( m, n ); for ( pivot_combo = 0; pivot_combo < n_pivot_combos; pivot_combo++ ){ FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( 
datatype, m, 1, 0, 0, &b_orig ); if ( FLA_Obj_is_single_precision( C ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &b_norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &b_norm ); FLA_Random_matrix( C ); FLA_Random_matrix( b ); FLA_Copy_external( b, b_orig ); fprintf( stdout, "data_lu_%s( %d, 1:5 ) = [ %d ", pc_str[pivot_combo], i, pp ); fflush( stdout ); //time_LU( pivot_combo, FLA_ALG_REFERENCE, n_repeats, m, n, // C, b, b_orig, b_norm, &dtime, &diff, &gflops ); //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); //fflush( stdout ); time_LU( pivot_combo, FLA_ALG_FRONT, n_repeats, m, n, C, b, b_orig, b_norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &C ); FLA_Obj_free( &b ); FLA_Obj_free( &b_orig ); FLA_Obj_free( &b_norm ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_pivot_combos; i++ ) { fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_pivot_combos; i++ ) fprintf( stdout, "'ref\\_lu\\_%s', 'fla\\_lu\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME LU front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc lu_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref; FLA_Init( ); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // If multiplying A on the left, A is m x m; ...on the right, A is n x n. 
if ( pc_str[param_combo][0] == 'l' ) FLA_Obj_create( datatype, m, m, 0, 0, &A ); else FLA_Obj_create( datatype, n, n, 0, 0, &A ); FLA_Obj_create( datatype, m, n, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_symm_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Symm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Symm( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_symm_%s( :,1 ), data_symm_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_symm_%s( :,1 ), data_symm_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_symm\\_%s', 'fla\\_symm\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME symm front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc symm_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main( int argc, char *argv[] ) { int i, j, n_threads, n_repeats, n_trials, increment, begin, sorting, caching, work_stealing, data_affinity; dim_t size, nb_alg; FLA_Datatype datatype = FLA_DOUBLE; FLA_Obj A, x, b, b_norm, AH, pH, bH; double b_norm_value, dtime, *dtimes, *flops; #ifndef FLA_ENABLE_WINDOWS_BUILD char output_file_m[100]; FILE *fpp; #endif fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d\n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n", '%', n_threads ); fprintf( stdout, "%c Enter SuperMatrix parameters: sorting, caching, work stealing, data affinity: ", '%' ); scanf( "%d%d%d%d", &sorting, &caching, &work_stealing, &data_affinity ); fprintf( stdout, "%c %s %s %s %s\n\n", '%', ( sorting ? "TRUE" : "FALSE" ), ( caching ? "TRUE" : "FALSE" ), ( work_stealing ? "TRUE" : "FALSE" ), ( data_affinity ? ( data_affinity == 1 ? 
"FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC" : "FLASH_QUEUE_AFFINITY_OTHER" ) : "FLASH_QUEUE_AFFINITY_NONE" ) ); #ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, "%s_%u = [\n", OUTPUT_FILE, nb_alg ); #else sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%\n" ); fprintf( fpp, "%% | Matrix Size | FLASH |\n" ); fprintf( fpp, "%% | n x n | GFlops |\n" ); fprintf( fpp, "%% -----------------------------\n" ); fprintf( fpp, "%s_%u = [\n", OUTPUT_FILE, nb_alg ); #endif FLA_Init(); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); FLASH_Queue_set_num_threads( n_threads ); FLASH_Queue_set_sorting( sorting ); FLASH_Queue_set_caching( caching ); FLASH_Queue_set_work_stealing( work_stealing ); FLASH_Queue_set_data_affinity( data_affinity ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); FLA_Obj_create( datatype, size, 1, 0, 0, &x ); FLA_Obj_create( datatype, size, 1, 0, 0, &b ); FLA_Obj_create( datatype, 1, 1, 0, 0, &b_norm ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); FLA_Random_matrix( b ); FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH ); FLASH_Obj_create( FLA_INT, size, 1, 1, &nb_alg, &pH ); FLASH_Obj_create_hier_copy_of_flat( b, 1, &nb_alg, &bH ); dtime = FLA_Clock(); FLASH_LU_piv( AH, pH ); dtime = FLA_Clock() - dtime; dtimes[j] = dtime; FLASH_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pH, bH ); FLASH_Trsv( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, AH, bH ); FLASH_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, AH, bH ); FLASH_Obj_free( &AH ); FLASH_Obj_free( &pH ); FLASH_Obj_flatten( bH, x ); FLASH_Obj_free( &bH ); } dtime = dtimes[0]; for ( j = 1; j < n_repeats; j++ ) dtime = min( dtime, dtimes[j] ); flops[i] = 2.0 / 3.0 * size * size * size / dtime / 1e9; FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, A, x, 
FLA_MINUS_ONE, b ); FLA_Nrm2_external( b, b_norm ); FLA_Obj_extract_real_scalar( b_norm, &b_norm_value ); #ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, " %d %6.3f %le\n", size, flops[i], b_norm_value ); #else fprintf( fpp, " %d %6.3f\n", size, flops[i] ); fprintf( stdout, "Time: %e | GFlops: %6.3f\n", dtime, flops[i] ); fprintf( stdout, "Matrix size: %u x %u | nb_alg: %u\n", size, size, nb_alg ); fprintf( stdout, "Norm of difference: %le\n\n", b_norm_value ); #endif FLA_Obj_free( &A ); FLA_Obj_free( &x ); FLA_Obj_free( &b ); FLA_Obj_free( &b_norm ); } #ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, "];\n\n" ); #else fprintf( fpp, "];\n" ); fflush( fpp ); fclose( fpp ); #endif FLA_free( dtimes ); FLA_free( flops ); FLA_Finalize(); return 0; }
int main(int argc, char *argv[]) { int datatype, precision, m_input, k_input, n_input, m, k, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char k_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char k_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, Ad, Az, B, Bd, Bz, C, Cd, Cz, C_ref, indexd, indexz; FLA_Obj alpha0d, alpha0z, alpha1d, alpha1z, normd, normz; FLA_Obj alphad, alphaz, betad, betaz, rhod, rhoz; FLA_Obj xd, xz, yd, yz; FLA_Init( ); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m k n (-1 means bind to problem size): ", '%' ); scanf( "%d%d%d", &m_input, &k_input, &n_input ); fprintf( stdout, "%c %d %d %d\n", '%', m_input, k_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( k_input > 0 ) { sprintf( k_dim_desc, "k = %d", k_input ); sprintf( k_dim_tag, "k%dc", k_input); } else if( k_input < -1 ) { sprintf( k_dim_desc, "k = p/%d", -k_input ); sprintf( k_dim_tag, "k%dp", -k_input ); } else if( k_input == -1 ) { sprintf( k_dim_desc, "k = p" ); sprintf( k_dim_tag, "k%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", 
-n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //precision = FLA_SINGLE_PRECISION; precision = FLA_DOUBLE_PRECISION; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; n = n_input; if( m < 0 ) m = p / f2c_abs(m_input); if( k < 0 ) k = p / f2c_abs(k_input); if( n < 0 ) n = p / f2c_abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // Determine datatype based on trans argument. if ( pc_str[param_combo][0] == 'c' || pc_str[param_combo][1] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } // If transposing A, switch dimensions. if ( pc_str[param_combo][0] == 'n' ) FLA_Obj_create( datatype, m, k, 0, 0, &A ); else FLA_Obj_create( datatype, k, m, 0, 0, &A ); // If transposing B, switch dimensions. if ( pc_str[param_combo][1] == 'n' ) FLA_Obj_create( datatype, k, n, 0, 0, &B ); else FLA_Obj_create( datatype, n, k, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d ", pc_str[param_combo], i, m, k, n ); fflush( stdout ); time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); /* time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); */ fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, 
"hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_gemm\\_%s', 'fla\\_gemm\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME gemm front-end performance (%s, %s, %s)' );\n", m_dim_desc, k_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc gemm_front_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, n_input, mB_input, mC_input, mD_input, mB, mC, mD, n, p_first, p_last, p_inc, p, b_alg, variant, n_repeats, i, n_variants = 1; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj B, C, D, T, R, E; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter algorithmic blocksize:", '%' ); scanf( "%d", &b_alg ); fprintf( stdout, "%c %d\n", '%', b_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter n (-1 means bind to problem size): ", '%' ); scanf( "%d", &n_input ); fprintf( stdout, "%c %d\n", '%', n_input ); fprintf( stdout, "%c enter mB mC mD (-1 means bind to problem size): ", '%' ); scanf( "%d %d %d", &mB_input, &mC_input, &mD_input ); fprintf( stdout, "%c %d %d %d\n", '%', mB_input, mC_input, mD_input ); fprintf( stdout, "\nclear all;\n\n" ); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { mB = mB_input; mC = mC_input; mD = mD_input; n = n_input; if( mB < 0 ) mB = p / abs(mB_input); if( mC < 0 ) mC = p / abs(mC_input); if( mD < 0 ) mD = p / abs(mD_input); if( n < 0 ) n = p / abs(n_input); for ( variant = 0; variant < n_variants; variant++ ){ FLA_Obj_create( datatype, mB, n, 0, 0, &B ); FLA_Obj_create( datatype, mC, n, 0, 0, &C ); FLA_Obj_create( datatype, mD, n, 0, 0, &D ); FLA_Obj_create( datatype, b_alg, n, 0, 0, &T ); FLA_Obj_create( datatype, n, n, 0, 0, &R ); FLA_Obj_create( datatype, n, n, 0, 0, &E ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Random_matrix( D ); FLA_Set( FLA_ZERO, R ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, R ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, 
FLA_CONJ_TRANSPOSE, FLA_ONE, D, FLA_ONE, R ); FLA_Chol( FLA_UPPER_TRIANGULAR, R ); FLA_Set( FLA_ZERO, E ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, E ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, C, FLA_ONE, E ); FLA_Chol( FLA_UPPER_TRIANGULAR, E ); fprintf( stdout, "data_uddate_ut( %d, 1:5 ) = [ %d ", i, p ); fflush( stdout ); time_UDdate_UT( variant, FLA_ALG_FRONT, n_repeats, mB, mC, mD, n, B, C, D, T, R, E, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &D ); FLA_Obj_free( &T ); FLA_Obj_free( &R ); FLA_Obj_free( &E ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_variants; i++ ) { fprintf( stdout, "plot( data_qr_ut( :,1 ), data_qr_ut( :, 2 ), '%c:%c' ); \n", colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_qr_ut( :,1 ), data_qr_ut( :, 4 ), '%c-.%c' ); \n", colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_variants; i++ ) fprintf( stdout, "'ref\\_qr\\_ut', 'fla\\_qr\\_ut', ... \n" ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME UDdate_UT front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc qr_ut_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int m_input, n_input, m, n, p_first, p_last, p_inc, p, nb_alg, variant, n_repeats, i, j, datatype, n_variants = 18; int sign; int blocksize[16]; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref, scale, isgn, norm; /* Initialize FLAME */ FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size:", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c Enter sign (-1 or 1):", '%' ); scanf( "%d", &sign ); fprintf( stdout, "%c %d\n", '%', sign ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); /* Delete all existing data structures */ fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } if ( 0 < sign ) isgn = FLA_ONE; else isgn = FLA_MINUS_ONE; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; 
if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, n, n, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) { FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &scale ); FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); } else if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) { FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &scale ); FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); } FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, B ); FLA_Random_matrix( C ); FLA_Norm1( A, norm ); FLA_Shift_diag( FLA_NO_CONJUGATE, norm, A ); FLA_Norm1( B, norm ); if ( FLA_Obj_is( isgn, FLA_MINUS_ONE ) ) FLA_Negate( norm ); FLA_Shift_diag( FLA_NO_CONJUGATE, norm, B ); time_Sylv_nn( 0, FLA_ALG_REFERENCE, n_repeats, m, n, nb_alg, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops ); fflush( stdout ); for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:3 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Sylv_nn( variant, FLA_ALG_UNB_OPT, n_repeats, m, n, nb_alg, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Sylv_nn( variant, FLA_ALG_BLOCKED, n_repeats, m, n, nb_alg, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); } FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); FLA_Obj_free( &scale ); FLA_Obj_free( &norm ); fprintf( stdout, "\n" ); } /* Print the MATLAB 
commands to plot the data */ /* Delete all existing figures */ fprintf( stdout, "figure;\n" ); /* Plot the performance of the reference implementation */ fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" ); /* Indicate that you want to add to the existing plot */ fprintf( stdout, "hold on;\n" ); /* Plot the data for the other numbers of threads */ for ( i = 1; i <= n_variants; i++ ){ fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n", i, i, colors[ i-1 ], ticks[ i-1 ] ); } fprintf( stdout, "legend( ... \n" ); fprintf( stdout, "'Reference', ... \n" ); for ( i = 1; i <= n_variants; i++ ) fprintf( stdout, "'FLAME var%d', ... \n", i ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME sylv\\_nn performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc sylv_nn_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); }
int main(int argc, char *argv[]) { int m_input, n_input, m, n, p_first, p_last, p_inc, p, nb_alg, n_repeats, variant, i, j, datatype, n_variants = N_VARIANTS; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref; /* Initialize FLAME */ FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); /* Delete all existing data structures */ fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; /* Allocate space for the matrices */ FLA_Obj_create( datatype, m, m, &A ); 
FLA_Obj_create( datatype, m, n, &C ); FLA_Obj_create( datatype, m, n, &C_ref ); /* Generate random matrices A, C */ FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, A ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); /* Time the reference implementation */ time_Trmm_luh( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops ); fflush( stdout ); for ( variant = 1; variant <= n_variants; variant++ ){ //fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p ); fprintf( stdout, "data_var%d( %d, 1:5 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Trmm_luh( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Trmm_luh( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); //time_Trmm_luh( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg, // A, B, C, C_ref, &dtime, &diff, &gflops ); //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); //fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); } fprintf( stdout, "\n" ); FLA_Obj_free( &A ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ fprintf( stdout, "figure;\n" ); /* Plot the performance of the reference implementation */ fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" ); /* Indicate that you want to add to the existing plot */ fprintf( stdout, "hold on;\n" ); /* Plot the data for the other numbers of threads */ for ( i = 1; i <= n_variants; i++ ) { fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n", i, i, colors[ i-1 ], ticks[ i-1 ] ); fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n", i, i, colors[ i-1 ], 
ticks[ i-1 ] ); //fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 6 ), '%c--%c' ); \n", // i, i, colors[ i-1 ], ticks[ i-1 ] ); } fprintf( stdout, "legend( ... \n" ); fprintf( stdout, "'Reference', ... \n" ); for ( i = 1; i < n_variants; i++ ) //fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', 'opt\\_var%d', ... \n", i, i, i ); fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i ); i = n_variants; fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME trmm\\_luc performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc trmm_luc_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); }
int main(int argc, char *argv[]) { int n, nfirst, nlast, ninc, nlast_unb, i, irep, nrepeats, nb_alg; double dtime, dtime_best, gflops, max_gflops, diff, d_n; FLA_Obj A, Aref, Aold, delta; /* Initialize FLAME */ FLA_Init( ); /* Every time trial is repeated "repeat" times and the fastest run in recorded */ printf( "%% number of repeats:" ); scanf( "%d", &nrepeats ); printf( "%% %d\n", nrepeats ); /* Enter the max GFLOPS attainable This is used to set the y-axis range for the graphs. Here is how you figure out what to enter (on Linux machines): 1) more /proc/cpuinfo (this lists the contents of this file). 2) read through this and figure out the clock rate of the machine (in GHz). 3) Find out (from an expert of from the web) the number of floating point instructions that can be performed per core per clock cycle. 4) Figure out if you are using "multithreaded BLAS" which automatically parallelize calls to the Basic Linear Algebra Subprograms. If so, check how many cores are available. 5) Multiply 2) x 3) x 4) and enter this in response to the below. If you enter a value for max GFLOPS that is lower that the maximum that is observed in the experiments, then the top of the graph is set to the observed maximum. Thus, one possibility is to simply set this to 0.0. */ printf( "%% enter max GFLOPS:" ); scanf( "%lf", &max_gflops ); printf( "%% %lf\n", max_gflops ); /* Enter the algorithmic block size */ printf( "%% enter nb_alg:" ); scanf( "%d", &nb_alg ); printf( "%% %d\n", nb_alg ); /* Timing trials for matrix sizes n=nfirst to nlast in increments of ninc will be performed. 
Unblocked versions are only tested to nlast_unb */ printf( "%% enter nfirst, nlast, ninc, nlast_unb:" ); scanf( "%d%d%d%d", &nfirst, &nlast, &ninc, &nlast_unb ); printf( "%% %d %d %d %d\n", nfirst, nlast, ninc, nlast_unb ); i = 1; for ( n=nfirst; n<= nlast; n+=ninc ){ /* Allocate space for the matrices */ FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aref ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aold ); FLA_Obj_create( FLA_DOUBLE, 1, 1, 1, 1, &delta ); /* Generate random matrix A and save in Aold */ FLA_Random_matrix( Aold ); /* Add something large to the diagonal to make sure it isn't ill-conditionsed */ d_n = ( double ) n; *( ( double * ) FLA_Obj_buffer_at_view( delta ) ) = d_n; FLA_Shift_diag( FLA_NO_CONJUGATE, delta, Aold ); /* Set gflops = billions of floating point operations that will be performed */ gflops = 1.0/3.0 * n * n * n * 1.0e-09; /* Time the reference implementation */ #if TIME_LAPACK == TRUE #else // if ( n <= nlast_unb ) #endif { for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, Aref ); dtime = FLA_Clock(); REF_Chol( TIME_LAPACK, Aref, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } printf( "data_REF( %d, 1:2 ) = [ %d %le ];\n", i, n, gflops / dtime_best ); fflush( stdout ); } /* Time FLA_Chol */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock(); FLA_Chol( FLA_LOWER_TRIANGULAR, A ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } printf( "data_FLAME( %d, 1:2 ) = [ %d %le ];\n", i, n, gflops / dtime_best ); if ( gflops / dtime_best > max_gflops ) max_gflops = gflops / dtime_best; fflush( stdout ); /* Time the your implementations */ /* Variant 1 unblocked */ if ( n <= nlast_unb ){ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock(); #if TIME_UNB_VAR1 == TRUE Chol_unb_var1( A ); #else REF_Chol( TIME_LAPACK, A, nb_alg ); #endif dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( A, Aref ); printf( "data_unb_var1( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); } /* Variant 1 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock(); #if TIME_BLK_VAR1 == TRUE Chol_blk_var1( A, nb_alg ); #else REF_Chol( TIME_LAPACK, A, nb_alg ); #endif dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( A, Aref ); printf( "data_blk_var1( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); /* Variant 2 unblocked */ if ( n <= nlast_unb ){ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock(); #if TIME_UNB_VAR2 == TRUE Chol_unb_var2( A ); #else REF_Chol( TIME_LAPACK, A, nb_alg ); #endif dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( A, Aref ); printf( "data_unb_var2( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); } /* Variant 2 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock(); #if TIME_BLK_VAR2 == TRUE Chol_blk_var2( A, nb_alg ); #else REF_Chol( TIME_LAPACK, A, nb_alg ); #endif dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( A, Aref ); printf( "data_blk_var2( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); /* Variant 3 unblocked */ if ( n <= nlast_unb ){ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock(); #if TIME_UNB_VAR3 == TRUE Chol_unb_var3( A ); #else REF_Chol( TIME_LAPACK, A, nb_alg ); #endif dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( A, Aref ); printf( "data_unb_var3( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); } /* Variant 3 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock(); #if TIME_BLK_VAR3 == TRUE Chol_blk_var3( A, nb_alg ); #else REF_Chol( TIME_LAPACK, A, nb_alg ); #endif dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( A, Aref ); printf( "data_blk_var3( %d, 1:3 ) = [ %d %le %le];\n", i, n, gflops / dtime_best, diff ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &Aold ); FLA_Obj_free( &Aref ); FLA_Obj_free( &delta ); printf( "\n" ); i++; } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ printf( "close all\n" ); #if OCTAVE == TRUE /* Plot the performance of FLAME */ printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), '-k;libflame;' ); \n" ); /* Indicate that you want to add to the existing plot */ printf( "hold on\n" ); /* Plot the performance of the reference implementation */ printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), '-m;reference;' ); \n" ); /* Plot the performance of your implementations */ printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), \"-rx;UnbVar1;\" ); \n" ); printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), \"-go;UnbVar2;\" ); \n" ); printf( "plot( data_unb_var3( :,1 ), data_unb_var3( :, 2 ), \"-b*;UnbVar3;\" ); \n" ); printf( "plot( data_blk_var1( :,1 ), data_blk_var1( :, 2 ), \"-rx;BlkVar1;\", \"markersize\", 3 ); \n" ); printf( "plot( data_blk_var2( :,1 ), data_blk_var2( :, 2 ), \"-go;BlkVar2;\", \"markersize\", 3 ); \n" ); printf( "plot( data_blk_var3( :,1 ), data_blk_var3( :, 2 ), \"-b*;BlkVar3;\", \"markersize\", 3 ); \n" ); #else /* Plot the performance of FLAME */ printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), 'k--' ); \n" ); /* Indicate that you want to add to the existing plot */ printf( "hold on\n" ); /* Plot the performance of the reference implementation */ printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), 'k-' ); \n" ); /* Plot the performance of your implementations */ printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), 'r-.x' ); \n" ); printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), 'g-.o' ); \n" ); printf( "plot( data_unb_var3( :,1 ), data_unb_var3( :, 2 ), 'b-.*' ); \n" ); printf( "plot( 
data_blk_var1( :,1 ), data_blk_var1( :, 2 ), 'r-x'); \n" ); printf( "plot( data_blk_var2( :,1 ), data_blk_var2( :, 2 ), 'g-o'); \n" ); printf( "plot( data_blk_var3( :,1 ), data_blk_var3( :, 2 ), 'b-*'); \n" ); #endif printf( "hold off \n"); printf( "xlabel( 'matrix dimension m=n' );\n"); printf( "ylabel( 'GFLOPS/sec.' );\n"); printf( "axis( [ 0 %d 0 %3.1f ] ); \n", nlast, max_gflops ); #if OCTAVE == TRUE printf( "legend( 2 ); \n" ); printf(" print -landscape -solid -color -deps -F:24 Chol.eps\n" ); #else printf( "legend( 'FLA Chol', ...\n"); printf( " 'Simple loops', ...\n"); printf( " 'unb var1', ...\n"); printf( " 'unb var2', ...\n"); printf( " 'unb var3', ...\n"); printf( " 'blk var1', ...\n"); printf( " 'blk var2', ...\n"); printf( " 'blk var3', 2);\n"); printf( "print -r100 -dpdf Chol.pdf\n"); #endif FLA_Finalize( ); exit( 0 ); }