/*
 * FLASH_LQ_UT_solve
 *
 * Computes X from B using the factored matrix A and its companion T
 * (presumably the output of an LQ_UT factorization -- confirm against the
 * caller).  The visible steps are:
 *   1. create the workspace W needed to apply Q,
 *   2. copy B into XT, the top FLA_Obj_length( B ) rows of X,
 *   3. solve in place on XT with AL, the leading FLA_Obj_length( A ) columns
 *      of A, treated as lower triangular with a non-unit diagonal,
 *   4. zero the remaining rows XB of X,
 *   5. apply Q^H from the left to all of X.
 *
 * Parameter checking runs only when the library error level is at least
 * FLA_MIN_ERROR_CHECKING.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLASH_LQ_UT_solve( FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X )
{
  FLA_Obj W;
  FLA_Obj AL, AR;
  FLA_Obj XT, XB;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_LQ_UT_solve_check( A, T, B, X );

  FLASH_Apply_Q_UT_create_workspace( T, X, &W );

  FLA_Part_1x2( A,   &AL, &AR,    FLA_Obj_length( A ), FLA_LEFT );

  FLA_Part_2x1( X,   &XT,
                     &XB,    FLA_Obj_length( B ), FLA_TOP );

  // XT := inv( AL ) * B
  FLASH_Copy( B, XT );
  FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, AL, XT );

  FLASH_Set( FLA_ZERO, XB );

  // With A = L * Q, the solution is X = Q^H * [ inv( L ) * B; 0 ], so Q must
  // be applied conjugate-transposed; applying Q itself gives a wrong result.
  FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
                    A, T, W, X );

  FLASH_Obj_free( &W );

  return FLA_SUCCESS;
}
/*
 * FLASH_Chol_solve
 *
 * Copies B into X and then performs two in-place triangular solves on X
 * with A (presumably a Cholesky factor -- confirm against the caller):
 *   - uplo == FLA_LOWER_TRIANGULAR: solve with A, then with A^H;
 *   - any other uplo value:         solve with A^H, then with A.
 *
 * Parameter checking runs only when the library error level is at least
 * FLA_MIN_ERROR_CHECKING.  Returns FLA_SUCCESS.
 */
FLA_Error FLASH_Chol_solve( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, FLA_Obj X )
{
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Chol_solve_check( uplo, A, B, X );
  }

  // All solves operate in place on X, so B is left untouched.
  FLASH_Copy( B, X );

  if ( uplo == FLA_LOWER_TRIANGULAR )
  {
    // Forward solve with the lower factor, then backward with its
    // conjugate transpose.
    FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                FLA_NONUNIT_DIAG, FLA_ONE, A, X );
    FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                FLA_NONUNIT_DIAG, FLA_ONE, A, X );

    return FLA_SUCCESS;
  }

  // Upper-triangular storage: conjugate-transposed solve first.
  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, A, X );
  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, A, X );

  return FLA_SUCCESS;
}
/*
 * FLASH_LU_incpiv_solve
 *
 * Copies B into X, runs FLASH_FS_incpiv on X with A, p and L, and then
 * performs an in-place upper-triangular, non-unit-diagonal solve on X with A.
 * B is not modified by this routine.
 *
 * Parameter checking runs only when the library error level is at least
 * FLA_MIN_ERROR_CHECKING.  Returns FLA_SUCCESS.
 */
FLA_Error FLASH_LU_incpiv_solve( FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj B, FLA_Obj X )
{
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_LU_incpiv_solve_check( A, p, L, B, X );
  }

  // Work on a copy so the right-hand side survives.
  FLASH_Copy( B, X );

  // First stage: FLASH_FS_incpiv with the pivot data p and L.
  FLASH_FS_incpiv( A, p, L, X );

  // Second stage: solve with the upper triangle of A.
  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, A, X );

  return FLA_SUCCESS;
}
/*
 * FLASH_UDdate_UT_inc_solve
 *
 * Copies bR into x and then solves in place on x with the upper triangle of
 * R (non-unit diagonal, no transpose).  bR is preserved.
 *
 * Parameter checking runs only when the library error level is at least
 * FLA_MIN_ERROR_CHECKING.  Returns FLA_SUCCESS.
 */
FLA_Error FLASH_UDdate_UT_inc_solve( FLA_Obj R, FLA_Obj bR, FLA_Obj x )
{
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_UDdate_UT_inc_solve_check( R, bR, x );
  }

  // The triangular solve overwrites its right-hand side, so seed x with bR
  // and let the solution land in x.
  FLASH_Copy( bR, x );

  // x := inv( triu( R ) ) * x
  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, R, x );

  return FLA_SUCCESS;
}
int main(int argc, char *argv[]) { int datatype, precision, nb_alg, bm, bn, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, A_save, A_flat, B, B_ref, T, T_flat, W, t; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //precision = FLA_SINGLE_PRECISION; precision = FLA_DOUBLE_PRECISION; FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < 
n_param_combos; param_combo++ ){ // Determine datatype based on trans argument. if ( pc_str[param_combo][1] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } bm = nb_alg / 4; bn = nb_alg; // If multiplying Q on the left, A is m x m; ...on the right, A is n x n. if ( pc_str[param_combo][0] == 'l' ) { FLA_Obj_create( datatype, nb_alg, nb_alg, &A_flat ); FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A_save ); FLA_Obj_create( datatype, bm, bn, &T_flat ); FLASH_Obj_create_ext( datatype, bm, bn, 1, &bm, &bn, &T ); FLASH_Obj_create_ext( datatype, bm, n, 1, &bm, &bn, &W ); } else { FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &A ); } FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B ); FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B_ref ); FLA_Obj_create( datatype, nb_alg, 1, &t ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); fprintf( stdout, "data_applyq_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); FLASH_Copy( A, A_save ); FLASH_Obj_flatten( A, A_flat ); FLA_QR_blk_external( A_flat, t ); FLASH_Obj_hierarchify( A_flat, A ); time_Apply_Q( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, B, B_ref, t, T, W, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); FLASH_Copy( A_save, A ); FLASH_Obj_flatten( A, A_flat ); FLA_QR_UT( A_flat, t, T_flat ); FLASH_Obj_hierarchify( A_flat, A ); FLASH_Obj_hierarchify( T_flat, T ); time_Apply_Q( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, B, B_ref, t, T, W, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLA_Obj_free( &A_flat ); FLASH_Obj_free( &B ); FLASH_Obj_free( &B_ref ); FLA_Obj_free( &t ); 
FLASH_Obj_free( &T ); FLA_Obj_free( &T_flat ); FLASH_Obj_free( &W ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_applyq_%s( :,1 ), data_applyq_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_applyq\\_%s', 'fla\\_applyq\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME applyq front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc applyq_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }
/*
 * Helper for time_Trmm(): runs one Trmm call with the given side / uplo /
 * trans arguments, picking the implementation from the local variable
 * `type`.  It expands inside time_Trmm() and relies on that function's
 * locals (type, A, C, A_flat, C_flat).  An unknown `type` prints "trouble".
 */
#define TIME_TRMM_COMBO( side, uplo, trans )                               \
  do {                                                                     \
    switch ( type ) {                                                      \
    case FLA_ALG_REFERENCE:                                                \
      REF_Trmm( side, uplo, trans, FLA_NONUNIT_DIAG, FLA_TWO,              \
                A_flat, C_flat );                                          \
      break;                                                               \
    case FLA_ALG_FRONT:                                                    \
      FLASH_Trmm( side, uplo, trans, FLA_NONUNIT_DIAG, FLA_TWO, A, C );    \
      break;                                                               \
    default:                                                               \
      printf("trouble\n");                                                 \
    }                                                                      \
  } while ( 0 )

/*
 * time_Trmm
 *
 * Times one Trmm parameter combination, keeping the fastest of `nrepeats`
 * runs.
 *
 *   param_combo  0..11 selects (side, uplo, trans); other values time an
 *                empty region.
 *   type         FLA_ALG_REFERENCE runs REF_Trmm on flat copies and stores
 *                the result in C_ref; FLA_ALG_FRONT runs FLASH_Trmm on the
 *                hierarchical objects and compares against C_ref.
 *   m, n         unused here.
 *   dtime        out: best elapsed time.
 *   diff         out: 0.0 for the reference run, otherwise the maximum
 *                element-wise difference between C and C_ref.
 *   gflops       out: rate derived from the scalar dimensions of C and A,
 *                multiplied by 4 for the conjugate-transpose combinations
 *                (0, 3, 6, 9).
 *
 * C is restored to its incoming contents before returning.
 */
void time_Trmm( int param_combo, int type, int nrepeats, int m, int n,
                FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  FLA_Obj C_backup, A_flat, C_flat;
  double  best_time = 1.0e9;
  int     rep;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    // Every repetition starts from the same input data.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    switch ( param_combo )
    {
    case 0:
      TIME_TRMM_COMBO( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE );
      break;
    case 1:
      TIME_TRMM_COMBO( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE );
      break;
    case 2:
      TIME_TRMM_COMBO( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE );
      break;
    case 3:
      TIME_TRMM_COMBO( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE );
      break;
    case 4:
      TIME_TRMM_COMBO( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE );
      break;
    case 5:
      TIME_TRMM_COMBO( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE );
      break;
    case 6:
      TIME_TRMM_COMBO( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE );
      break;
    case 7:
      TIME_TRMM_COMBO( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE );
      break;
    case 8:
      TIME_TRMM_COMBO( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE );
      break;
    case 9:
      TIME_TRMM_COMBO( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE );
      break;
    case 10:
      TIME_TRMM_COMBO( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE );
      break;
    case 11:
      TIME_TRMM_COMBO( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE );
      break;
    }

    *dtime = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    // The reference result becomes the baseline for later comparisons.
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_width( C ) *
            FLASH_Obj_scalar_width( A ) /
            best_time /
            1.0e9;

  // The conjugate-transpose combinations get the 4x scaling.
  switch ( param_combo )
  {
  case 0:
  case 3:
  case 6:
  case 9:
    *gflops *= 4.0;
    break;
  default:
    break;
  }

  *dtime = best_time;

  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}

#undef TIME_TRMM_COMBO
int main( int argc, char *argv[] ) { int m_input, n_input, m, n, rs, cs, i, datatype; int blocksize[3]; int depth; double buffer[64]; double buffer2[64]; FLA_Obj Af, Ah, Bh; FLA_Init(); fprintf( stdout, "%c Enter hierarchy depth:", '%' ); scanf( "%d", &depth ); fprintf( stdout, "%c %d\n", '%', depth ); for ( i = 0; i < depth; ++i ) { fprintf( stdout, "%c Enter blocksize %d:", '%', i ); scanf( "%d", &blocksize[i] ); fprintf( stdout, "%c %d\n", '%', blocksize[i] ); } fprintf( stdout, "%c enter m n: ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); datatype = FLA_DOUBLE; m = m_input; n = n_input; rs = 1; cs = m_input; for( i = 0; i < 64; i++ ) buffer[i] = ( double ) i; for( i = 0; i < 64; i++ ) buffer2[i] = ( double ) 0; //FLASH_Obj_create( datatype, m, n, depth, blocksize, &Ah ); FLASH_Obj_create_without_buffer( datatype, m, n, depth, blocksize, &Ah ); FLASH_Obj_attach_buffer( buffer, rs, cs, &Ah ); //FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, Ah, &Af ); //FLASH_Obj_create_hier_conf_to_flat( FLA_NO_TRANSPOSE, Af, depth, blocksize, &Bh ); //FLASH_Obj_create_flat_copy_of_hier( Ah, &Af ); //FLASH_Obj_create_hier_copy_of_flat( Af, depth, blocksize, &Bh ); FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, Ah, &Bh ); //FLASH_Axpy( FLA_TWO, Ah, Bh ); FLASH_Copy( Ah, Bh ); //FLA_Obj_create_without_buffer( datatype, 4, 4, &Af ); //FLA_Obj_attach_buffer( buffer2, 4, &Af ); //FLASH_Axpy_flat_to_hier( FLA_TWO, Af, 1, 1, Ah ); //FLASH_Axpy_hier_to_flat( FLA_TWO, 1, 1, Ah, Af ); //FLASH_Axpy_buffer_to_hier( FLA_ONE, 4, 4, buffer, 4, 1, 1, Ah ); //FLASH_Axpy_hier_to_buffer( FLA_ONE, 2, 2, Ah, 4, 4, buffer2, 4 ); //fprintf( stderr, "T: Am An = %d %d\n", FLASH_Obj_scalar_length( Ah ), // FLASH_Obj_scalar_width( Ah ) ); //FLASH_Random_matrix( Ah ); //fprintf( stderr, "depth = %d\n", FLASH_Obj_depth( Ah ) );; /* { int depth; int b_m[4]; int b_n[4]; depth = FLASH_Obj_blocksizes( Bh, b_m, b_n ); fprintf( stderr, "depth = %d\n", 
depth );; fprintf( stderr, "b_m[0] = %d\n", b_m[0] );; fprintf( stderr, "b_n[0] = %d\n", b_n[0] );; } */ FLASH_Obj_show( "", Ah, "%11.4e", "" ); FLASH_Obj_show( "", Bh, "%11.4e", "" ); //FLA_Obj_show( "", Af, "%11.4e", "" ); //FLASH_print_struct( Ah ); //fprintf( stderr, "max_diff = %e\n", FLASH_Max_elemwise_diff( Ah, Bh ) );; //FLASH_Obj_free_without_buffer( &Ah ); //FLASH_Obj_free( &Af ); //FLA_Obj_free( &Af ); FLA_Finalize(); return 0; }
void main (void) { unsigned char temp_byte = 0x00; FLADDR start_address = 0x5FFE; char test_write_buff[8] = "ABCDEFG"; char test_write_buff2[3] = "HIJ"; char test_read_buff[8] = {0}; char test_compare_buff[8] = "ABCDEFG"; unsigned char i; bit error_flag = 0; PCA0MD &= ~0x40; // WDTE = 0 (clear watchdog timer // enable) if ((RSTSRC & 0x02) != 0x02) { if ((RSTSRC & 0x40) == 0x40) { LED = 0; while(1); // Last reset was caused by a Flash // Error Device Reset // LED is off and loop forever to // indicate error } } Oscillator_Init(); // Initialize the internal oscillator // to 24.5 MHz VDDMon_Init(); Port_Init(); LED = 1; SFRPAGE = LEGACY_PAGE; // Initially erase the test page of Flash FLASH_PageErase(start_address); //BEGIN TEST================================================================ // Check if able to Write and Read the Flash-------------------------------- FLASH_ByteWrite(start_address, 0xA5); temp_byte = FLASH_ByteRead(start_address); if (temp_byte != 0xA5) { error_flag = 1; } //-------------------------------------------------------------------------- // Check if able to Erase a page of the Flash------------------------------- FLASH_PageErase(start_address); temp_byte = FLASH_ByteRead(start_address); if (temp_byte != 0xFF) { error_flag = 1; } //-------------------------------------------------------------------------- // Check if able to write and read a series of bytes------------------------ FLASH_Write(start_address, test_write_buff, sizeof(test_write_buff)); FLASH_Read(test_read_buff, start_address, sizeof(test_write_buff)); for (i = 0; i < sizeof(test_write_buff); i++) { if (test_read_buff[i] != test_write_buff[i]) { error_flag = 1; } } //-------------------------------------------------------------------------- // Check if able to Erase a few bytes--------------------------------------- FLASH_Clear(start_address, 2); FLASH_Read(test_read_buff, start_address, sizeof(test_write_buff)); // Simulate the same changes to a data array for comparison 
test_compare_buff[0] = 0xFF; test_compare_buff[1] = 0xFF; for (i = 0; i < sizeof(test_compare_buff); i++) { if (test_read_buff[i] != test_compare_buff[i]) { error_flag = 1; } } //-------------------------------------------------------------------------- // Check if able to "update" (erase then re-write) a few bytes-------------- FLASH_Update (start_address, test_write_buff2, 3); FLASH_Read(test_read_buff, start_address, sizeof(test_write_buff)); // Simulate the same changes to a data array for comparison test_compare_buff[0] = test_write_buff2[0]; test_compare_buff[1] = test_write_buff2[1]; test_compare_buff[2] = test_write_buff2[2]; for (i = 0; i < sizeof(test_compare_buff); i++) { if (test_read_buff[i] != test_compare_buff[i]) { error_flag = 1; } } //-------------------------------------------------------------------------- // Check if able to copy data in the Flash---------------------------------- FLASH_Copy (start_address+sizeof(test_write_buff), start_address, sizeof(test_write_buff)); FLASH_Read(test_read_buff, start_address+sizeof(test_write_buff), sizeof(test_read_buff)); for (i = 0; i < sizeof(test_write_buff); i++) { if (test_read_buff[i] != test_compare_buff[i]) { error_flag = 1; } } //-------------------------------------------------------------------------- // FLASH test routines------------------------------------------------------ FLASH_Fill (start_address+sizeof(test_write_buff)*2, sizeof(test_write_buff), 0x5A); FLASH_Read(test_read_buff, start_address+sizeof(test_write_buff)*2, sizeof(test_write_buff)); for (i = 0; i < sizeof(test_write_buff); i++) { if (test_read_buff[i] != 0x5A) { error_flag = 1; } } //-------------------------------------------------------------------------- //END OF TEST=============================================================== while (1) // Loop forever { // Blink LED to indicate success if (error_flag == 0) { LED = ~LED; Timer0_Delay_ms (100); } else { LED = 0; } } }
/*
 * time_Apply_Q
 *
 * Times the Apply_Q operation, keeping the fastest of `nrepeats` runs, then
 * follows it with an upper-triangular solve and records the result.
 *
 *   param_combo  only 0 is implemented; other values time an empty region.
 *   type         FLA_ALG_REFERENCE runs REF_Apply_Q on flat copies and
 *                stores the post-solve result in B_ref; FLA_ALG_FRONT runs
 *                FLASH_Apply_Q_UT on the hierarchical objects and compares
 *                the post-solve B against B_ref.  Any other value prints
 *                "trouble".
 *   m, n         unused here.
 *   dtime        out: best elapsed time.
 *   diff         out: 0.0 for the reference run, otherwise the maximum
 *                element-wise difference between B and B_ref.
 *   gflops       out: rate derived from the scalar dimensions of A and B,
 *                multiplied by 4 when A is complex.
 *
 * B is restored to its incoming contents before returning.
 */
void time_Apply_Q( int param_combo, int type, int nrepeats, int m, int n,
                   FLA_Obj A, FLA_Obj B, FLA_Obj B_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W,
                   double *dtime, double *diff, double *gflops )
{
  FLA_Obj B_backup, A_flat, B_flat;
  double  best_time = 1.0e9;
  int     rep;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &B_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );

  FLASH_Copy( B, B_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    // Every repetition starts from the same input data.
    FLASH_Copy( B_backup, B );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( B, B_flat );

    *dtime = FLA_Clock();

    if ( param_combo == 0 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Apply_Q( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A_flat, t, B_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                          A, T, W, B );
      }
      else
      {
        printf("trouble\n");
      }
    }

    *dtime = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                FLA_ONE, A, B );

    *diff = FLASH_Max_elemwise_diff( B, B_ref );
  }
  else
  {
    // The reference result (after the solve) becomes the baseline.
    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                       FLA_ONE, A_flat, B_flat );

    FLASH_Obj_hierarchify( B_flat, B_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 *
            FLASH_Obj_scalar_length( A ) *
            FLASH_Obj_scalar_width( A ) *
            FLASH_Obj_scalar_width( B ) /
            best_time /
            1.0e9;

  if ( FLA_Obj_is_complex( A ) )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  FLASH_Copy( B_backup, B );

  FLASH_Obj_free( &B_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &B_flat );
}
int main(int argc, char *argv[]) { int datatype, m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t nb_alg; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, A_ref; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d\n", '%', m_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A_ref ); if ( pc_str[param_combo][0] == 'l' ) FLASH_Random_spd_matrix( FLA_LOWER_TRIANGULAR, A ); else FLASH_Random_spd_matrix( FLA_UPPER_TRIANGULAR, A ); FLASH_Copy( A, A_ref ); fprintf( stdout, "data_chol_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_REFERENCE, n_repeats, m, A, 
A_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_FRONT, n_repeats, m, A, A_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &A_ref ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_chol\\_%s', 'fla\\_chol\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME chol front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc chol_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, precision, nb_alg, n_threads, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; int one = 1; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, x, y, y_ref; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n", '%', n_threads ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //precision = FLA_SINGLE_PRECISION; precision = FLA_DOUBLE_PRECISION; FLASH_Queue_set_num_threads( n_threads ); for ( p = p_first, i = 1; p <= p_last; p += 
p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // Determine datatype based on trans argument. if ( pc_str[param_combo][0] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } // If transposing A, switch dimensions. if ( pc_str[param_combo][0] == 'n' ) FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &A ); else FLASH_Obj_create( datatype, n, m, 1, &nb_alg, &A ); FLASH_Obj_create_ext( datatype, n, 1, 1, &nb_alg, &one, &x ); FLASH_Obj_create_ext( datatype, m, 1, 1, &nb_alg, &one, &y ); FLASH_Obj_create_ext( datatype, m, 1, 1, &nb_alg, &one, &y_ref ); FLASH_Random_matrix( A ); FLASH_Random_matrix( x ); FLASH_Random_matrix( y ); FLASH_Copy( y, y_ref ); fprintf( stdout, "data_gemv_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Gemv( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, x, y, y_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Gemv( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, x, y, y_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &x ); FLASH_Obj_free( &y ); FLASH_Obj_free( &y_ref ); } fprintf( stdout, "\n" ); } fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_gemv_%s( :,1 ), data_gemv_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_gemv_%s( :,1 ), data_gemv_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... 
\n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_gemv\\_%s', 'fla\\_gemv\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME gemv front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc gemv_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); return 0; }
/*
 * time_LQ_UT
 *
 * Times FLASH_LQ_UT on A, keeping the fastest of `nrepeats` runs, then
 * checks the factorization by solving with it.
 *
 *   param_combo  only 0 is implemented; other values time an empty region.
 *   type         only FLA_ALG_FRONT is implemented; other values print
 *                "trouble".
 *   m, n         used only in the flop-count formula.
 *   b_flash      unused here.
 *   A, TW        matrix to factor (overwritten) and the companion object
 *                passed to FLASH_LQ_UT.
 *   b, x         right-hand side and solution used for the check.
 *   dtime        out: best elapsed time.
 *   diff         out: 2-norm of A^H * ( A * x - b ), computed with the
 *                saved, unfactored copy of A.
 *   gflops       out: rate from the flop-count formula, multiplied by 4
 *                when A is complex.
 */
void time_LQ_UT( int param_combo, int type, int nrepeats, int m, int n, int b_flash,
                 FLA_Obj A, FLA_Obj TW, FLA_Obj b, FLA_Obj x,
                 double *dtime, double *diff, double *gflops )
{
  FLA_Obj A_orig;
  FLA_Obj A_orig_flat, x_flat, resid_flat, y_flat;
  FLA_Obj norm;
  double  best_time = 1.0e9;
  int     rep;

  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_orig );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    // Each repetition factors a fresh copy of the original matrix.
    FLASH_Copy( A_orig, A );

    *dtime = FLA_Clock();

    if ( param_combo == 0 )
    {
      if ( type == FLA_ALG_FRONT )
      {
        FLASH_LQ_UT( A, TW );
      }
      else
      {
        printf("trouble\n");
      }
    }

    *dtime = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  // Accuracy check, carried out on flat copies.
  FLASH_Obj_create_flat_copy_of_hier( A_orig, &A_orig_flat );
  FLASH_Obj_create_flat_copy_of_hier( b, &resid_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, x, &x_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, x, &y_flat );
  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );

  FLASH_LQ_UT_solve( A, TW, b, x );

  FLASH_Obj_flatten( x, x_flat );

  // resid := A_orig * x - b
  FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, A_orig_flat, x_flat,
                     FLA_MINUS_ONE, resid_flat );

  // y := A_orig^H * resid
  FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_orig_flat, resid_flat,
                     FLA_ZERO, y_flat );

  FLA_Nrm2_external( y_flat, norm );
  FLA_Obj_extract_real_scalar( norm, diff );

  FLA_Obj_free( &A_orig_flat );
  FLA_Obj_free( &resid_flat );
  FLA_Obj_free( &x_flat );
  FLA_Obj_free( &y_flat );
  FLA_Obj_free( &norm );

  *gflops = (         2.0   * m * n * n -
              ( 2.0 / 3.0 ) * n * n * n ) /
            best_time / 1.0e9;

  if ( FLA_Obj_is_complex( A ) )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  FLASH_Obj_free( &A_orig );
}
/*
 * time_SPDinv
 *
 * Times the SPD inversion operation, keeping the fastest of `nrepeats` runs.
 *
 *   param_combo  0 uses FLA_LOWER_TRIANGULAR, 1 uses FLA_UPPER_TRIANGULAR;
 *                other values time an empty region.
 *   type         FLA_ALG_REFERENCE runs REF_SPDinv on a flat copy and
 *                stores the result in C_ref; FLA_ALG_FRONT runs
 *                FLASH_SPDinv on C and compares against C_ref.  Any other
 *                value prints "trouble" (for combos 0 and 1).
 *   m            unused here.
 *   dtime        out: best elapsed time.
 *   diff         out: 0.0 for the reference run, otherwise the maximum
 *                element-wise difference between C and C_ref.
 *   gflops       out: cube of the scalar length of C divided by the best
 *                time and 1e9.
 *
 * C is restored to its incoming contents before returning.
 */
void time_SPDinv( int param_combo, int type, int nrepeats, int m,
                  FLA_Obj C, FLA_Obj C_ref,
                  double *dtime, double *diff, double *gflops )
{
  FLA_Obj C_backup, C_flat;
  double  best_time = 1.0e9;
  int     rep;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    // Every repetition starts from the same input data.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    if ( param_combo == 0 || param_combo == 1 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        if ( param_combo == 0 )
          REF_SPDinv( FLA_LOWER_TRIANGULAR, C_flat );
        else
          REF_SPDinv( FLA_UPPER_TRIANGULAR, C_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        if ( param_combo == 0 )
          FLASH_SPDinv( FLA_LOWER_TRIANGULAR, C );
        else
          FLASH_SPDinv( FLA_UPPER_TRIANGULAR, C );
      }
      else
      {
        printf("trouble\n");
      }
    }

    *dtime = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    // The reference result becomes the baseline for later comparisons.
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_length( C ) /
            best_time / 1e9;

  *dtime = best_time;

  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &C_flat );
}
/*
 * time_Trinv
 *
 * Times triangular inversion through either the reference routine (on a
 * flat copy) or the FLASH front-end (on the hierarchical matrix).
 *
 *   param_combo  0 selects FLA_LOWER_TRIANGULAR, 1 selects
 *                FLA_UPPER_TRIANGULAR; other values time an empty region.
 *   type         FLA_ALG_REFERENCE -> REF_Trinv, FLA_ALG_FRONT ->
 *                FLASH_Trinv, anything else prints "trouble".
 *   nrepeats     number of timed runs; the smallest elapsed time is kept.
 *   m            dimension used in the flop-count formula.
 *   diag         forwarded unchanged to the inversion routine.
 *   A            matrix operated on; restored to its input contents on exit.
 *   A_ref        written with the reference result when type is
 *                FLA_ALG_REFERENCE, otherwise compared against A.
 *   dtime, diff, gflops   outputs.
 */
void time_Trinv( int param_combo, int type, int nrepeats, int m, FLA_Diag diag,
                 FLA_Obj A, FLA_Obj A_ref,
                 double *dtime, double *diff, double *gflops )
{
  int     rep;
  double  best_time = 1.0e9;
  double  t_start;
  FLA_Obj A_backup, A_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );

  FLASH_Copy( A, A_backup );

  for ( rep = 0; rep < nrepeats; rep++ )
  {
    // Reset both the hierarchical and the flat operand before timing.
    FLASH_Copy( A_backup, A );
    FLASH_Obj_flatten( A, A_flat );

    t_start = FLA_Clock();

    if ( param_combo == 0 || param_combo == 1 )
    {
      switch ( type )
      {
        case FLA_ALG_REFERENCE:
          REF_Trinv( param_combo == 0 ? FLA_LOWER_TRIANGULAR
                                      : FLA_UPPER_TRIANGULAR, diag, A_flat );
          break;
        case FLA_ALG_FRONT:
          FLASH_Trinv( param_combo == 0 ? FLA_LOWER_TRIANGULAR
                                        : FLA_UPPER_TRIANGULAR, diag, A );
          break;
        default:
          printf("trouble\n");
      }
    }

    *dtime    = FLA_Clock() - t_start;
    best_time = min( *dtime, best_time );
  }

  if ( type == FLA_ALG_REFERENCE )
  {
    // The reference run defines the expected answer.
    FLASH_Obj_hierarchify( A_flat, A_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLASH_Max_elemwise_diff( A, A_ref );
  }

  *gflops = 1.0 / 4.0 * m * m * m / best_time / 1e9;

  *dtime = best_time;

  FLASH_Copy( A_backup, A );

  FLASH_Obj_free( &A_backup );
  FLASH_Obj_free( &A_flat );
}
/*
 * time_LU
 *
 * Times the FLASH LU factorization with incremental pivoting and checks
 * the result by solving A x = b and measuring the residual.
 *
 *   pivot_combo  only combination 0 is handled; other values time an
 *                empty region.
 *   type         FLA_ALG_FRONT_OPT0 -> FLASH_LU_incpiv_noopt,
 *                FLA_ALG_FRONT_OPT1 -> FLASH_LU_incpiv_opt1,
 *                anything else prints "trouble" (for combination 0).
 *   nrepeats     number of timed runs; the smallest elapsed time is kept.
 *   m, n         dimensions used in the flop-count formula.
 *   nb_alg       passed to FLASH_LU_incpiv_create_hier_matrices.
 *   nb_flash     blocksize used when building the hierarchical objects.
 *   A            flat input matrix.
 *   p            not used by this routine.
 *   x            flat object that receives the computed solution.
 *   b            flat right-hand side; overwritten during the check and
 *                restored from a saved copy before returning.
 *   norm         scalar object that receives the residual 2-norm.
 *   dtime, diff, gflops   outputs.
 */
void time_LU( int pivot_combo, int type, int nrepeats, int m, int n,
              dim_t nb_alg, dim_t nb_flash,
              FLA_Obj A, FLA_Obj p, FLA_Obj x, FLA_Obj b, FLA_Obj norm,
              double *dtime, double *diff, double *gflops )
{
  int     rep;
  double  best_time = 1.0e9;
  double  t_start;
  FLA_Obj AH_orig, b_orig;
  FLA_Obj AH, pH, bH, LH;

  // Build the hierarchical operands and keep pristine copies of A and b.
  FLASH_LU_incpiv_create_hier_matrices( A, 1, &nb_flash, nb_alg, &AH, &pH, &LH );
  FLASH_Obj_create_hier_copy_of_flat( b, 1, &nb_flash, &bH );

  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, AH, &AH_orig );
  FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, b, &b_orig );

  for ( rep = 0; rep < nrepeats; rep++ )
  {
    FLASH_Copy( AH_orig, AH );

    t_start = FLA_Clock();

    if ( pivot_combo == 0 )
    {
      if ( type == FLA_ALG_FRONT_OPT0 )
        FLASH_LU_incpiv_noopt( AH, pH, LH );
      else if ( type == FLA_ALG_FRONT_OPT1 )
        FLASH_LU_incpiv_opt1( AH, pH, LH );
      else
        printf("trouble\n");
    }

    *dtime    = FLA_Clock() - t_start;
    best_time = min( *dtime, best_time );
  }

  // Check: forward substitution with the incpiv factors, an upper
  // triangular solve, then the 2-norm of A * x - b.
  FLASH_FS_incpiv( AH, pH, LH, bH );
  FLASH_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, AH, bH );

  FLASH_Obj_flatten( bH, x );

  FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, A, x, FLA_MINUS_ONE, b );
  FLA_Nrm2_external( b, norm );
  FLA_Obj_extract_real_scalar( norm, diff );

  *gflops = 2.0 / 3.0 * m * m * n / best_time / 1e9;

  // Complex arithmetic is counted as four times the real flop count.
  if ( FLA_Obj_is_complex( A ) )
    *gflops *= 4.0;

  *dtime = best_time;

  // The residual computation clobbered b; put the caller's data back.
  FLA_Copy( b_orig, b );

  FLASH_Obj_free( &AH );
  FLASH_Obj_free( &pH );
  FLASH_Obj_free( &bH );
  FLASH_Obj_free( &LH );
  FLA_Obj_free( &b_orig );
  FLASH_Obj_free( &AH_orig );
}
int main(int argc, char *argv[]) { int datatype, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t nb_alg, n_threads; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%u", &n_threads ); fprintf( stdout, "%c %u\n", '%', n_threads ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_set_num_threads( n_threads ); //FLASH_Queue_set_verbose_output( TRUE ); for ( p = p_first, i = 1; p <= 
p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // If multiplying A on the left, A is m x m; ...on the right, A is n x n. if ( pc_str[param_combo][0] == 'l' ) FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A ); else FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &B ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C_ref ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); FLASH_Random_matrix( C ); FLASH_Copy( C, C_ref ); fprintf( stdout, "data_hemm_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Hemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Hemm( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &B ); FLASH_Obj_free( &C ); FLASH_Obj_free( &C_ref ); } fprintf( stdout, "\n" ); } /* fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_hemm_%s( :,1 ), data_hemm_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_hemm_%s( :,1 ), data_hemm_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_hemm\\_%s', 'fla\\_hemm\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' 
);\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME hemm front-end performance (%s, %s)' );\n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc hemm_front_%s_%s.eps\n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); */ FLA_Finalize( ); return 0; }
int main(int argc, char *argv[]) { int datatype, nb_alg, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, j, n_param_combos = N_PARAM_COMBOS; int sign; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref, scale, isgn, norm; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c Enter sign (-1 or 1):", '%' ); scanf( "%d", &sign ); fprintf( stdout, "%c %d\n", '%', sign ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d\n", '%', m_input, n_input ); fprintf( stdout, "\nclear all;\n\n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } if ( 0 < sign ) isgn = FLA_ONE; else isgn = FLA_MINUS_ONE; //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; if ( datatype == FLA_DOUBLE || datatype == 
FLA_DOUBLE_COMPLEX ) { FLA_Obj_create( FLA_DOUBLE, 1, 1, &scale ); FLA_Obj_create( FLA_DOUBLE, 1, 1, &norm ); } else if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) { FLA_Obj_create( FLA_FLOAT, 1, 1, &scale ); FLA_Obj_create( FLA_FLOAT, 1, 1, &norm ); } FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &B ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C ); FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C_ref ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); FLASH_Random_matrix( C ); FLASH_Norm1( A, norm ); FLASH_Obj_shift_diagonal( FLA_NO_CONJUGATE, norm, A ); FLASH_Norm1( B, norm ); if ( FLA_Obj_is( isgn, FLA_MINUS_ONE ) ) FLA_Negate( norm ); FLASH_Obj_shift_diagonal( FLA_NO_CONJUGATE, norm, B ); FLASH_Copy( C, C_ref ); fprintf( stdout, "data_sylv_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Sylv( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Sylv( param_combo, FLA_ALG_FRONT, n_repeats, m, n, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; \n" ); fflush( stdout ); FLASH_Obj_free( &A ); FLASH_Obj_free( &B ); FLASH_Obj_free( &C ); FLASH_Obj_free( &C_ref ); } fprintf( stdout, "\n" ); } FLA_Obj_free( &scale ); FLA_Obj_free( &norm ); fprintf( stdout, "figure;\n" ); fprintf( stdout, "hold on;\n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_sylv_%s( :,1 ), data_sylv_%s( :, 2 ), '%c:%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_sylv_%s( 
:,1 ), data_sylv_%s( :, 4 ), '%c-.%c' ); \n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... \n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref\\_sylv\\_%s', 'fla\\_sylv\\_%s', ... \n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); \n" ); fprintf( stdout, "xlabel( 'problem size p' );\n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME sylv front-end performance (%s)' );\n", m_dim_desc ); fprintf( stdout, "print -depsc sylv_front_%s.eps\n", m_dim_tag ); fprintf( stdout, "hold off;\n"); fflush( stdout ); FLA_Finalize( ); }
//----------------------------------------------------------------------------- // FLASH_Clear //----------------------------------------------------------------------------- // // This routine erases <numbytes> starting from the FLASH addressed by // <dest> by performing a read-modify-write operation using <FLASH_TEMP> as // a temporary holding area. This function accepts <numbytes> up to // <FLASH_PAGESIZE>. // void FLASH_Clear (FLADDR dest, unsigned numbytes) { FLADDR dest_1_page_start; // first address in 1st page // containing <dest> FLADDR dest_1_page_end; // last address in 1st page // containing <dest> FLADDR dest_2_page_start; // first address in 2nd page // containing <dest> FLADDR dest_2_page_end; // last address in 2nd page // containing <dest> unsigned numbytes_remainder; // when crossing page boundary, // number of <src> bytes on 2nd page unsigned FLASH_pagesize; // size of FLASH page to update FLADDR wptr; // write address FLADDR rptr; // read address unsigned length; FLASH_pagesize = FLASH_PAGESIZE; dest_1_page_start = dest & ~(FLASH_pagesize - 1); dest_1_page_end = dest_1_page_start + FLASH_pagesize - 1; dest_2_page_start = (dest + numbytes) & ~(FLASH_pagesize - 1); dest_2_page_end = dest_2_page_start + FLASH_pagesize - 1; if (dest_1_page_end == dest_2_page_end) { // 1. Erase Scratch page FLASH_PageErase (FLASH_TEMP); // 2. Copy bytes from first byte of dest page to dest-1 to Scratch page wptr = FLASH_TEMP; rptr = dest_1_page_start; length = dest - dest_1_page_start; FLASH_Copy (wptr, rptr, length); // 3. Copy from (dest+numbytes) to dest_page_end to Scratch page wptr = FLASH_TEMP + dest - dest_1_page_start + numbytes; rptr = dest + numbytes; length = dest_1_page_end - dest - numbytes + 1; FLASH_Copy (wptr, rptr, length); // 4. Erase destination page FLASH_PageErase (dest_1_page_start); // 5. 
Copy Scratch page to destination page wptr = dest_1_page_start; rptr = FLASH_TEMP; length = FLASH_pagesize; FLASH_Copy (wptr, rptr, length); } else { // value crosses page boundary // 1. Erase Scratch page FLASH_PageErase (FLASH_TEMP); // 2. Copy bytes from first byte of dest page to dest-1 to Scratch page wptr = FLASH_TEMP; rptr = dest_1_page_start; length = dest - dest_1_page_start; FLASH_Copy (wptr, rptr, length); // 3. Erase destination page 1 FLASH_PageErase (dest_1_page_start); // 4. Copy Scratch page to destination page 1 wptr = dest_1_page_start; rptr = FLASH_TEMP; length = FLASH_pagesize; FLASH_Copy (wptr, rptr, length); // now handle 2nd page // 5. Erase Scratch page FLASH_PageErase (FLASH_TEMP); // 6. Copy bytes from numbytes remaining to dest-2_page_end to Scratch page numbytes_remainder = numbytes - (dest_1_page_end - dest + 1); wptr = FLASH_TEMP + numbytes_remainder; rptr = dest_2_page_start + numbytes_remainder; length = FLASH_pagesize - numbytes_remainder; FLASH_Copy (wptr, rptr, length); // 7. Erase destination page 2 FLASH_PageErase (dest_2_page_start); // 8. Copy Scratch page to destination page 2 wptr = dest_2_page_start; rptr = FLASH_TEMP; length = FLASH_pagesize; FLASH_Copy (wptr, rptr, length); } }
/*
 * time_Lyap
 *
 * Times the FLASH Lyapunov solver front-end and checks the solution by
 * forming a residual against the original right-hand side.
 *
 *   param_combo  0 -> FLA_NO_TRANSPOSE, 1 -> FLA_CONJ_TRANSPOSE; other
 *                values time an empty region.
 *   type         FLA_ALG_FRONT runs FLASH_Lyap; any other value prints
 *                "trouble" (no reference implementation is wired in).
 *                The pair (param_combo 0, FLA_ALG_FRONT) is skipped
 *                entirely and reports zeros.
 *   nrepeats     number of timed runs; the smallest elapsed time is kept.
 *   m            dimension used in the flop-count formula.
 *   isgn, scale  forwarded to FLASH_Lyap; isgn also scales the residual.
 *   A            coefficient matrix.
 *   C            right-hand side, overwritten by the solver during the
 *                runs and restored to its input contents on exit.
 *   dtime        out: best elapsed time (0 when the run is skipped).
 *   diff         out: 1-norm of the residual.
 *   gflops       out: flop rate derived from m and the best time.
 */
void time_Lyap( int param_combo, int type, int nrepeats, int m,
                FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale,
                double *dtime, double *diff, double *gflops )
{
  int     irep;
  double  dtime_old = 1.0e9;
  double  t_start;
  FLA_Obj C_save, norm;
  FLA_Obj X, W;

  if ( param_combo == 0 && type == FLA_ALG_FRONT )
  {
    // Skipped configuration: give every output a defined value.
    *dtime  = 0.0;
    *gflops = 0.0;
    *diff   = 0.0;
    return;
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_save );
  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( C ), 1, 1, 0, 0, &norm );

  FLASH_Copy( C, C_save );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    FLASH_Copy( C_save, C );

    t_start = FLA_Clock();

    if ( param_combo == 0 || param_combo == 1 )
    {
      if ( type == FLA_ALG_FRONT )
        FLASH_Lyap( param_combo == 0 ? FLA_NO_TRANSPOSE : FLA_CONJ_TRANSPOSE,
                    isgn, A, C, scale );
      else
        printf("trouble\n");
    }

    *dtime    = FLA_Clock() - t_start;
    dtime_old = min( *dtime, dtime_old );
  }

  // Residual check.  X is the Hermitianized solution; W accumulates
  // A X + X A^H (combination 0) or A^H X + X A (combination 1), is scaled
  // by isgn, and then has the original right-hand side subtracted.
  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &X );
  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &W );

  FLASH_Copy( C, X );
  FLASH_Hermitianize( FLA_UPPER_TRIANGULAR, X );

  if ( param_combo == 0 )
  {
    FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A, X, FLA_ZERO, W );
    FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );
  }
  else if ( param_combo == 1 )
  {
    FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, X, FLA_ZERO, W );
    FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );
  }

  FLASH_Scal( isgn, W );
  FLASH_Axpy( FLA_MINUS_ONE, C_save, W );
  FLASH_Norm1( W, norm );
  FLA_Obj_extract_real_scalar( norm, diff );

  FLASH_Obj_free( &X );
  FLASH_Obj_free( &W );

  // Cube m in double precision: an int product m * m * m overflows (which
  // is undefined behavior) once m reaches 1291.
  *gflops = ( 2.0 / 3.0 ) * ( ( double ) m * m * m ) / dtime_old / 1e9;

  // Complex arithmetic is counted as four times the real flop count.
  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  FLASH_Copy( C_save, C );

  FLASH_Obj_free( &C_save );
  FLA_Obj_free( &norm );
}
/*
 * time_Syrk
 *
 * Times the symmetric rank-k update through either the reference routine
 * (on flat copies) or the FLASH front-end (on the hierarchical objects).
 *
 *   param_combo  0: lower / no-transpose     1: lower / transpose
 *                2: upper / no-transpose     3: upper / transpose
 *                other values time an empty region.
 *   type         FLA_ALG_REFERENCE -> REF_Syrk, FLA_ALG_FRONT ->
 *                FLASH_Syrk, anything else prints "trouble".
 *   nrepeats     number of timed runs; the smallest elapsed time is kept.
 *   m, k         dimensions used in the flop-count formula.
 *   A            input matrix.
 *   B            not used by this routine.
 *   C            updated matrix; restored to its input contents on exit.
 *   C_ref        written with the reference result when type is
 *                FLA_ALG_REFERENCE, otherwise compared against C.
 *   dtime, diff, gflops   outputs.
 */
void time_Syrk( int param_combo, int type, int nrepeats, int m, int k,
                FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  int     rep;
  double  best_time = 1.0e9;
  double  t_start;
  FLA_Obj C_backup, A_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; rep++ )
  {
    // Reset the hierarchical output and refresh the flat operands.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( param_combo >= 0 && param_combo <= 3 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        switch ( param_combo )
        {
          case 0: REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
          case 1: REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
          case 2: REF_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
          case 3: REF_Syrk( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A_flat, FLA_ZERO, C_flat ); break;
        }
      }
      else if ( type == FLA_ALG_FRONT )
      {
        switch ( param_combo )
        {
          case 0: FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C ); break;
          case 1: FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A, FLA_ZERO, C ); break;
          case 2: FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C ); break;
          case 3: FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,    FLA_ONE, A, FLA_ZERO, C ); break;
        }
      }
      else
      {
        printf("trouble\n");
      }
    }

    *dtime    = FLA_Clock() - t_start;
    best_time = min( *dtime, best_time );
  }

  if ( type == FLA_ALG_REFERENCE )
  {
    // The reference run defines the expected answer.
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }

  *gflops = 1.0 * m * m * k / best_time / 1.0e9;

  // Complex arithmetic is counted as four times the real flop count.
  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = best_time;

  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
/*
 * time_Copy
 *
 * Times a matrix copy through either the reference routine (on flat
 * copies) or the FLASH front-end (on the hierarchical objects).
 *
 *   param_combo  only combination 0 is handled; other values time an
 *                empty region.
 *   type         FLA_ALG_REFERENCE -> REF_Copy, FLA_ALG_FRONT ->
 *                FLASH_Copy, anything else prints "trouble".
 *   nrepeats     number of timed runs; the smallest elapsed time is kept.
 *   m, n         dimensions used in the rate formula.
 *   A            source matrix.
 *   C            destination matrix; restored to its input contents on exit.
 *   C_ref        written with the reference result when type is
 *                FLA_ALG_REFERENCE, otherwise compared against C.
 *   dtime, diff, gflops   outputs.
 */
void time_Copy( int param_combo, int type, int nrepeats, int m, int n,
                FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  int     rep;
  double  best_time = 1.0e9;
  double  t_start;
  FLA_Obj C_backup, A_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; rep++ )
  {
    // Reset the hierarchical output and refresh the flat operands.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( param_combo == 0 )
    {
      if ( type == FLA_ALG_REFERENCE )
        REF_Copy( A_flat, C_flat );
      else if ( type == FLA_ALG_FRONT )
        FLASH_Copy( A, C );
      else
        printf("trouble\n");
    }

    *dtime    = FLA_Clock() - t_start;
    best_time = min( *dtime, best_time );
  }

  if ( type == FLA_ALG_REFERENCE )
  {
    // The reference run defines the expected answer.
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }

  *gflops = 2.0 * m * n / best_time / 1.0e9;

  *dtime = best_time;

  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
/*
 * time_Gemm
 *
 * Times general matrix-matrix multiplication through either the reference
 * routine (on flat copies) or the FLASH front-end (on the hierarchical
 * objects).
 *
 *   param_combo  selects (transa, transb), with c = conjugate transpose,
 *                n = no transpose, t = transpose:
 *                  0: c,c   1: c,n   2: c,t
 *                  3: n,c   4: n,n   5: n,t
 *                  6: t,c   7: t,n   8: t,t
 *                other values time an empty region.
 *   type         FLA_ALG_REFERENCE -> REF_Gemm, FLA_ALG_FRONT ->
 *                FLASH_Gemm, anything else prints "trouble".
 *   nrepeats     number of timed runs; the smallest elapsed time is kept.
 *   m, k, n      dimensions used in the flop-count formula.
 *   A, B         input matrices.
 *   C            output matrix; restored to its input contents on exit.
 *   C_ref        written with the reference result when type is
 *                FLA_ALG_REFERENCE, otherwise compared against C.
 *   dtime, diff, gflops   outputs.
 */
void time_Gemm( int param_combo, int type, int nrepeats, int m, int k, int n,
                FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  int     irep;
  double  dtime_old = 1.0e9;
  double  t_start;
  FLA_Obj C_old, A_flat, B_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_old );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    // Reset the hierarchical output and refresh the flat operands.
    FLASH_Copy( C_old, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( B, B_flat );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( param_combo >= 0 && param_combo <= 8 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        switch ( param_combo )
        {
          case 0: REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 1: REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 2: REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE,      FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 3: REF_Gemm( FLA_NO_TRANSPOSE,   FLA_CONJ_TRANSPOSE, FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 4: REF_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE,   FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 5: REF_Gemm( FLA_NO_TRANSPOSE,   FLA_TRANSPOSE,      FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 6: REF_Gemm( FLA_TRANSPOSE,      FLA_CONJ_TRANSPOSE, FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 7: REF_Gemm( FLA_TRANSPOSE,      FLA_NO_TRANSPOSE,   FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
          case 8: REF_Gemm( FLA_TRANSPOSE,      FLA_TRANSPOSE,      FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat ); break;
        }
      }
      else if ( type == FLA_ALG_FRONT )
      {
        switch ( param_combo )
        {
          case 0: FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 1: FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 2: FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE,      FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 3: FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 4: FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE,   FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 5: FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_TRANSPOSE,      FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 6: FLASH_Gemm( FLA_TRANSPOSE,      FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 7: FLASH_Gemm( FLA_TRANSPOSE,      FLA_NO_TRANSPOSE,   FLA_ONE, A, B, FLA_ZERO, C ); break;
          case 8: FLASH_Gemm( FLA_TRANSPOSE,      FLA_TRANSPOSE,      FLA_ONE, A, B, FLA_ZERO, C ); break;
        }
      }
      else
      {
        printf("trouble\n");
      }
    }

    *dtime    = FLA_Clock() - t_start;
    dtime_old = min( *dtime, dtime_old );
  }

  if ( type == FLA_ALG_REFERENCE )
  {
    // The reference run defines the expected answer.
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }

  *gflops = 2.0 * m * k * n / dtime_old / 1.0e9;

  // The factor of four for complex arithmetic depends on the datatype of
  // the operands, not on which transposition is requested, so test the
  // datatype as the other timing routines do.
  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  FLASH_Copy( C_old, C );

  FLASH_Obj_free( &C_old );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &B_flat );
  FLASH_Obj_free( &C_flat );
}