Example #1
0
/*
 * Performance driver for the FLAME gemm_nn variants.
 *
 * Reads the experiment parameters from stdin (number of repeats, blocking
 * size, problem-size range first/last/inc, and the m/k/n dimension inputs),
 * then, for every problem size p, times the reference implementation and the
 * unblocked and blocked form of each of the N_VARIANTS variants through
 * time_Gemm_nn().  Everything written to stdout forms a MATLAB script: the
 * prompts are echoed behind a '%' character, the measurements are stored in
 * data_REF / data_var<N> arrays, and plotting commands follow at the end.
 *
 * Each dimension input d is interpreted as follows:
 *   d >  0 : the dimension is fixed at d
 *   d == -1: the dimension equals the problem size p
 *   d < -1 : the dimension equals p / |d|
 *   d == 0 : the dimension is 0 and its description/tag strings stay empty
 *
 * Returns 0 on success, or 1 (after a diagnostic on stderr) when an input
 * value is missing, malformed, or the problem-size increment is not positive.
 */
int main(int argc, char *argv[])
{
  int 
    m_input, k_input, n_input,
    m, k, n,
    p_first, p_last, p_inc,
    p,
    nb_alg,
    n_repeats,
    variant,
    i,
    datatype,
    n_variants = N_VARIANTS;

  /* MATLAB line colors and markers; reused cyclically so that any number of
     variants can be plotted without indexing past the end of the tables. */
  static const char colors[] = "brkgmcbrkg";
  static const char ticks[]  = "o+*xso+*xs";
  const int n_styles = ( int ) sizeof( colors ) - 1;

  /* Large enough for the longest text any int value can produce, and
     zero-initialized so they are valid strings even when an input is 0. */
  char m_dim_desc[32] = "";
  char k_dim_desc[32] = "";
  char n_dim_desc[32] = "";
  char m_dim_tag[32]  = "";
  char k_dim_tag[32]  = "";
  char n_dim_tag[32]  = "";

  double max_gflops=6.0;

  double
    dtime,
    gflops,
    diff;

  FLA_Obj
    A, B, C, C_ref;
  
  /* Initialize FLAME */
  FLA_Init( );


  fprintf( stdout, "%c number of repeats:", '%' );
  if ( scanf( "%d", &n_repeats ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', n_repeats );

  fprintf( stdout, "%c Enter blocking size:", '%' );
  if ( scanf( "%d", &nb_alg ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', nb_alg );

  fprintf( stdout, "%c enter problem size first, last, inc:", '%' );
  if ( scanf( "%d%d%d", &p_first, &p_last, &p_inc ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );

  /* A non-positive increment would make the problem-size loop run forever. */
  if ( p_inc <= 0 ) goto bad_input;

  fprintf( stdout, "%c enter m k n (-1 means bind to problem size): ", '%' );
  if ( scanf( "%d%d%d", &m_input, &k_input, &n_input ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', m_input, k_input, n_input );


  /* Delete all existing data structures */
  fprintf( stdout, "\nclear all;\n\n" );


  /* Build the human-readable description (plot title) and the short tag
     (output file name) for each of the three dimensions. */
  if     ( m_input >  0 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = %d", m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dc", m_input);
  }
  else if( m_input <  -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p/%d", -m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", -m_input );
  }
  else if( m_input == -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p" );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", 1 );
  }
  if     ( k_input >  0 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = %d", k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dc", k_input);
  }
  else if( k_input <  -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p/%d", -k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", -k_input );
  }
  else if( k_input == -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p" );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", 1 );
  }
  if     ( n_input >  0 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = %d", n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dc", n_input);
  }
  else if( n_input <  -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p/%d", -n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", -n_input );
  }
  else if( n_input == -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p" );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", 1 );
  }



  /* i is the 1-based row index into the MATLAB data arrays. */
  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
  {

    m = m_input;
    k = k_input;
    n = n_input;

    /* Negative inputs bind the dimension to the problem size. */
    if( m < 0 ) m = p / abs(m_input);
    if( k < 0 ) k = p / abs(k_input);
    if( n < 0 ) n = p / abs(n_input);

    //datatype = FLA_FLOAT;
    datatype = FLA_DOUBLE;
    //datatype = FLA_COMPLEX;
    //datatype = FLA_DOUBLE_COMPLEX;

    /* Allocate space for the matrices */
    FLA_Obj_create( datatype, m, k, 0, 0, &A );
    FLA_Obj_create( datatype, k, n, 0, 0, &B );
    FLA_Obj_create( datatype, m, n, 0, 0, &C );
    FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );

    /* Generate random matrices A, B, and C */
    FLA_Random_matrix( A );
    FLA_Random_matrix( B );
    FLA_Random_matrix( C );

    FLA_Copy_external( C, C_ref );


    /* Time the reference implementation */
    time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,
                  A, B, C, C_ref, &dtime, &diff, &gflops );

    fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; \n", i, p, gflops );
    fflush( stdout );

    for ( variant = 1; variant <= n_variants; variant++ ){
      
      fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d  ", variant, i, p );
      fflush( stdout );


      time_Gemm_nn( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg,
                    A, B, C, C_ref, &dtime, &diff, &gflops );
      //gflops = 0.0;
      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
      fflush( stdout );

      time_Gemm_nn( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg,
                    A, B, C, C_ref, &dtime, &diff, &gflops );

      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
      fflush( stdout );

      //time_Gemm_nn( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg,
      //              A, B, C, C_ref, &dtime, &diff, &gflops );

      //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
      //fflush( stdout );


      fprintf( stdout, " ]; \n" );
      fflush( stdout );
    }
    fprintf( stdout, "\n" );


    FLA_Obj_free( &A );
    FLA_Obj_free( &B );
    FLA_Obj_free( &C );
    FLA_Obj_free( &C_ref );
  }

  /* Print the MATLAB commands to plot the data */

  /* Open a new figure window */
  fprintf( stdout, "figure;\n" );

  /* Plot the performance of the reference implementation */
  fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );

  /* Indicate that you want to add to the existing plot */
  fprintf( stdout, "hold on;\n" );

  /* Plot the unblocked (column 2) and blocked (column 4) data of each variant */
  for ( i = 1; i <= n_variants; i++ ) {
    const char color = colors[ ( i - 1 ) % n_styles ];
    const char tick  = ticks [ ( i - 1 ) % n_styles ];

    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
            i, i, color, tick );
    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n",
            i, i, color, tick );
    //fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 6 ), '%c--%c' ); \n",
    //        i, i, color, tick );
  }

  fprintf( stdout, "legend( ... \n" );
  fprintf( stdout, "'Reference', ... \n" );

  /* All legend entries but the last end in a MATLAB line continuation. */
  for ( i = 1; i < n_variants; i++ ) {
    //fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', 'opt\\_var%d' ... \n", i, i, i );
    fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i );
  }

  /* The last entry closes the legend() call. */
  i = n_variants;
  //fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', 'opt\\_var%d' ); \n", i, i, i );
  fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i );


  fprintf( stdout, "xlabel( 'problem size p' );\n" );
  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
  fprintf( stdout, "title( 'FLAME gemm\\_nn performance (%s, %s, %s)' );\n", 
           m_dim_desc, k_dim_desc, n_dim_desc );
  fprintf( stdout, "print -depsc gemm_nn_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag );
  fprintf( stdout, "hold off;\n");
  fflush( stdout );

  FLA_Finalize( );
  return 0;

bad_input:
  /* Diagnostics go to stderr so that stdout remains a clean MATLAB script. */
  fprintf( stderr, "error: missing, malformed, or out-of-range input\n" );
  FLA_Finalize( );
  return 1;
}
Example #2
0
/*
 * Performance driver for one OpenMP-parallel FLAME gemm_nn variant under
 * several thread configurations.
 *
 * Reads from stdin: number of repeats, blocking size, problem-size range
 * first/last/inc, the m/k/n dimension inputs, the variant number, the number
 * of thread experiments, and for each experiment the four values
 * t, t_m, t_k, t_n.  For every problem size p it times the reference
 * implementation and then the chosen variant once per thread experiment.
 * Everything written to stdout forms a MATLAB script (prompts are echoed
 * behind '%'), ending with the commands that plot the collected data.
 *
 * Each dimension input d is interpreted as follows:
 *   d >  0 : the dimension is fixed at d
 *   d == -1: the dimension equals the problem size p
 *   d < -1 : the dimension equals p / |d|
 *   d == 0 : the dimension is 0 and its description/tag strings stay empty
 *
 * Returns 0 on success, or 1 (after a diagnostic on stderr) when an input
 * value is missing, malformed, or out of range (non-positive increment, or a
 * thread-experiment count outside 1..MAX_THREAD_EXPERIMENTS).
 */
int main(int argc, char *argv[])
{
  /* Capacity of the per-experiment thread tables. */
  enum { MAX_THREAD_EXPERIMENTS = 64 };

  int 
    m_input, k_input, n_input,
    m, n, k,
    p_first, p_last, p_inc,
    p,
    nb_alg,
    nrepeats,
    variant,
    n_threads,
    n_thread_experiments,
    i, j,
    nvariants = N_VARIANTS;
  
  int  n_threads_exp[MAX_THREAD_EXPERIMENTS];
  int  n_threads_exp_m[MAX_THREAD_EXPERIMENTS];
  int  n_threads_exp_k[MAX_THREAD_EXPERIMENTS];
  int  n_threads_exp_n[MAX_THREAD_EXPERIMENTS];

  /* MATLAB line colors and markers; reused cyclically so that more
     experiments than styles never index past the end of the tables. */
  static const char colors[] = "brkgmcbrkg";
  static const char ticks[]  = "o+*xso+*xs";
  const int n_styles = ( int ) sizeof( colors ) - 1;

  /* Large enough for the longest text any int value can produce, and
     zero-initialized so they are valid strings even when an input is 0. */
  char m_dim_desc[32] = "";
  char k_dim_desc[32] = "";
  char n_dim_desc[32] = "";
  char m_dim_tag[32]  = "";
  char k_dim_tag[32]  = "";
  char n_dim_tag[32]  = "";

  double max_gflops=6.0;

  double
    dtime,
    gflops,
    diff;

  FLA_Obj
    A, B, C, Cref;

  /* nvariants is not needed by this driver; the cast silences the warning
     while keeping the reference to N_VARIANTS from the original code. */
  ( void ) nvariants;
  
  /* Initialize FLAME */
  FLA_Init( );
  FLA_Task_partitioning_init();


  fprintf( stdout, "%c number of repeats:", '%' );
  if ( scanf( "%d", &nrepeats ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', nrepeats );

  fprintf( stdout, "%c Enter blocking size:", '%' );
  if ( scanf( "%d", &nb_alg ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', nb_alg );

  fprintf( stdout, "%c enter problem size first, last, inc:", '%' );
  if ( scanf( "%d%d%d", &p_first, &p_last, &p_inc ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );

  /* A non-positive increment would make the problem-size loop run forever. */
  if ( p_inc <= 0 ) goto bad_input;

  fprintf( stdout, "%c enter m k n (-1 means bind to problem size): ", '%' );
  if ( scanf( "%d%d%d", &m_input, &k_input, &n_input ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', m_input, k_input, n_input );

  fprintf( stdout, "%c enter variant or variant-permutation (1..6,13,31,15,35): ", '%' );
  if ( scanf( "%d", &variant ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', variant );

  fprintf( stdout, "%c enter number of thread experiments: ", '%' );
  if ( scanf( "%d", &n_thread_experiments ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', n_thread_experiments );

  /* The count must fit the tables, and at least one experiment is required
     because the legend and axis code below index the last experiment. */
  if ( n_thread_experiments < 1 || n_thread_experiments > MAX_THREAD_EXPERIMENTS )
    goto bad_input;

  fprintf( stdout, "%c enter t, t_m, t_k, and t_n for each experiment: ", '%' );
  for( i = 0; i < n_thread_experiments; ++i ) {
    if ( scanf( "%d %d %d %d", &n_threads_exp[i], &n_threads_exp_m[i],
                &n_threads_exp_k[i], &n_threads_exp_n[i] ) != 4 )
      goto bad_input;
  }

  fprintf( stdout, "\n" );
  for( i = 0; i < n_thread_experiments; ++i )
    fprintf( stdout, "%c %2d = %2d x %2d x %2d\n", '%', n_threads_exp[i], n_threads_exp_m[i], n_threads_exp_k[i], n_threads_exp_n[i] );



  /* Delete all existing data structures */
  fprintf( stdout, "\nclear all;\n\n" );


  /* Build the human-readable description (plot title) and the short tag
     (output file name) for each of the three dimensions. */
  if     ( m_input >  0 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = %d", m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dc", m_input);
  }
  else if( m_input <  -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p/%d", -m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", -m_input );
  }
  else if( m_input == -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p" );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", 1 );
  }
  if     ( k_input >  0 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = %d", k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dc", k_input);
  }
  else if( k_input <  -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p/%d", -k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", -k_input );
  }
  else if( k_input == -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p" );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", 1 );
  }
  if     ( n_input >  0 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = %d", n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dc", n_input);
  }
  else if( n_input <  -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p/%d", -n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", -n_input );
  }
  else if( n_input == -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p" );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", 1 );
  }




  /* i is the 1-based row index into the MATLAB data arrays. */
  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
  {

    m = m_input;
    k = k_input;
    n = n_input;

    /* Negative inputs bind the dimension to the problem size. */
    if( m < 0 ) m = p / abs(m_input);
    if( k < 0 ) k = p / abs(k_input);
    if( n < 0 ) n = p / abs(n_input);


    /* Allocate space for the matrices */
    FLA_Obj_create( FLA_DOUBLE, m, k, &A );
    FLA_Obj_create( FLA_DOUBLE, k, n, &B );
    FLA_Obj_create( FLA_DOUBLE, m, n, &C );
    FLA_Obj_create( FLA_DOUBLE, m, n, &Cref );

    /* Generate random matrices A, B, and C */
    FLA_Random_matrix( A );
    FLA_Random_matrix( B );
    FLA_Random_matrix( C );

    FLA_Copy_external( C, Cref );


    /* Time the reference implementation */
    /* NOTE(review): this call passes n where the variant call below passes
       p -- confirm against time_Gemm_nn which one is intended. */
    time_Gemm_nn( 0, FLA_ALG_REFERENCE, nrepeats, n, nb_alg,
                  A, B, C, Cref, &dtime, &diff, &gflops );

    fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; \n", i, p, gflops );
    fflush( stdout );

    for ( j = 0; j < n_thread_experiments; j++ ){
      
      /* Apply the thread configuration of experiment j. */
      n_threads = n_threads_exp[j];
      FLA_Task_partitioning_set( n_threads_exp[j], n_threads_exp_m[j], n_threads_exp_k[j], n_threads_exp_n[j] );
      FLA_omp_set_num_threads( n_threads_exp[j] );
      FLA_omp_set_num_stages( n_threads_exp_k[j] );

      fprintf( stdout, "data_nth%d_%dx%dx%d( %d, 1:3 ) = [ %d  ", 
               n_threads, n_threads_exp_m[j], n_threads_exp_k[j], n_threads_exp_n[j], i, p );
      fflush( stdout );

      //time_Gemm_nn( variant, FLA_ALG_OPENMP_BVAR, nrepeats, n, nb_alg,
      time_Gemm_nn( variant, FLA_ALG_OPENMP_CVAR, nrepeats, p, nb_alg,
                    A, B, C, Cref, &dtime, &diff, &gflops );

      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
      fflush( stdout );


      fprintf( stdout, " ]; \n" );
      fflush( stdout );
    }

    FLA_Obj_free( &A );
    FLA_Obj_free( &B );
    FLA_Obj_free( &C );
    FLA_Obj_free( &Cref );
    fprintf( stdout, "\n" );


  }

  /* Print the MATLAB commands to plot the data */

  /* Open a new figure window */
  fprintf( stdout, "figure;\n" );

  /* Plot the performance of the reference implementation */
  //fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );

  /* Indicate that you want to add to the existing plot */
  fprintf( stdout, "hold on;\n" );

  /* Plot the data for each thread experiment */
  for ( i = 0; i < n_thread_experiments; i++ ){
    fprintf( stdout, "plot( data_nth%d_%dx%dx%d( :,1 ), data_nth%d_%dx%dx%d( :, 2 ), '%c:%c' ); \n", 
             n_threads_exp[ i ], n_threads_exp_m[i], n_threads_exp_k[i], n_threads_exp_n[i],
             n_threads_exp[ i ], n_threads_exp_m[i], n_threads_exp_k[i], n_threads_exp_n[i],
             colors[ i % n_styles ], ticks[ i % n_styles ] );
  }

  fprintf( stdout, "legend( ... \n" );

  /* All legend entries but the last end in a MATLAB line continuation. */
  for ( i = 0; i < n_thread_experiments-1; i++ )
    fprintf( stdout, "'n\\_threads %d=%dx%dx%d', ... \n", 
             n_threads_exp[ i ], n_threads_exp_m[i], n_threads_exp_k[i], n_threads_exp_n[i] );

  /* The last entry closes the legend() call. */
  i = n_thread_experiments-1;
  fprintf( stdout, "'n\\_threads %d=%dx%dx%d', 2 ); \n", 
             n_threads_exp[ i ], n_threads_exp_m[i], n_threads_exp_k[i], n_threads_exp_n[i] );

  fprintf( stdout, "xlabel( 'problem size p' );\n" );
  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
  /* The y axis is scaled by the thread count of the last experiment. */
  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, n_threads_exp[n_thread_experiments-1] * max_gflops );
  fprintf( stdout, "title( 'OpenFLAME gemm\\_nn\\_var%d performance (%s, %s, %s)' );\n", 
           variant, m_dim_desc, k_dim_desc, n_dim_desc );
  fprintf( stdout, "print -depsc gemm_nn_ompfac_var%d_%s_%s_%s.eps\n", variant, m_dim_tag, k_dim_tag, n_dim_tag );
  fprintf( stdout, "hold off;\n");
  fflush( stdout );

  FLA_Finalize( );
  return 0;

bad_input:
  /* Diagnostics go to stderr so that stdout remains a clean MATLAB script. */
  fprintf( stderr, "error: missing, malformed, or out-of-range input\n" );
  FLA_Finalize( );
  return 1;
}
Example #3
0
/*
 * Performance driver for the reference (BLAS) gemm_nn under several thread
 * counts.
 *
 * Reads from stdin: number of repeats, blocking size, problem-size range
 * first/last/inc, the m/k/n dimension inputs, the number of thread
 * experiments, and one thread count per experiment.  For every problem size
 * p it runs time_Gemm_nn() with FLA_ALG_REFERENCE once per thread count,
 * setting blas_cpu_number beforehand.  Everything written to stdout forms a
 * MATLAB script (prompts are echoed behind '%'), ending with the commands
 * that plot the collected data.
 *
 * Each dimension input d is interpreted as follows:
 *   d >  0 : the dimension is fixed at d
 *   d == -1: the dimension equals the problem size p
 *   d < -1 : the dimension equals p / |d|
 *   d == 0 : the dimension is 0 and its description/tag strings stay empty
 *
 * Returns 0 on success, or 1 (after a diagnostic on stderr) when an input
 * value is missing, malformed, or out of range (non-positive increment, or a
 * thread-experiment count outside 1..MAX_THREAD_EXPERIMENTS).
 */
int main(int argc, char *argv[])
{
  /* Capacity of the thread-count table. */
  enum { MAX_THREAD_EXPERIMENTS = 64 };

  int 
    m_input, k_input, n_input,
    m, n, k,
    p_first, p_last, p_inc,
    p,
    nb_alg,
    n_repeats,
    n_threads,
    n_thread_experiments,
    i, j;

  int n_threads_exp[MAX_THREAD_EXPERIMENTS];

  /* MATLAB line colors and markers; reused cyclically so that more
     experiments than styles never index past the end of the tables. */
  static const char colors[] = "brkgmckkk";
  static const char ticks[]  = "o+*xso+*x";
  const int n_styles = ( int ) sizeof( colors ) - 1;

  /* Large enough for the longest text any int value can produce, and
     zero-initialized so they are valid strings even when an input is 0. */
  char m_dim_desc[32] = "";
  char k_dim_desc[32] = "";
  char n_dim_desc[32] = "";
  char m_dim_tag[32]  = "";
  char k_dim_tag[32]  = "";
  char n_dim_tag[32]  = "";

  /* putenv() makes this buffer itself part of the environment, so it is
     given static storage duration rather than living on main's stack. */
  static char nth_str[32];

  double max_gflops=6.0;

  double
    dtime,
    gflops,
    diff;

  FLA_Obj
    A, B, C, C_ref;

  
  /* Initialize FLAME */
  FLA_Init( );


  fprintf( stdout, "%c number of repeats:", '%' );
  if ( scanf( "%d", &n_repeats ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', n_repeats );

  fprintf( stdout, "%c Enter blocking size:", '%' );
  if ( scanf( "%d", &nb_alg ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', nb_alg );

  fprintf( stdout, "%c enter problem size first, last, inc:", '%' );
  if ( scanf( "%d%d%d", &p_first, &p_last, &p_inc ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );

  /* A non-positive increment would make the problem-size loop run forever. */
  if ( p_inc <= 0 ) goto bad_input;

  fprintf( stdout, "%c enter m k n (-1 means bind to problem size): ", '%' );
  if ( scanf( "%d%d%d", &m_input, &k_input, &n_input ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', m_input, k_input, n_input );
  
  fprintf( stdout, "%c enter number of thread experiments: ", '%' );
  if ( scanf( "%d", &n_thread_experiments ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', n_thread_experiments );

  /* The count must fit the table, and at least one experiment is required
     because the code below indexes the last experiment. */
  if ( n_thread_experiments < 1 || n_thread_experiments > MAX_THREAD_EXPERIMENTS )
    goto bad_input;

  fprintf( stdout, "%c enter number of threads for each experiment (separated by spaces): ", '%' );
  for( i = 0; i < n_thread_experiments; ++i ) {
    if ( scanf( "%d", &n_threads_exp[i] ) != 1 ) goto bad_input;
  }

  fprintf( stdout, "%c", '%' );
  for( i = 0; i < n_thread_experiments; ++i )
    fprintf( stdout, " %d", n_threads_exp[i] );

  /* Delete all existing data structures */
  fprintf( stdout, "\nclear all;\n\n" );


  /* Build the human-readable description (plot title) and the short tag
     (output file name) for each of the three dimensions. */
  if     ( m_input >  0 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = %d", m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dc", m_input);
  }
  else if( m_input <  -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p/%d", -m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", -m_input );
  }
  else if( m_input == -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p" );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", 1 );
  }
  if     ( k_input >  0 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = %d", k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dc", k_input);
  }
  else if( k_input <  -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p/%d", -k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", -k_input );
  }
  else if( k_input == -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p" );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", 1 );
  }
  if     ( n_input >  0 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = %d", n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dc", n_input);
  }
  else if( n_input <  -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p/%d", -n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", -n_input );
  }
  else if( n_input == -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p" );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", 1 );
  }


  /* Start the BLAS threads using the thread count of the last experiment. */
  snprintf( nth_str, sizeof( nth_str ), "OMP_NUM_THREADS=%d", n_threads_exp[ n_thread_experiments-1 ] );
  putenv( nth_str );
  blas_cpu_number = n_threads_exp[ n_thread_experiments-1 ];
  blas_thread_init();



  /* i is the 1-based row index into the MATLAB data arrays. */
  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
  {
    m = m_input;
    k = k_input;
    n = n_input;

    /* Negative inputs bind the dimension to the problem size. */
    if( m < 0 ) m = p / abs(m_input);
    if( k < 0 ) k = p / abs(k_input);
    if( n < 0 ) n = p / abs(n_input);

    /* Allocate space for the matrices */
    FLA_Obj_create( FLA_DOUBLE, m, k, &A );
    FLA_Obj_create( FLA_DOUBLE, k, n, &B );
    FLA_Obj_create( FLA_DOUBLE, m, n, &C );
    FLA_Obj_create( FLA_DOUBLE, m, n, &C_ref );


    /* Generate random matrices A, B, and C */
    /* NOTE(review): the matrices are only filled for p > 4000, so smaller
       problems are timed on uninitialized data -- this looks like a leftover
       from debugging; confirm whether the guard is intended. */
    if( p > 4000 ){
      FLA_Random_matrix( A );
      FLA_Random_matrix( B );
      FLA_Random_matrix( C );

      FLA_Copy_external( C, C_ref );
    }



    blas_cpu_number = 1;

    //time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,
    //                A, B, C, C_ref, &dtime, &diff, &gflops );

    //fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; \n", i, p, gflops );
    //fflush( stdout );

    for ( j = 0; j < n_thread_experiments; j++ ){

      /* Select the thread count of experiment j. */
      n_threads = n_threads_exp[j];
      blas_cpu_number = n_threads;

      fprintf( stdout, "data_nth%d( %d, 1:3 ) = [ %d  ", n_threads, i, p );
      fflush( stdout );

      time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,
                    A, B, C, C_ref, &dtime, &diff, &gflops );

      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
      fflush( stdout );

      fprintf( stdout, " ]; \n" );
      fflush( stdout );
    }

    fprintf( stdout, "\n" );

    FLA_Obj_free( &A );
    FLA_Obj_free( &B );
    FLA_Obj_free( &C );
    FLA_Obj_free( &C_ref );



  }

  /* Print the MATLAB commands to plot the data */

  /* Open a new figure window */
  fprintf( stdout, "figure;\n" );

  /* Indicate that you want to add to the existing plot */
  fprintf( stdout, "hold on;\n" );

  /* Plot the data for each thread count */
  for ( i = 0; i < n_thread_experiments; i++ ){
    fprintf( stdout, "plot( data_nth%d( :,1 ), data_nth%d( :, 2 ), '%c:%c' ); \n", 
             n_threads_exp[ i ], n_threads_exp[ i ],
             colors[ i % n_styles ], ticks[ i % n_styles ] );
  }

  fprintf( stdout, "legend( ... \n" );

  /* All legend entries but the last end in a MATLAB line continuation. */
  for ( i = 0; i < n_thread_experiments-1; i++ )
    fprintf( stdout, "'%d threads', ... \n", n_threads_exp[ i ] );

  /* The last entry closes the legend() call. */
  fprintf( stdout, "'%d threads', 'Location', 'Best' ); \n", n_threads_exp[ n_thread_experiments-1 ] );

  fprintf( stdout, "xlabel( 'problem size p' );\n" );
  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
  /* The y axis is scaled by the thread count of the last experiment. */
  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, n_threads_exp[n_thread_experiments-1] * max_gflops );
  fprintf( stdout, "title( 'Goto BLAS dgemm performance (%s, %s, %s)' );\n", 
           m_dim_desc, k_dim_desc, n_dim_desc );
  fprintf( stdout, "print -depsc gemm_nn_goto_p_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag );
  fprintf( stdout, "hold off;\n");
  fflush( stdout );

  FLA_Finalize( );
  return 0;

bad_input:
  /* Diagnostics go to stderr so that stdout remains a clean MATLAB script. */
  fprintf( stderr, "error: missing, malformed, or out-of-range input\n" );
  FLA_Finalize( );
  return 1;
}
Example #4
0
/*
 * Performance driver for the reference (BLAS) gemm_nn that allocates the
 * matrices once and times top-left sub-matrices of them.
 *
 * Reads from stdin: number of repeats, problem-size range first/last/inc,
 * and the m/k/n dimension inputs.  A, B, C and C_ref are created once as
 * p_last x p_last objects; for every problem size p the top-left m x k,
 * k x n and m x n partitions of A, B and C are filled with random values and
 * passed to time_Gemm_nn() with FLA_ALG_REFERENCE.  Everything written to
 * stdout forms a MATLAB script (prompts are echoed behind '%'), ending with
 * the commands that plot the collected data.
 *
 * Each dimension input d is interpreted as follows:
 *   d >  0 : the dimension is fixed at d
 *   d == -1: the dimension equals the problem size p
 *   d < -1 : the dimension equals p / |d|
 *   d == 0 : the dimension is 0 and its description/tag strings stay empty
 *
 * Returns 0 on success, or 1 (after a diagnostic on stderr) when an input
 * value is missing, malformed, or the problem-size increment is not positive.
 */
int main(int argc, char *argv[])
{
  int 
    m_input, k_input, n_input,
    m, n, k,
    p_first, p_last, p_inc,
    p,
    n_repeats,
    i;

  /* This driver does not prompt for a blocking size, yet time_Gemm_nn()
     takes one; give it a defined value instead of an indeterminate one.
     NOTE(review): presumably ignored for FLA_ALG_REFERENCE -- confirm. */
  int nb_alg = 1;

  /* Large enough for the longest text any int value can produce, and
     zero-initialized so they are valid strings even when an input is 0. */
  char m_dim_desc[32] = "";
  char k_dim_desc[32] = "";
  char n_dim_desc[32] = "";
  char m_dim_tag[32]  = "";
  char k_dim_tag[32]  = "";
  char n_dim_tag[32]  = "";

  double max_gflops=6.0;

  double
    dtime,
    gflops,
    diff;

  FLA_Obj
    A, B, C, C_ref,
    ATL, ATR, ABL, ABR,
    BTL, BTR, BBL, BBR,
    CTL, CTR, CBL, CBR;
  
  /* Initialize FLAME */
  FLA_Init( );


  fprintf( stdout, "%c number of repeats:", '%' );
  if ( scanf( "%d", &n_repeats ) != 1 ) goto bad_input;
  fprintf( stdout, "%c %d\n", '%', n_repeats );

  fprintf( stdout, "%c enter problem size first, last, inc:", '%' );
  if ( scanf( "%d%d%d", &p_first, &p_last, &p_inc ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );

  /* A non-positive increment would make the problem-size loop run forever. */
  if ( p_inc <= 0 ) goto bad_input;

  fprintf( stdout, "%c enter m k n (-1 means bind to problem size): ", '%' );
  if ( scanf( "%d%d%d", &m_input, &k_input, &n_input ) != 3 ) goto bad_input;
  fprintf( stdout, "%c %d %d %d\n", '%', m_input, k_input, n_input );
  
  /* Delete all existing data structures */
  fprintf( stdout, "\nclear all;\n\n" );


  /* Build the human-readable description (plot title) and the short tag
     (output file name) for each of the three dimensions. */
  if     ( m_input >  0 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = %d", m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dc", m_input);
  }
  else if( m_input <  -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p/%d", -m_input );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", -m_input );
  }
  else if( m_input == -1 ) {
    snprintf( m_dim_desc, sizeof( m_dim_desc ), "m = p" );
    snprintf( m_dim_tag,  sizeof( m_dim_tag ),  "m%dp", 1 );
  }
  if     ( k_input >  0 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = %d", k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dc", k_input);
  }
  else if( k_input <  -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p/%d", -k_input );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", -k_input );
  }
  else if( k_input == -1 ) {
    snprintf( k_dim_desc, sizeof( k_dim_desc ), "k = p" );
    snprintf( k_dim_tag,  sizeof( k_dim_tag ),  "k%dp", 1 );
  }
  if     ( n_input >  0 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = %d", n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dc", n_input);
  }
  else if( n_input <  -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p/%d", -n_input );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", -n_input );
  }
  else if( n_input == -1 ) {
    snprintf( n_dim_desc, sizeof( n_dim_desc ), "n = p" );
    snprintf( n_dim_tag,  sizeof( n_dim_tag ),  "n%dp", 1 );
  }

  /* Allocate the matrices once, at the largest problem size.
     NOTE(review): a fixed dimension input larger than p_last would request
     a partition bigger than these objects -- confirm how that should be
     handled. */
  m = p_last;
  k = p_last;
  n = p_last;

  FLA_Obj_create( FLA_DOUBLE, m, k, &A );
  FLA_Obj_create( FLA_DOUBLE, k, n, &B );
  FLA_Obj_create( FLA_DOUBLE, m, n, &C );
  FLA_Obj_create( FLA_DOUBLE, m, n, &C_ref );




  /* i is the 1-based row index into the MATLAB data array. */
  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
  {
    
    m = m_input;
    k = k_input;
    n = n_input;

    /* Negative inputs bind the dimension to the problem size. */
    if( m < 0 ) m = p / abs(m_input);
    if( k < 0 ) k = p / abs(k_input);
    if( n < 0 ) n = p / abs(n_input);


    /* Take the top-left blocks of the current problem dimensions. */
    FLA_Part_2x2( A, &ATL, /**/ &ATR,
                  /* *************** */
                     &ABL, /**/ &ABR,
               m, k, FLA_TL );

    FLA_Part_2x2( B, &BTL, /**/ &BTR,
                  /* *************** */
                     &BBL, /**/ &BBR,
               k, n, FLA_TL );

    FLA_Part_2x2( C, &CTL, /**/ &CTR,
                  /* *************** */
                     &CBL, /**/ &CBR,
               m, n, FLA_TL );


    FLA_Random_matrix( ATL );
    FLA_Random_matrix( BTL );
    FLA_Random_matrix( CTL );


    /* NOTE(review): C_ref is passed at its full p_last x p_last size and is
       never written by this driver -- confirm what time_Gemm_nn expects. */
    time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, n, nb_alg,
                  ATL, BTL, CTL, C_ref, &dtime, &diff, &gflops );

    fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; \n", i, p, gflops );
    fflush( stdout );

    fprintf( stdout, "\n" );


  }


  FLA_Obj_free( &A );
  FLA_Obj_free( &B );
  FLA_Obj_free( &C );
  FLA_Obj_free( &C_ref );

  FLA_Finalize( );

  /* Print the MATLAB commands to plot the data */
  fprintf( stdout, "figure;\n" );

  fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), 'b-' ); \n" );

  fprintf( stdout, "legend( ... \n" );
  fprintf( stdout, "'Goto BLAS gemm', 2 ); \n" );

  fprintf( stdout, "xlabel( 'problem size p' );\n" );
  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n");
  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
  fprintf( stdout, "title( 'Goto BLAS gemm\\_nn performance (%s, %s, %s)' );\n", 
           m_dim_desc, k_dim_desc, n_dim_desc );
  fprintf( stdout, "print -depsc gemm_nn_goto_%s_%s_%s.eps\n", m_dim_tag, k_dim_tag, n_dim_tag );
  fprintf( stdout, "hold off;\n");
  fflush( stdout );

  return 0;

bad_input:
  /* Diagnostics go to stderr so that stdout remains a clean MATLAB script.
     No matrices have been created yet on this path. */
  fprintf( stderr, "error: missing, malformed, or out-of-range input\n" );
  FLA_Finalize( );
  return 1;
}