예제 #1
0
// Benchmark one Apply_Q variant: keep the best time over nrepeats runs,
// derive a GFLOPS figure, and either record or check the result.
//
//   param_combo  only combination 0 runs an operation; for any other value
//                the timed region is empty
//   type         FLA_ALG_REFERENCE runs REF_Apply_Q on the flattened copies,
//                FLA_ALG_FRONT runs FLASH_Apply_Q_UT on the hierarchical
//                objects, anything else prints "trouble"
//   nrepeats     number of timed repetitions
//   m, n         not used by this routine
//   A, t, T, W   operands handed to the routines being timed
//   B            operand updated by the timed routine; restored before return
//   B_ref        destination of FLASH_Obj_hierarchify for the reference run,
//                comparison operand otherwise
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: 0.0 for the reference run, otherwise
//                FLASH_Max_elemwise_diff( B, B_ref )
//   gflops       out: rate computed from the dimensions of A and B
void time_Apply_Q(
               int param_combo, int type, int nrepeats, int m, int n,
               FLA_Obj A, FLA_Obj B, FLA_Obj B_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W,
               double *dtime, double *diff, double *gflops )
{
  FLA_Obj B_backup, A_flat, B_flat;
  double  best_time = 1.0e9;
  double  flop_count;
  int     rep;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &B_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );

  FLASH_Copy( B, B_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    double t_start, elapsed;

    // Every repetition starts from the saved B and fresh flat copies.
    FLASH_Copy( B_backup, B );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( B, B_flat );

    t_start = FLA_Clock();

    if ( param_combo == 0 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Apply_Q( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A_flat, t, B_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, A, T, W, B );
      }
      else
      {
        printf("trouble\n");
      }
    }

    // Store the difference first so the clock is read exactly once.
    elapsed   = FLA_Clock() - t_start;
    best_time = min( elapsed, best_time );
  }

  // Both paths follow the timed operation with a triangular solve against
  // A; the reference path then stores its result, the other path compares.
  if ( type != FLA_ALG_REFERENCE )
  {
    FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                FLA_ONE, A, B );

    *diff = FLASH_Max_elemwise_diff( B, B_ref );
  }
  else
  {
    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, FLA_ONE, A_flat, B_flat );

    FLASH_Obj_hierarchify( B_flat, B_ref );

    *diff = 0.0;
  }

  flop_count = 2.0 *
               FLASH_Obj_scalar_length( A ) *
               FLASH_Obj_scalar_width( A ) *
               FLASH_Obj_scalar_width( B );

  *gflops = flop_count / best_time / 1.0e9;

  if ( FLA_Obj_is_complex( A ) )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  // Leave B as the caller passed it in.
  FLASH_Copy( B_backup, B );

  FLASH_Obj_free( &B_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &B_flat );
}
예제 #2
0
// Benchmark one Copy variant: keep the best time over nrepeats runs,
// derive a rate from m and n, and either record or check the result.
//
//   param_combo  only combination 0 runs an operation; for any other value
//                the timed region is empty
//   type         FLA_ALG_REFERENCE runs REF_Copy on the flattened copies,
//                FLA_ALG_FRONT runs FLASH_Copy on the hierarchical objects,
//                anything else prints "trouble"
//   nrepeats     number of timed repetitions
//   m, n         dimensions used only in the rate formula
//   A            source operand
//   C            destination operand; restored before return
//   C_ref        destination of FLASH_Obj_hierarchify for the reference run,
//                comparison operand otherwise
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: 0.0 for the reference run, otherwise
//                FLASH_Max_elemwise_diff( C, C_ref )
//   gflops       out: 2 * m * n / time / 1e9
void time_Copy(
               int param_combo, int type, int nrepeats, int m, int n,
               FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  FLA_Obj C_backup, A_flat, C_flat;
  double  best_time = 1.0e9;
  int     rep;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    double t_start, elapsed;

    // Every repetition starts from the saved C and fresh flat copies.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( param_combo == 0 )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Copy( A_flat, C_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Copy( A, C );
      }
      else
      {
        printf("trouble\n");
      }
    }

    // Store the difference first so the clock is read exactly once.
    elapsed   = FLA_Clock() - t_start;
    best_time = min( elapsed, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 * m * n / best_time / 1.0e9;

  *dtime = best_time;

  // Leave C as the caller passed it in.
  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
예제 #3
0
파일: time_Trmm.c 프로젝트: pgawron/tlash
// Benchmark one Trmm variant: keep the best time over nrepeats runs,
// derive a GFLOPS figure, and either record or check the result.
//
//   param_combo  0..11 selects side / uplo / trans as
//                  side  = FLA_LEFT for 0..5, FLA_RIGHT for 6..11
//                  uplo  = FLA_LOWER_TRIANGULAR for the first three of each
//                          half, FLA_UPPER_TRIANGULAR for the last three
//                  trans = FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
//                          FLA_TRANSPOSE for remainder 0, 1, 2 modulo 3
//                for any other value the timed region is empty
//   type         FLA_ALG_REFERENCE runs REF_Trmm on the flattened copies,
//                FLA_ALG_FRONT runs FLASH_Trmm on the hierarchical objects,
//                anything else prints "trouble"
//   nrepeats     number of timed repetitions
//   m, n         not used by this routine
//   A            triangular operand
//   C            operand updated by the timed routine; restored before return
//   C_ref        destination of FLASH_Obj_hierarchify for the reference run,
//                comparison operand otherwise
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: 0.0 for the reference run, otherwise
//                FLASH_Max_elemwise_diff( C, C_ref )
//   gflops       out: rate from the dimensions of C and A, multiplied by 4
//                for the FLA_CONJ_TRANSPOSE combinations (0, 3, 6, 9)
void time_Trmm(
               int param_combo, int type, int nrepeats, int m, int n,
               FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  const int combo_known = ( param_combo >= 0 && param_combo <= 11 );
  const int trans_of[3] = { FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_TRANSPOSE };

  int     side  = FLA_LEFT;
  int     uplo  = FLA_LOWER_TRIANGULAR;
  int     trans = FLA_CONJ_TRANSPOSE;
  FLA_Obj C_backup, A_flat, C_flat;
  double  best_time = 1.0e9;
  int     rep;

  // Decode the combination once; the values are only used when it is known.
  if ( combo_known )
  {
    if ( param_combo >= 6 )     side = FLA_RIGHT;
    if ( param_combo % 6 >= 3 ) uplo = FLA_UPPER_TRIANGULAR;
    trans = trans_of[ param_combo % 3 ];
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    double t_start, elapsed;

    // Every repetition starts from the saved C and fresh flat copies.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( combo_known )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Trmm( side, uplo, trans, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Trmm( side, uplo, trans, FLA_NONUNIT_DIAG, FLA_TWO, A, C );
      }
      else
      {
        printf("trouble\n");
      }
    }

    // Store the difference first so the clock is read exactly once.
    elapsed   = FLA_Clock() - t_start;
    best_time = min( elapsed, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_width( C ) *
            FLASH_Obj_scalar_width( A ) /
            best_time /
            1.0e9;

  // Combinations 0, 3, 6 and 9 are the FLA_CONJ_TRANSPOSE ones.
  if ( combo_known && param_combo % 3 == 0 )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  // Leave C as the caller passed it in.
  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
예제 #4
0
// Benchmark FLASH_Lyap and measure the residual of the computed solution.
//
//   param_combo  0 uses FLA_NO_TRANSPOSE, 1 uses FLA_CONJ_TRANSPOSE; for any
//                other value the timed region is empty
//   type         only FLA_ALG_FRONT has an implementation wired up here;
//                any other value prints "trouble" on each repetition
//   nrepeats     number of timed repetitions
//   m            problem size used in the rate formula
//   isgn, A, scale
//                operands handed to FLASH_Lyap; isgn also scales the residual
//   C            right-hand side on entry to each repetition, overwritten by
//                FLASH_Lyap; restored before return
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: value FLASH_Norm1 reports for the residual built below
//   gflops       out: (2/3) * m^3 / time / 1e9, multiplied by 4 when C is
//                complex
//
// The pair ( param_combo == 0, type == FLA_ALG_FRONT ) is skipped outright:
// all three outputs are set to 0.0 and nothing is timed.
void time_Lyap(
                int param_combo, int type, int nrepeats, int m,
                FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale,
                double *dtime, double *diff, double *gflops )
{
  int
    irep;

  double
    dtime_old = 1.0e9;

  FLA_Obj
    C_save, norm;

  if ( param_combo == 0 && type == FLA_ALG_FRONT )
  {
    // Report zeros for every output so the caller never reads an unset
    // *dtime on this path.
    *dtime  = 0.0;
    *gflops = 0.0;
    *diff   = 0.0;
    return;
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_save );
  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( C ), 1, 1, 0, 0, &norm );

  FLASH_Copy( C, C_save );

  for ( irep = 0 ; irep < nrepeats; irep++ )
  {
    // Every repetition starts from the saved right-hand side.
    FLASH_Copy( C_save, C );

    *dtime = FLA_Clock();

    switch( param_combo ){

    case 0:{
      switch( type ){
      case FLA_ALG_FRONT:
        FLASH_Lyap( FLA_NO_TRANSPOSE, isgn, A, C, scale );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    case 1:{
      switch( type ){
      case FLA_ALG_FRONT:
        FLASH_Lyap( FLA_CONJ_TRANSPOSE, isgn, A, C, scale );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    }

    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  // Residual check: X is the Hermitianized solution, W accumulates the two
  // FLASH_Gemm products for the chosen combination, is scaled by isgn, and
  // has the original right-hand side subtracted before the norm is taken.
  {
    FLA_Obj X, W;

    FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &X );
    FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &W );

    FLASH_Copy( C, X );
    FLASH_Hermitianize( FLA_UPPER_TRIANGULAR, X );

    if ( param_combo == 0 )
    {
      FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A, X, FLA_ZERO, W );
      FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );
    }
    else if ( param_combo == 1 )
    {
      FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, X, FLA_ZERO, W );
      FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );
    }

    FLASH_Scal( isgn, W );

    FLASH_Axpy( FLA_MINUS_ONE, C_save, W );
    FLASH_Norm1( W, norm );
    FLA_Obj_extract_real_scalar( norm, diff );

    FLASH_Obj_free( &X );
    FLASH_Obj_free( &W );
  }

  // Cube m in double: the int product m * m * m overflows once m > 1290.
  *gflops = ( 2.0 / 3.0 ) * ( ( double ) m * m * m ) /
            dtime_old / 1e9;

  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  // Leave C as the caller passed it in.
  FLASH_Copy( C_save, C );

  FLASH_Obj_free( &C_save );
  FLA_Obj_free( &norm );
}
예제 #5
0
// Interactive driver: builds a hierarchical object over a local buffer,
// copies it into a second hierarchical object, and prints both.
//
// Prompts on stdout for the hierarchy depth, one blocksize per level, and
// the matrix dimensions m and n, echoing each value on a line that starts
// with '%'. Input is validated before use: the depth must fit the blocksize
// array and m x n must fit the local buffer. On invalid input a message is
// written to stderr and the program exits with status 1.
int main( int argc, char *argv[] )
{
  enum { MAX_DEPTH = 3, BUFFER_LEN = 64 };

  int
    m_input, n_input,
    m, n, rs, cs,
    i,
    datatype;

  int blocksize[MAX_DEPTH];
  int depth;
  double buffer[BUFFER_LEN];

  FLA_Obj Ah, Bh;

  FLA_Init();

  fprintf( stdout, "%c Enter hierarchy depth:", '%' );
  // depth indexes blocksize[] below, so it must stay within the array.
  if ( scanf( "%d", &depth ) != 1 || depth < 0 || depth > MAX_DEPTH )
  {
    fprintf( stderr, "invalid hierarchy depth (expected 0..%d)\n", MAX_DEPTH );
    FLA_Finalize();
    return 1;
  }
  fprintf( stdout, "%c %d\n", '%', depth );

  for ( i = 0; i < depth; ++i )
  {
    fprintf( stdout, "%c Enter blocksize %d:", '%', i );
    if ( scanf( "%d", &blocksize[i] ) != 1 )
    {
      fprintf( stderr, "invalid blocksize %d\n", i );
      FLA_Finalize();
      return 1;
    }
    fprintf( stdout, "%c %d\n", '%', blocksize[i] );
  }

  fprintf( stdout, "%c enter m n: ", '%' );
  if ( scanf( "%d%d", &m_input, &n_input ) != 2 )
  {
    fprintf( stderr, "invalid matrix dimensions\n" );
    FLA_Finalize();
    return 1;
  }
  fprintf( stdout, "%c %d %d\n", '%', m_input, n_input );

  // The object is laid over buffer[] with row stride 1 and column stride m,
  // so m * n elements must fit. Each factor is bounded first so the product
  // cannot overflow.
  if ( m_input < 0 || n_input < 0 ||
       m_input > BUFFER_LEN || n_input > BUFFER_LEN ||
       m_input * n_input > BUFFER_LEN )
  {
    fprintf( stderr, "m * n must be between 0 and %d\n", BUFFER_LEN );
    FLA_Finalize();
    return 1;
  }

  datatype      = FLA_DOUBLE;
  m             = m_input;
  n             = n_input;
  rs            = 1;
  cs            = m_input;

  for( i = 0; i < BUFFER_LEN; i++ ) buffer[i] = ( double ) i;

  // Ah borrows buffer[]; Bh gets its own storage and receives a copy.
  FLASH_Obj_create_without_buffer( datatype, m, n, depth, blocksize, &Ah );
  FLASH_Obj_attach_buffer( buffer, rs, cs, &Ah );

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, Ah, &Bh );

  FLASH_Copy( Ah, Bh );

  FLASH_Obj_show( "", Ah, "%11.4e", "" );
  FLASH_Obj_show( "", Bh, "%11.4e", "" );

  // Release both objects; Ah must not free buffer[], which lives on the
  // stack.
  FLASH_Obj_free_without_buffer( &Ah );
  FLASH_Obj_free( &Bh );

  FLA_Finalize();

  return 0;
}
예제 #6
0
// Benchmark one Syrk variant: keep the best time over nrepeats runs,
// derive a GFLOPS figure, and either record or check the result.
//
//   param_combo  0..3 selects uplo / trans as
//                  0: FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE
//                  1: FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE
//                  2: FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE
//                  3: FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE
//                for any other value the timed region is empty
//   type         FLA_ALG_REFERENCE runs REF_Syrk on the flattened copies,
//                FLA_ALG_FRONT runs FLASH_Syrk on the hierarchical objects,
//                anything else prints "trouble"
//   nrepeats     number of timed repetitions
//   m, k         dimensions used only in the rate formula
//   A            input operand
//   B            not used by this routine
//   C            operand updated by the timed routine; restored before return
//   C_ref        destination of FLASH_Obj_hierarchify for the reference run,
//                comparison operand otherwise
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: 0.0 for the reference run, otherwise
//                FLASH_Max_elemwise_diff( C, C_ref )
//   gflops       out: m * m * k / time / 1e9, multiplied by 4 when C is
//                complex
void time_Syrk(
               int param_combo, int type, int nrepeats, int m, int k,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  const int combo_known = ( param_combo >= 0 && param_combo <= 3 );

  int     uplo  = FLA_LOWER_TRIANGULAR;
  int     trans = FLA_NO_TRANSPOSE;
  FLA_Obj C_backup, A_flat, C_flat;
  double  best_time = 1.0e9;
  int     rep;

  // Decode the combination once; the values are only used when it is known.
  if ( combo_known )
  {
    if ( param_combo >= 2 )     uplo  = FLA_UPPER_TRIANGULAR;
    if ( param_combo % 2 == 1 ) trans = FLA_TRANSPOSE;
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    double t_start, elapsed;

    // Every repetition starts from the saved C and fresh flat copies.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( combo_known )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Syrk( uplo, trans, FLA_ONE, A_flat, FLA_ZERO, C_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Syrk( uplo, trans, FLA_ONE, A, FLA_ZERO, C );
      }
      else
      {
        printf("trouble\n");
      }
    }

    // Store the difference first so the clock is read exactly once.
    elapsed   = FLA_Clock() - t_start;
    best_time = min( elapsed, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 * m * m * k / best_time / 1.0e9;

  if ( FLA_Obj_is_complex( C ) )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  // Leave C as the caller passed it in.
  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
예제 #7
0
// Benchmark one SPDinv variant: keep the best time over nrepeats runs,
// derive a GFLOPS figure, and either record or check the result.
//
//   param_combo  0 uses FLA_LOWER_TRIANGULAR, 1 uses FLA_UPPER_TRIANGULAR;
//                for any other value the timed region is empty
//   type         FLA_ALG_REFERENCE runs REF_SPDinv on the flattened copy,
//                FLA_ALG_FRONT runs FLASH_SPDinv on the hierarchical object,
//                anything else prints "trouble"
//   nrepeats     number of timed repetitions
//   m            not used by this routine
//   C            operand updated by the timed routine; restored before return
//   C_ref        destination of FLASH_Obj_hierarchify for the reference run,
//                comparison operand otherwise
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: 0.0 for the reference run, otherwise
//                FLASH_Max_elemwise_diff( C, C_ref )
//   gflops       out: cube of FLASH_Obj_scalar_length( C ) / time / 1e9
void time_SPDinv(
                int param_combo, int type, int nrepeats, int m,
                FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  const int combo_known = ( param_combo == 0 || param_combo == 1 );

  int     uplo = FLA_LOWER_TRIANGULAR;
  FLA_Obj C_backup, C_flat;
  double  best_time = 1.0e9;
  int     rep;

  if ( param_combo == 1 )
  {
    uplo = FLA_UPPER_TRIANGULAR;
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    double t_start, elapsed;

    // Every repetition starts from the saved C and a fresh flat copy.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( combo_known )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_SPDinv( uplo, C_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_SPDinv( uplo, C );
      }
      else
      {
        printf("trouble\n");
      }
    }

    // Store the difference first so the clock is read exactly once.
    elapsed   = FLA_Clock() - t_start;
    best_time = min( elapsed, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 1.0 *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_length( C ) *
            FLASH_Obj_scalar_length( C ) /
            best_time / 1e9;

  *dtime = best_time;

  // Leave C as the caller passed it in.
  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &C_flat );
}
예제 #8
0
// Benchmark one Trinv variant: keep the best time over nrepeats runs,
// derive a GFLOPS figure, and either record or check the result.
//
//   param_combo  0 uses FLA_LOWER_TRIANGULAR, 1 uses FLA_UPPER_TRIANGULAR;
//                for any other value the timed region is empty
//   type         FLA_ALG_REFERENCE runs REF_Trinv on the flattened copy,
//                FLA_ALG_FRONT runs FLASH_Trinv on the hierarchical object,
//                anything else prints "trouble"
//   nrepeats     number of timed repetitions
//   m            problem size used in the rate formula
//   diag         forwarded unchanged to the routine being timed
//   A            operand updated by the timed routine; restored before return
//   A_ref        destination of FLASH_Obj_hierarchify for the reference run,
//                comparison operand otherwise
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: 0.0 for the reference run, otherwise
//                FLASH_Max_elemwise_diff( A, A_ref )
//   gflops       out: m^3 / 4 / time / 1e9
void time_Trinv(
                 int param_combo, int type, int nrepeats, int m,
                 FLA_Diag diag, FLA_Obj A, FLA_Obj A_ref,
                 double *dtime, double *diff, double *gflops )
{
  const int combo_known = ( param_combo == 0 || param_combo == 1 );

  int     uplo = FLA_LOWER_TRIANGULAR;
  FLA_Obj A_backup, A_flat;
  double  best_time = 1.0e9;
  int     rep;

  if ( param_combo == 1 )
  {
    uplo = FLA_UPPER_TRIANGULAR;
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );

  FLASH_Copy( A, A_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    double t_start, elapsed;

    // Every repetition starts from the saved A and a fresh flat copy.
    FLASH_Copy( A_backup, A );
    FLASH_Obj_flatten( A, A_flat );

    t_start = FLA_Clock();

    if ( combo_known )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Trinv( uplo, diag, A_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Trinv( uplo, diag, A );
      }
      else
      {
        printf("trouble\n");
      }
    }

    // Store the difference first so the clock is read exactly once.
    elapsed   = FLA_Clock() - t_start;
    best_time = min( elapsed, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( A, A_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( A_flat, A_ref );
    *diff = 0.0;
  }

  *gflops = 0.25 * m * m * m / best_time / 1e9;

  *dtime = best_time;

  // Leave A as the caller passed it in.
  FLASH_Copy( A_backup, A );

  FLASH_Obj_free( &A_backup );
  FLASH_Obj_free( &A_flat );
}
예제 #9
0
파일: time_Gemm.c 프로젝트: pgawron/tlash
// Benchmark one Gemm variant: keep the best time over nrepeats runs,
// derive a GFLOPS figure, and either record or check the result.
//
//   param_combo  0..8 selects the two transposition arguments: the first is
//                chosen by param_combo / 3 and the second by param_combo % 3,
//                each mapping 0, 1, 2 to FLA_CONJ_TRANSPOSE,
//                FLA_NO_TRANSPOSE, FLA_TRANSPOSE; for any other value the
//                timed region is empty
//   type         FLA_ALG_REFERENCE runs REF_Gemm on the flattened copies,
//                FLA_ALG_FRONT runs FLASH_Gemm on the hierarchical objects,
//                anything else prints "trouble"
//   nrepeats     number of timed repetitions
//   m, k, n      dimensions used only in the rate formula
//   A, B         input operands
//   C            operand updated by the timed routine; restored before return
//   C_ref        destination of FLASH_Obj_hierarchify for the reference run,
//                comparison operand otherwise
//   dtime        out: smallest elapsed time (1.0e9 if the loop never runs)
//   diff         out: 0.0 for the reference run, otherwise
//                FLASH_Max_elemwise_diff( C, C_ref )
//   gflops       out: 2 * m * k * n / time / 1e9, multiplied by 4 for the
//                combinations that use FLA_CONJ_TRANSPOSE (0, 1, 2, 3, 6)
void time_Gemm(
               int param_combo, int type, int nrepeats, int m, int k, int n,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  const int combo_known = ( param_combo >= 0 && param_combo <= 8 );
  const int trans_of[3] = { FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_TRANSPOSE };

  int     transa = FLA_CONJ_TRANSPOSE;
  int     transb = FLA_CONJ_TRANSPOSE;
  FLA_Obj C_backup, A_flat, B_flat, C_flat;
  double  best_time = 1.0e9;
  int     rep;

  // Decode the combination once; the values are only used when it is known.
  if ( combo_known )
  {
    transa = trans_of[ param_combo / 3 ];
    transb = trans_of[ param_combo % 3 ];
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_backup );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  FLASH_Copy( C, C_backup );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    double t_start, elapsed;

    // Every repetition starts from the saved C and fresh flat copies.
    FLASH_Copy( C_backup, C );
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( B, B_flat );
    FLASH_Obj_flatten( C, C_flat );

    t_start = FLA_Clock();

    if ( combo_known )
    {
      if ( type == FLA_ALG_REFERENCE )
      {
        REF_Gemm( transa, transb, FLA_ONE, A_flat, B_flat, FLA_ZERO, C_flat );
      }
      else if ( type == FLA_ALG_FRONT )
      {
        FLASH_Gemm( transa, transb, FLA_ONE, A, B, FLA_ZERO, C );
      }
      else
      {
        printf("trouble\n");
      }
    }

    // Store the difference first so the clock is read exactly once.
    elapsed   = FLA_Clock() - t_start;
    best_time = min( elapsed, best_time );
  }

  if ( type != FLA_ALG_REFERENCE )
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 * m * k * n / best_time / 1.0e9;

  // Index 0 on either side means FLA_CONJ_TRANSPOSE: combinations
  // 0, 1, 2 (first argument) and 0, 3, 6 (second argument).
  if ( combo_known && ( param_combo / 3 == 0 || param_combo % 3 == 0 ) )
  {
    *gflops *= 4.0;
  }

  *dtime = best_time;

  // Leave C as the caller passed it in.
  FLASH_Copy( C_backup, C );

  FLASH_Obj_free( &C_backup );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &B_flat );
  FLASH_Obj_free( &C_flat );
}