Пример #1
0
void time_Gemm_hh( 
               int variant, int type, int nrepeats, int n, int nb_alg,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  int
    irep;

  double
    dtime_old = 1.0e9;

  FLA_Obj
    C_old;

  fla_blocksize_t*
    bp;
  fla_gemm_t*
    cntl_gemm_blas;
  fla_gemm_t*
    cntl_gemm_var;

  bp             = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
  cntl_gemm_var  = FLA_Cntl_gemm_obj_create( FLA_FLAT, variant, bp, cntl_gemm_blas );

  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );

  FLA_Copy_external( C, C_old );


  for ( irep = 0 ; irep < nrepeats; irep++ ){
    FLA_Copy_external( C_old, C );

    *dtime = FLA_Clock();

    switch( variant ){
    // Time reference implementation
    case 0:
      REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, 
                FLA_ONE, A, B, FLA_ONE, C );
      break;

    // Time variant 1
    case 1:{
      switch( type ){
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_hh_unb_var1( FLA_ONE, A, B, FLA_ONE, C );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Gemm_hh_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time variant 2
    case 2:{
      switch( type ){
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_hh_unb_var2( FLA_ONE, A, B, FLA_ONE, C );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Gemm_hh_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time variant 3
    case 3:{
      switch( type ){
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_hh_unb_var3( FLA_ONE, A, B, FLA_ONE, C );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Gemm_hh_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time variant 4
    case 4:{
      switch( type ){
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_hh_unb_var4( FLA_ONE, A, B, FLA_ONE, C );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Gemm_hh_blk_var4( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time variant 5
    case 5:{
      switch( type ){
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_hh_unb_var5( FLA_ONE, A, B, FLA_ONE, C );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Gemm_hh_blk_var5( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time variant 6
    case 6:{
      switch( type ){
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_hh_unb_var6( FLA_ONE, A, B, FLA_ONE, C );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Gemm_hh_blk_var6( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    }
	
    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  FLA_Cntl_obj_free( cntl_gemm_var );
  FLA_Cntl_obj_free( cntl_gemm_blas );
  FLA_Blocksize_free( bp );

  if ( variant == 0 )
  {
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  }

  *gflops = 2.0 * 
            FLA_Obj_length( C ) * 
            FLA_Obj_width( C ) * 
            FLA_Obj_width( A ) / 
            dtime_old / 
            1.0e9;

  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  FLA_Copy_external( C_old, C );

  FLA_Obj_free( &C_old );
}
Пример #2
0
void time_Gemm( 
               int param_combo, int type, int nrepeats, int m, int k, int n,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  int
    irep;

  double
    dtime_old = 1.0e9;

  FLA_Obj
    C_old;

  if ( param_combo != 4 )
  {
    *gflops = 0.0;
    *diff   = 0.0;
    return;
  }

  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );

  FLA_Copy_external( C, C_old );


  for ( irep = 0 ; irep < nrepeats; irep++ ){
    FLA_Copy_external( C_old, C );

    *dtime = FLA_Clock();

    switch( param_combo ){

    // Time parameter combination 0
    case 0:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 1
    case 1:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 2
    case 2:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 3
    case 3:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 4
    case 4:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        //FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        //FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ONE, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 5
    case 5:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 6
    case 6:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 7
    case 7:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    // Time parameter combination 8
    case 8:{
      switch( type ){
      case FLA_ALG_REFERENCE:
        REF_Gemm( FLA_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      case FLA_ALG_FRONT:
        FLA_Gemm( FLA_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }

    }
	
    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }


/*
  if ( type == FLA_ALG_REFERENCE )
  {
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  }
*/

  *gflops = 2.0 * m * k * n / 
            dtime_old / 
            1.0e9;

  if ( param_combo == 0 ||
       param_combo == 1 ||
       param_combo == 2 ||
       param_combo == 3 ||
       param_combo == 6 )
  *gflops *= 4.0;

  *dtime = dtime_old;

  FLA_Copy_external( C_old, C );

  FLA_Obj_free( &C_old );
}
Пример #3
0
void time_Gemm_pp_nn( 
		     int variant, int type, int nrepeats, int n, int nb_alg,
		     FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
		     double *dtime, double *diff, double *mflops )
{
  int
    irep,
    info, lwork;

  double
    dtime_old,
    d_minus_one = -1.0, d_one = 1.0;

  FLA_Obj
    Cold;

  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold );

  FLA_Copy_external( C, Cold );

  for ( irep = 0 ; irep < nrepeats; irep++ ){
    FLA_Copy_external( Cold, C );

    *dtime = FLA_Clock();

    switch( variant ){
    case 0:
      // Time reference implementation
      REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
		ONE, A, B, FLA_ONE, C );
      break;

    case 1:{
      // Time variant 1
      switch( type ){
      case FLA_ALG_UNBLOCKED:
	FLA_Gemm_pp_nn_var1( FLA_ONE, A, B, C, nb_alg );
	break;
      case FLA_ALG_BLOCKED:
        REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
		  ONE, A, B, FLA_ONE, C );
	break;
      default:
	printf("trouble\n");
      }

      break;
    }
    }

    if ( irep == 0 )
      dtime_old = FLA_Clock() - *dtime;
    else{
      *dtime = FLA_Clock() - *dtime;
      dtime_old = min( *dtime, dtime_old );
    }
  }

  if ( variant == 0 ){
    FLA_Copy_external( C, Cref );
    *diff = 0.0;
  }
  else{
    *diff = FLA_Max_elemwise_diff( C, Cref );
  }

  *mflops = 2.0 * 
            FLA_Obj_length( C ) * 
            FLA_Obj_width( C ) * 
            FLA_Obj_width( A ) / 
            dtime_old / 
            1000000;

  *dtime = dtime_old;

  FLA_Copy_external( Cold, C );

  FLA_Obj_free( &Cold );
}
Пример #4
0
void time_Gemm_nn( 
               int variant, int type, int nrepeats, int n, int nb_alg,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
               double *dtime, double *diff, double *gflops )
{
  int
    irep,
    info, lwork;

  double
    dtime_old,
    d_minus_one = -1.0, d_one = 1.0;

  FLA_Obj
    Cold;

  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold );

  FLA_Copy_external( C, Cold );

  for ( irep = 0 ; irep < nrepeats; irep++ ){
    FLA_Copy_external( Cold, C );

    *dtime = FLA_Clock();

    switch( variant ){
    case 0:
      // Time reference implementation
      REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
                FLA_ONE, A, B, FLA_ONE, C );
      break;

    case 1:{
      // Time variant 1
      switch( type ){
      case FLA_ALG_OPENMP_BVAR:
        FLA_Gemm_nn_omp_var1( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 2:{
      // Time variant 2
      switch( type ){
      case FLA_ALG_OPENMP_BVAR:
        FLA_Gemm_nn_omp_var2( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 3:{
      // Time variant 3
      switch( type ){
      case FLA_ALG_OPENMP_BVAR:
        FLA_Gemm_nn_omp_var3( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 4:{
      // Time variant 4
      switch( type ){
      case FLA_ALG_OPENMP_BVAR:
        FLA_Gemm_nn_omp_var4( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 5:{
      // Time variant 5
      switch( type ){
      case FLA_ALG_OPENMP_BVAR:
        FLA_Gemm_nn_omp_var5( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 6:{
      // Time variant 6
      switch( type ){
      case FLA_ALG_OPENMP_BVAR:
        FLA_Gemm_nn_omp_var6( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 13:{
      // Time variant 1->3
      switch( type ){
      case FLA_ALG_OPENMP_CVAR:
        FLA_Gemm_nn_omp_var13( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 15:{
      // Time variant 1->5
      switch( type ){
      case FLA_ALG_OPENMP_CVAR:
        FLA_Gemm_nn_omp_var15( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 31:{
      // Time variant 3->1 
      switch( type ){
      case FLA_ALG_OPENMP_CVAR:
        FLA_Gemm_nn_omp_var31( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 35:{
      // Time variant 3->5 
      switch( type ){
      case FLA_ALG_OPENMP_CVAR:
        FLA_Gemm_nn_omp_var35( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 51:{
      // Time variant 5->1 
      switch( type ){
      case FLA_ALG_OPENMP_CVAR:
        FLA_Gemm_nn_omp_var51( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    case 53:{
      // Time variant 5->3 
      switch( type ){
      case FLA_ALG_OPENMP_CVAR:
        FLA_Gemm_nn_omp_var53( FLA_ONE, A, B, C, nb_alg );
        break;
      default:
        printf("trouble\n");
      }

      break;
    }
    }

    if ( irep == 0 )
      dtime_old = FLA_Clock() - *dtime;
    else{
      *dtime = FLA_Clock() - *dtime;
      dtime_old = min( *dtime, dtime_old );
    }
  }


  if ( variant == 0 ){
    FLA_Copy_external( C, Cref );
    *diff = 0.0;
  }
  else{
    *diff = FLA_Max_elemwise_diff( C, Cref );
    //FLA_Obj_show( "C:", C, "%f", "\n");
  }

  *gflops = 2.0 * 
            FLA_Obj_length( C ) * 
            FLA_Obj_width( C ) * 
            FLA_Obj_width( A ) / 
            dtime_old / 
            1e9;

  *dtime = dtime_old;

  FLA_Copy_external( Cold, C );

  FLA_Obj_free( &Cold );
}