예제 #1
0
/*
 * Build the blocksize object and the two hemm control trees used by the
 * hemm test for a single algorithmic variant.
 *
 * var        - variant number; it is added to FLA_UNB_VAR_OFFSET and
 *              FLA_BLK_VAR_OFFSET to obtain the unblocked and blocked
 *              variant identifiers.
 * b_alg_flat - blocksize value passed for all four arguments of
 *              FLA_Blocksize_create().
 *
 * The results are stored in hemm_cntl_bsize, hemm_cntl_unb and
 * hemm_cntl_blk.
 */
void libfla_test_hemm_cntl_create( unsigned int var,
                                   dim_t        b_alg_flat )
{
	const int unblocked_variant = FLA_UNB_VAR_OFFSET + var;
	const int blocked_variant   = FLA_BLK_VAR_OFFSET + var;

	// One blocksize object; every one of its four entries is b_alg_flat.
	hemm_cntl_bsize = FLA_Blocksize_create( b_alg_flat, b_alg_flat,
	                                        b_alg_flat, b_alg_flat );

	// Unblocked tree: no blocksize and no sub-trees.
	hemm_cntl_unb = FLA_Cntl_hemm_obj_create(
	    FLA_FLAT, unblocked_variant,
	    NULL, NULL, NULL, NULL, NULL );

	// Blocked tree: uses the blocksize created above and hands the
	// subproblems to the fla_*_cntl_blas control trees.
	hemm_cntl_blk = FLA_Cntl_hemm_obj_create(
	    FLA_FLAT, blocked_variant,
	    hemm_cntl_bsize,
	    fla_scal_cntl_blas,
	    fla_hemm_cntl_blas,
	    fla_gemm_cntl_blas,
	    fla_gemm_cntl_blas );
}
예제 #2
0
/*
 * Set up the default hemm blocksizes and the hemm control trees
 * (fla_hemm_cntl_blas, _bp, _mp and _mm).
 *
 * The trees are created in dependency order: fla_hemm_cntl_blas is used by
 * the _bp and _mp trees, and fla_hemm_cntl_mp is used by the _mm tree.
 */
void FLA_Hemm_cntl_init()
{
	// Default blocksizes for conventional storage, one object per
	// blocked variant in use (variants 1 and 9).
	fla_hemm_var1_bsize = FLA_Query_blocksizes( FLA_DIMENSION_MIN );
	fla_hemm_var9_bsize = FLA_Query_blocksizes( FLA_DIMENSION_MIN );

	// Leaf tree: A and B are assumed to be b x b blocks, so the whole
	// operation is treated as a single subproblem.
	fla_hemm_cntl_blas = FLA_Cntl_hemm_obj_create(
	    FLA_FLAT, FLA_SUBPROBLEM,
	    NULL, NULL, NULL, NULL, NULL );

	// A is assumed to be a block and B a panel.
	fla_hemm_cntl_bp = FLA_Cntl_hemm_obj_create(
	    FLA_FLAT, FLA_BLOCKED_VARIANT9,
	    fla_hemm_var9_bsize,
	    fla_scal_cntl_blas,
	    fla_hemm_cntl_blas,
	    NULL, NULL );

	// A is assumed to be large and B a panel.
	fla_hemm_cntl_mp = FLA_Cntl_hemm_obj_create(
	    FLA_FLAT, FLA_BLOCKED_VARIANT1,
	    fla_hemm_var1_bsize,
	    fla_scal_cntl_blas,
	    fla_hemm_cntl_blas,
	    fla_gemm_cntl_blas,
	    fla_gemm_cntl_blas );

	// A and B are both assumed to be large; the hemm subproblem is
	// handled by the _mp tree created just above.
	fla_hemm_cntl_mm = FLA_Cntl_hemm_obj_create(
	    FLA_FLAT, FLA_BLOCKED_VARIANT9,
	    fla_hemm_var9_bsize,
	    fla_scal_cntl_blas,
	    fla_hemm_cntl_mp,
	    NULL, NULL );

}
예제 #3
0
/*
 * Time one implementation of the "ll" hemm operation and report the best
 * (minimum) time over nrepeats runs.
 *
 * variant  - 0 times REF_Hemm (called with FLA_LEFT, FLA_LOWER_TRIANGULAR);
 *            1..10 time FLA_Hemm_ll_{unb,blk}_var<variant>. Any other value
 *            leaves the timed region empty.
 * type     - FLA_ALG_UNBLOCKED or FLA_ALG_BLOCKED. For variants 1..10 any
 *            other value prints "trouble" and runs nothing.
 * nrepeats - number of timed runs.
 * n        - problem size; only used to skip unblocked runs when n > 300.
 * nb_alg   - blocksize passed for all four arguments of
 *            FLA_Blocksize_create().
 * A, B     - input operands.
 * C        - operand that is updated; its original contents are restored
 *            before every run and again before returning.
 * C_ref    - reference result. When variant == 0 it is overwritten with the
 *            computed C; otherwise the computed C is compared against it.
 * dtime    - out: best time, in the units of FLA_Clock() (presumably
 *            seconds - TODO confirm).
 * diff     - out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 * gflops   - out: 2 * length(C) * width(C) * width(A) / best time / 1e9.
 *
 * On the skip path (unblocked, n > 300) all three outputs are set to 0.0.
 */
void time_Hemm_ll( 
               int variant, int type, int nrepeats, int n, int nb_alg,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  int
    rep;

  double
    best_time = 1.0e9;

  FLA_Obj
    C_saved;

  fla_blocksize_t*
    bsize;
  fla_gemm_t*
    gemm_leaf;
  fla_hemm_t*
    hemm_leaf;
  fla_hemm_t*
    hemm_tree;

  // Unblocked variants are skipped for n > 300. Define every output,
  // including *dtime, so the caller never reads an indeterminate value.
  if ( type == FLA_ALG_UNBLOCKED && n > 300 )
  {
    *dtime  = 0.0;
    *diff   = 0.0;
    *gflops = 0.0;
    return;
  }

  bsize     = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  gemm_leaf = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
  hemm_leaf = FLA_Cntl_hemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL );
  hemm_tree = FLA_Cntl_hemm_obj_create( FLA_FLAT, variant, bsize, hemm_leaf, gemm_leaf, gemm_leaf );

  // Keep a copy of C so that every repetition starts from the same data.
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_saved );
  FLA_Copy_external( C, C_saved );

  for ( rep = 0; rep < nrepeats; rep++ )
  {
    FLA_Copy_external( C_saved, C );

    *dtime = FLA_Clock();

    if ( variant == 0 )
    {
      // Time reference implementation
      REF_Hemm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_ONE, A, B, FLA_ONE, C );
    }
    else if ( variant >= 1 && variant <= 10 )
    {
      switch ( type )
      {
      case FLA_ALG_UNBLOCKED:
        switch ( variant )
        {
        case 1:  FLA_Hemm_ll_unb_var1( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 2:  FLA_Hemm_ll_unb_var2( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 3:  FLA_Hemm_ll_unb_var3( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 4:  FLA_Hemm_ll_unb_var4( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 5:  FLA_Hemm_ll_unb_var5( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 6:  FLA_Hemm_ll_unb_var6( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 7:  FLA_Hemm_ll_unb_var7( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 8:  FLA_Hemm_ll_unb_var8( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 9:  FLA_Hemm_ll_unb_var9( FLA_ONE, A, B, FLA_ONE, C );  break;
        case 10: FLA_Hemm_ll_unb_var10( FLA_ONE, A, B, FLA_ONE, C ); break;
        default: break; // unreachable: variant was range-checked above
        }
        break;

      case FLA_ALG_BLOCKED:
        switch ( variant )
        {
        case 1:  FLA_Hemm_ll_blk_var1( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 2:  FLA_Hemm_ll_blk_var2( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 3:  FLA_Hemm_ll_blk_var3( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 4:  FLA_Hemm_ll_blk_var4( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 5:  FLA_Hemm_ll_blk_var5( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 6:  FLA_Hemm_ll_blk_var6( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 7:  FLA_Hemm_ll_blk_var7( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 8:  FLA_Hemm_ll_blk_var8( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 9:  FLA_Hemm_ll_blk_var9( FLA_ONE, A, B, FLA_ONE, C, hemm_tree );  break;
        case 10: FLA_Hemm_ll_blk_var10( FLA_ONE, A, B, FLA_ONE, C, hemm_tree ); break;
        default: break; // unreachable: variant was range-checked above
        }
        break;

      default:
        // Unknown algorithm type for a valid variant.
        printf("trouble\n");
      }
    }

    *dtime = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  FLA_Cntl_obj_free( hemm_tree );
  FLA_Cntl_obj_free( hemm_leaf );
  FLA_Cntl_obj_free( gemm_leaf );
  FLA_Blocksize_free( bsize );

  if ( variant == 0 )
  {
    // The reference run defines the expected result for later variants.
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  }

  *gflops = 2.0 * 
            FLA_Obj_length( C ) * 
            FLA_Obj_width( C ) * 
            FLA_Obj_width( A ) / 
            best_time / 
            1e9;

  *dtime = best_time;

  // Hand C back to the caller with its original contents.
  FLA_Copy_external( C_saved, C );

  FLA_Obj_free( &C_saved );
}