/*
 * Frees the flash_apcaq2ut control objects and blocksize objects with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Apply_CAQ2_UT_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_apcaq2ut_cntl_leaf );
    FLA_Cntl_obj_free( flash_apcaq2ut_cntl_mid );
    FLA_Cntl_obj_free( flash_apcaq2ut_cntl );

    /* Blocksize objects. */
    FLA_Blocksize_free( flash_apcaq2ut_var2_bsize );
    FLA_Blocksize_free( flash_apcaq2ut_var3_bsize );
}
/*
 * Frees the flash_apqut control objects and blocksize objects with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Apply_Q_UT_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_apqut_cntl_leaf );
    FLA_Cntl_obj_free( flash_apqut_cntl );
    FLA_Cntl_obj_free( flash_apqut_cntl_blas );

    /* Blocksize objects. */
    FLA_Blocksize_free( flash_apqut_var1_bsize );
    FLA_Blocksize_free( flash_apqut_var2_bsize );
}
Ejemplo n.º 3
0
/*
 * Frees the fla_apqut leaf control object and the two fla_apqut
 * blocksize objects with FLA_Cntl_obj_free() and FLA_Blocksize_free().
 * The objects themselves are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLA_Apply_Q_UT_cntl_finalize( void )
{
    FLA_Cntl_obj_free( fla_apqut_cntl_leaf );

    /*
     * NOTE(review): the free of fla_apqut_cntl was already disabled in
     * this routine; the reason is not visible from here. Confirm whether
     * that object is ever created before re-enabling the call below.
     *
     *     FLA_Cntl_obj_free( fla_apqut_cntl );
     */

    FLA_Blocksize_free( fla_apqut_var1_bsize );
    FLA_Blocksize_free( fla_apqut_var2_bsize );
}
Ejemplo n.º 4
0
/*
 * Frees the fla_lu_piv control objects and blocksize objects with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLA_LU_piv_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( fla_lu_piv_cntl );
    FLA_Cntl_obj_free( fla_lu_piv_cntl2 );
    FLA_Cntl_obj_free( fla_lu_piv_cntl_in );
    FLA_Cntl_obj_free( fla_lu_piv_cntl_leaf );

    /* Blocksize objects. */
    FLA_Blocksize_free( fla_lu_piv_var5_bsize );
    FLA_Blocksize_free( fla_lu_piv_var5_bsize_in );
}
Ejemplo n.º 5
0
/*
 * Frees the fla_her2k control objects and blocksize objects with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLA_Her2k_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( fla_her2k_cntl_blas );

    FLA_Cntl_obj_free( fla_her2k_cntl_ip );
    FLA_Cntl_obj_free( fla_her2k_cntl_op );
    FLA_Cntl_obj_free( fla_her2k_cntl_mm );

    /* Blocksize objects. */
    FLA_Blocksize_free( fla_her2k_var3_bsize );
    FLA_Blocksize_free( fla_her2k_var9_bsize );
}
Ejemplo n.º 6
0
/*
 * Frees the fla_trsm control objects and blocksize objects with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLA_Trsm_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( fla_trsm_cntl_blas );

    FLA_Cntl_obj_free( fla_trsm_cntl_bp );
    FLA_Cntl_obj_free( fla_trsm_cntl_mp );
    FLA_Cntl_obj_free( fla_trsm_cntl_mm );

    /* Blocksize objects. */
    FLA_Blocksize_free( fla_trsm_var2_bsize );
    FLA_Blocksize_free( fla_trsm_var3_bsize );
}
Ejemplo n.º 7
0
/*
 * Frees the fla_hemm control objects and blocksize objects with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLA_Hemm_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( fla_hemm_cntl_blas );

    FLA_Cntl_obj_free( fla_hemm_cntl_bp );
    FLA_Cntl_obj_free( fla_hemm_cntl_mp );
    FLA_Cntl_obj_free( fla_hemm_cntl_mm );

    /* Blocksize objects. */
    FLA_Blocksize_free( fla_hemm_var1_bsize );
    FLA_Blocksize_free( fla_hemm_var9_bsize );
}
Ejemplo n.º 8
0
/*
 * Frees the flash_lqut control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_LQ_UT_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_lqut_cntl_leaf );
    FLA_Cntl_obj_free( flash_lqut_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_lqut_var3_bsize );
}
Ejemplo n.º 9
0
/*
 * Frees the flash_qr2ut control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_QR2_UT_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_qr2ut_cntl_leaf );
    FLA_Cntl_obj_free( flash_qr2ut_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_qr2ut_var2_bsize );
}
Ejemplo n.º 10
0
/*
 * Frees the flash_eig_gest control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Eig_gest_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_eig_gest_cntl_leaf );
    FLA_Cntl_obj_free( flash_eig_gest_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_eig_gest_bsize );
}
Ejemplo n.º 11
0
/*
 * Frees the flash_ttmm control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Ttmm_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_ttmm_cntl_leaf );
    FLA_Cntl_obj_free( flash_ttmm_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_ttmm_bsize );
}
Ejemplo n.º 12
0
/*
 * Frees the flash_uddateut control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_UDdate_UT_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_uddateut_cntl_leaf );
    FLA_Cntl_obj_free( flash_uddateut_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_uddateut_var2_bsize );
}
Ejemplo n.º 13
0
/*
 * Frees the symm test's blocksize object, then its unblocked and
 * blocked control objects. All three are declared outside this
 * function.
 */
void libfla_test_symm_cntl_free( void )
{
    /* Blocksize first, matching the established call order. */
    FLA_Blocksize_free(symm_cntl_bsize);

    /* Control objects. */
    FLA_Cntl_obj_free(symm_cntl_unb);
    FLA_Cntl_obj_free(symm_cntl_blk);
}
Ejemplo n.º 14
0
/*
 * Frees the flash_lu_piv control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_LU_piv_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_lu_piv_cntl_leaf );
    FLA_Cntl_obj_free( flash_lu_piv_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_lu_piv_bsize );
}
Ejemplo n.º 15
0
/*
 * Frees the fla_caqr2ut control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLA_CAQR2_UT_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( fla_caqr2ut_cntl_unb );
    FLA_Cntl_obj_free( fla_caqr2ut_cntl_leaf );

    /* Blocksize object. */
    FLA_Blocksize_free( fla_caqr2ut_var1_bsize );
}
Ejemplo n.º 16
0
/*
 * Frees the flash_sylv control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Sylv_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_sylv_cntl_leaf );
    FLA_Cntl_obj_free( flash_sylv_cntl_mb );
    FLA_Cntl_obj_free( flash_sylv_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_sylv_bsize );
}
/*
 * Frees the flash_appiv control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Apply_pivots_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_appiv_cntl_leaf );
    FLA_Cntl_obj_free( flash_appiv_cntl_bp );
    FLA_Cntl_obj_free( flash_appiv_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_appiv_bsize );
}
Ejemplo n.º 18
0
/*
 * Frees the eig_gest test's blocksize object, then its unblocked,
 * optimized and blocked control objects. All of them are declared
 * outside this function.
 */
void libfla_test_eig_gest_cntl_free( void )
{
    /* Blocksize first, matching the established call order. */
    FLA_Blocksize_free(eig_gest_cntl_bsize);

    /* Control objects. */
    FLA_Cntl_obj_free(eig_gest_cntl_unb);
    FLA_Cntl_obj_free(eig_gest_cntl_opt);
    FLA_Cntl_obj_free(eig_gest_cntl_blk);
}
Ejemplo n.º 19
0
/*
 * Frees the flash_scalr control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Scalr_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_scalr_cntl_blas );

    FLA_Cntl_obj_free( flash_scalr_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_scalr_bsize );
}
Ejemplo n.º 20
0
/*
 * Frees the flash_trsv control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Trsv_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_trsv_cntl_blas );

    FLA_Cntl_obj_free( flash_trsv_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_trsv_bsize );
}
Ejemplo n.º 21
0
/*
 * Frees the lu_piv test's blocksize object, then its unblocked,
 * optimized and blocked control objects. All of them are declared
 * outside this function.
 */
void libfla_test_lu_piv_cntl_free( void )
{
    /* Blocksize first, matching the established call order. */
    FLA_Blocksize_free(lu_piv_cntl_bsize);

    /* Control objects. */
    FLA_Cntl_obj_free(lu_piv_cntl_unb);
    FLA_Cntl_obj_free(lu_piv_cntl_opt);
    FLA_Cntl_obj_free(lu_piv_cntl_blk);
}
Ejemplo n.º 22
0
/*
 * Frees the flash_copyr control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Copyr_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_copyr_cntl_blas );

    FLA_Cntl_obj_free( flash_copyr_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_copyr_bsize );
}
Ejemplo n.º 23
0
/*
 * Frees the qrut test's blocksize object, then the apqut blocked
 * control object and the four qrut control objects. All of them are
 * declared outside this function.
 */
void libfla_test_qrut_cntl_free( void )
{
    /* Blocksize first, matching the established call order. */
    FLA_Blocksize_free(qrut_cntl_bsize);

    /* Control objects. */
    FLA_Cntl_obj_free(apqut_cntl_blk);
    FLA_Cntl_obj_free(qrut_cntl_unb);
    FLA_Cntl_obj_free(qrut_cntl_opt);
    FLA_Cntl_obj_free(qrut_cntl_blk);
    FLA_Cntl_obj_free(qrut_cntl_blk_sub);
}
Ejemplo n.º 24
0
/*
 * Frees the flash_trsm control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Trsm_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_trsm_cntl_blas );

    FLA_Cntl_obj_free( flash_trsm_cntl_bp );
    FLA_Cntl_obj_free( flash_trsm_cntl_mp );
    FLA_Cntl_obj_free( flash_trsm_cntl_mm );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_trsm_bsize );
}
Ejemplo n.º 25
0
/*
 * Frees the flash_syrk control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Syrk_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_syrk_cntl_blas );

    FLA_Cntl_obj_free( flash_syrk_cntl_ip );
    FLA_Cntl_obj_free( flash_syrk_cntl_op );
    FLA_Cntl_obj_free( flash_syrk_cntl_mm );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_syrk_bsize );
}
Ejemplo n.º 26
0
/*
 * Frees the flash_gemm control objects and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function. The blank-line grouping of the
 * calls is kept as found.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_Gemm_cntl_finalize( void )
{
    /* Control objects. */
    FLA_Cntl_obj_free( flash_gemm_cntl_blas );

    FLA_Cntl_obj_free( flash_gemm_cntl_pb_bb );
    FLA_Cntl_obj_free( flash_gemm_cntl_bp_bb );
    FLA_Cntl_obj_free( flash_gemm_cntl_ip_bb );

    FLA_Cntl_obj_free( flash_gemm_cntl_mp_ip );
    FLA_Cntl_obj_free( flash_gemm_cntl_op_bp );
    FLA_Cntl_obj_free( flash_gemm_cntl_pm_ip );
    FLA_Cntl_obj_free( flash_gemm_cntl_op_pb );
    FLA_Cntl_obj_free( flash_gemm_cntl_mp_pb );
    FLA_Cntl_obj_free( flash_gemm_cntl_pm_bp );

    FLA_Cntl_obj_free( flash_gemm_cntl_mm_pm );
    FLA_Cntl_obj_free( flash_gemm_cntl_mm_mp );
    FLA_Cntl_obj_free( flash_gemm_cntl_mm_op );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_gemm_bsize );
}
Ejemplo n.º 27
0
/*
 * Frees the flash_uddateutinc control object and blocksize object with
 * FLA_Cntl_obj_free() and FLA_Blocksize_free(). The objects themselves
 * are declared outside this function.
 *
 * The parameter list is spelled ( void ) so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void FLASH_UDdate_UT_inc_cntl_finalize( void )
{
    /* Control object. */
    FLA_Cntl_obj_free( flash_uddateutinc_cntl );

    /* Blocksize object. */
    FLA_Blocksize_free( flash_uddateutinc_var1_bsize );
}
Ejemplo n.º 28
0
/*
 * Times one Her2k "ln" implementation and reports time, error and rate.
 *
 * variant   0 runs REF_Her2k; 1..10 select FLA_Her2k_ln_*_var<variant>.
 *           Any other value runs nothing inside the timed region.
 * type      FLA_ALG_UNBLOCKED or FLA_ALG_BLOCKED; for variants 1..10 any
 *           other value prints "trouble".
 * nrepeats  number of timed runs; the smallest elapsed time is kept.
 * n         not used by this routine.
 * nb_alg    value passed four times to FLA_Blocksize_create() for the
 *           blocked variants' control object.
 * A, B, C   operands; C is copied back from a saved copy before every run
 *           and once more before returning.
 * C_ref     for variant 0 it receives a copy of the computed C; otherwise
 *           it is compared against C.
 * dtime     out: smallest elapsed time over all runs (1.0e9 if none ran).
 * diff      out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 * gflops    out: 2 * length(C) * width(C) * width(A) / time / 1e9.
 */
void time_Her2k_ln(
               int variant, int type, int nrepeats, int n, int nb_alg,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  int              rep;
  double           best_time = 1.0e9;   /* starting value for the running minimum */
  FLA_Obj          C_saved;
  fla_blocksize_t* bsize;
  fla_gemm_t*      gemm_blas;
  fla_her2k_t*     her2k_blas;
  fla_her2k_t*     her2k_var;

  /* Control objects handed to the blocked variants. */
  bsize      = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  gemm_blas  = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
  her2k_blas = FLA_Cntl_her2k_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL );
  her2k_var  = FLA_Cntl_her2k_obj_create( FLA_FLAT, variant, bsize, her2k_blas, gemm_blas, gemm_blas );

  /* Keep a copy of C so that every run starts from the same contents. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_saved );
  FLA_Copy_external( C, C_saved );

  for ( rep = 0; rep < nrepeats; ++rep )
  {
    FLA_Copy_external( C_saved, C );

    *dtime = FLA_Clock();

    switch ( variant )
    {
    case 0:
      /* Reference implementation. */
      REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ONE, C );
      break;

    case 1:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var1( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var1( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 2:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var2( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var2( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 3:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var3( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var3( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 4:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var4( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var4( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 5:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var5( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var5( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 6:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var6( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var6( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 7:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var7( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var7( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 8:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var8( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var8( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 9:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var9( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var9( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;

    case 10:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Her2k_ln_unb_var10( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Her2k_ln_blk_var10( FLA_ONE, A, B, FLA_ONE, C, her2k_var ); }
      else                                  { printf("trouble\n"); }
      break;
    }

    /* Elapsed time of this run; keep the smallest seen so far. */
    *dtime = FLA_Clock() - *dtime;
    best_time = min( *dtime, best_time );
  }

  FLA_Cntl_obj_free( her2k_var );
  FLA_Cntl_obj_free( her2k_blas );
  FLA_Cntl_obj_free( gemm_blas );
  FLA_Blocksize_free( bsize );

  if ( variant != 0 )
  {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  }
  else
  {
    /* The reference run defines C_ref for later comparisons. */
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }

  *gflops = 2.0 *
            FLA_Obj_length( C ) *
            FLA_Obj_width( C ) *
            FLA_Obj_width( A ) /
            best_time /
            1e9;

  *dtime = best_time;

  /* Hand C back with the contents it had on entry. */
  FLA_Copy_external( C_saved, C );

  FLA_Obj_free( &C_saved );
}
Ejemplo n.º 29
0
/*
 * Times one Sylv "nn" implementation and reports time, error and rate.
 *
 * variant    0 runs REF_Sylv_nn; 1..18 select FLA_Sylv_nn_*_var<variant>.
 *            Any other value runs nothing inside the timed region.
 * type       FLA_ALG_UNB_OPT or FLA_ALG_BLOCKED; for variants 1..18 any
 *            other value prints "trouble".
 * n_repeats  number of timed runs; the smallest elapsed time is kept.
 * m, n       problem dimensions; used here only for the flop count.
 * nb_alg     value passed four times to FLA_Blocksize_create() for the
 *            blocked variants' control object.
 * isgn, A, B, C, scale
 *            passed straight through to the selected implementation; C is
 *            copied back from a saved copy before every run and once more
 *            before returning.
 * C_ref      for variant 0 it receives a copy of the computed C; otherwise
 *            it is compared against C.
 * dtime      out: smallest elapsed time over all runs (1.0e9 if none ran).
 * diff       out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 * gflops     out: ( m*m*n + n*n*m ) / time / 1e9, multiplied by 4 when
 *            FLA_Obj_is_complex( C ) is true.
 */
void time_Sylv_nn(
                   int variant, int type, int n_repeats, int m, int n, int nb_alg,
                   FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, FLA_Obj scale,
                   double *dtime, double *diff, double *gflops )
{
  int
    irep;

  double
    dtime_old = 1.0e9;   /* starting value for the running minimum */

  FLA_Obj
    C_old;

  fla_blocksize_t*
    bp;
  fla_sylv_t*
    cntl_sylv_var;
  fla_sylv_t*
    cntl_sylv_unb;
  fla_gemm_t*
    cntl_gemm_blas;

/*
  Disabled early exit, kept as found:

  if( type == FLA_ALG_UNBLOCKED && n > 400 )
  {
    *gflops = 0.0;
    *diff   = 0.0;
    return;
  }
*/

  /* Control objects handed to the blocked variants. */
  bp               = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  cntl_sylv_unb    = FLA_Cntl_sylv_obj_create( FLA_FLAT, FLA_UNB_OPT_VARIANT1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
  cntl_gemm_blas   = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL );
  cntl_sylv_var    = FLA_Cntl_sylv_obj_create( FLA_FLAT, variant, bp, cntl_sylv_unb, cntl_sylv_unb, cntl_sylv_unb, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas );

  /* Keep a copy of C so that every run starts from the same contents. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );

  FLA_Copy_external( C, C_old );

  for ( irep = 0 ; irep < n_repeats; irep++ ){
    FLA_Copy_external( C_old, C );

    *dtime = FLA_Clock();

    switch( variant ){
    case 0:
      /* Time reference implementation */
      REF_Sylv_nn( isgn, A, B, C, scale );
      break;

    case 1:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var1( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 2:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var2( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var2( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 3:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var3( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var3( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 4:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var4( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var4( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 5:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var5( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var5( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 6:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var6( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var6( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 7:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var7( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var7( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 8:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var8( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var8( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 9:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var9( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var9( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 10:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var10( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var10( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 11:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var11( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var11( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 12:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var12( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var12( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 13:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var13( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var13( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 14:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var14( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var14( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 15:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var15( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var15( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 16:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var16( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var16( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 17:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var17( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var17( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    case 18:
      if      ( type == FLA_ALG_UNB_OPT ) { FLA_Sylv_nn_opt_var18( isgn, A, B, C, scale ); }
      else if ( type == FLA_ALG_BLOCKED ) { FLA_Sylv_nn_blk_var18( isgn, A, B, C, scale, cntl_sylv_var ); }
      else                                { printf("trouble\n"); }
      break;

    }

    /* Elapsed time of this run; keep the smallest seen so far. */
    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  FLA_Cntl_obj_free( cntl_sylv_var );
  FLA_Cntl_obj_free( cntl_sylv_unb );
  FLA_Cntl_obj_free( cntl_gemm_blas );
  FLA_Blocksize_free( bp );

  if ( variant == 0 ){
    /* The reference run defines C_ref for later comparisons. */
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }
  else{
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  }

  /*
   * Form the operation count in double precision. In plain int
   * arithmetic m * m * n overflows (undefined behaviour) once the
   * dimensions reach roughly 1300; for smaller sizes the value is
   * exactly the same as before.
   */
  *gflops = ( (double) m * m * n + (double) n * n * m ) /
            dtime_old / 1e9;

  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  /* Hand C back with the contents it had on entry. */
  FLA_Copy_external( C_old, C );

  FLA_Obj_free( &C_old );
}
Ejemplo n.º 30
0
/*
 * Times one Gemm "hh" implementation and reports time, error and rate.
 *
 * variant   0 runs REF_Gemm with both operands conjugate-transposed;
 *           1..6 select FLA_Gemm_hh_*_var<variant>. Any other value runs
 *           nothing inside the timed region.
 * type      FLA_ALG_UNBLOCKED or FLA_ALG_BLOCKED; for variants 1..6 any
 *           other value prints "trouble".
 * nrepeats  number of timed runs; the smallest elapsed time is kept.
 * n         not used by this routine.
 * nb_alg    value passed four times to FLA_Blocksize_create() for the
 *           blocked variants' control object.
 * A, B, C   operands; C is copied back from a saved copy before every run
 *           and once more before returning.
 * C_ref     for variant 0 it receives a copy of the computed C; otherwise
 *           it is compared against C.
 * dtime     out: smallest elapsed time over all runs (1.0e9 if none ran).
 * diff      out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, C_ref ).
 * gflops    out: 2 * length(C) * width(C) * length(A) / time / 1e9,
 *           multiplied by 4 when FLA_Obj_is_complex( C ) is true.
 */
void time_Gemm_hh(
               int variant, int type, int nrepeats, int n, int nb_alg,
               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
               double *dtime, double *diff, double *gflops )
{
  int
    irep;

  double
    dtime_old = 1.0e9;   /* starting value for the running minimum */

  FLA_Obj
    C_old;

  fla_blocksize_t*
    bp;
  fla_gemm_t*
    cntl_gemm_blas;
  fla_gemm_t*
    cntl_gemm_var;

  /* Control objects handed to the blocked variants. */
  bp             = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );
  cntl_gemm_var  = FLA_Cntl_gemm_obj_create( FLA_FLAT, variant, bp, cntl_gemm_blas );

  /* Keep a copy of C so that every run starts from the same contents. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );

  FLA_Copy_external( C, C_old );

  for ( irep = 0 ; irep < nrepeats; irep++ ){
    FLA_Copy_external( C_old, C );

    *dtime = FLA_Clock();

    switch( variant ){
    // Time reference implementation
    case 0:
      REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                FLA_ONE, A, B, FLA_ONE, C );
      break;

    // Time variant 1
    case 1:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Gemm_hh_unb_var1( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Gemm_hh_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); }
      else                                  { printf("trouble\n"); }
      break;

    // Time variant 2
    case 2:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Gemm_hh_unb_var2( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Gemm_hh_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); }
      else                                  { printf("trouble\n"); }
      break;

    // Time variant 3
    case 3:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Gemm_hh_unb_var3( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Gemm_hh_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); }
      else                                  { printf("trouble\n"); }
      break;

    // Time variant 4
    case 4:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Gemm_hh_unb_var4( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Gemm_hh_blk_var4( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); }
      else                                  { printf("trouble\n"); }
      break;

    // Time variant 5
    case 5:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Gemm_hh_unb_var5( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Gemm_hh_blk_var5( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); }
      else                                  { printf("trouble\n"); }
      break;

    // Time variant 6
    case 6:
      if      ( type == FLA_ALG_UNBLOCKED ) { FLA_Gemm_hh_unb_var6( FLA_ONE, A, B, FLA_ONE, C ); }
      else if ( type == FLA_ALG_BLOCKED )   { FLA_Gemm_hh_blk_var6( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var ); }
      else                                  { printf("trouble\n"); }
      break;

    }

    // Elapsed time of this run; keep the smallest seen so far.
    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  FLA_Cntl_obj_free( cntl_gemm_var );
  FLA_Cntl_obj_free( cntl_gemm_blas );
  FLA_Blocksize_free( bp );

  if ( variant == 0 )
  {
    // The reference run defines C_ref for later comparisons.
    FLA_Copy_external( C, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLA_Max_elemwise_diff( C, C_ref );
  }

  // Both operands are conjugate-transposed (see the REF_Gemm call), so
  // the inner dimension of the product is the row count of A, not its
  // column count. This assumes FLA_Obj_length() is the row count, as its
  // use on C alongside FLA_Obj_width() suggests. For square A the value
  // is the same as before.
  *gflops = 2.0 *
            FLA_Obj_length( C ) *
            FLA_Obj_width( C ) *
            FLA_Obj_length( A ) /
            dtime_old /
            1.0e9;

  if ( FLA_Obj_is_complex( C ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  // Hand C back with the contents it had on entry.
  FLA_Copy_external( C_old, C );

  FLA_Obj_free( &C_old );
}