Example #1
0
File: P.c Project: nashp/HiPLARb
int P_zunmqr(
const char *side,
const char *trans,
int M,
int N,
int K,
void *A,
int LDA,
void *T,
void *B,
int LDB
) {
	PLASMA_enum s, t;
	int info;

	if (*side == 'L') {
		s = PlasmaLeft;
	} else {
		s = PlasmaRight;
	}

	if (*trans == 'C') {
		t = PlasmaConjTrans;
	} else {
		t = PlasmaNoTrans;
	}

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, T, B, LDB);
#else
    PLASMA_desc *descT;
    int NB, IB;
    int MT, NT;

    /* Get autotuned or set tile size; T matrix allocated with R */
    PLASMA_Alloc_Workspace_zgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
    PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
    PLASMA_Dealloc_Handle_Tile(&descT);

	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

// possibly allocate space for descT in R and keep it in qr object instead
    info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble,
         IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

	info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, descT, B, LDB);

    PLASMA_Desc_Destroy(&descT);
#endif

	return(info);
}
Example #2
0
File: P.c Project: nashp/HiPLARb
int P_dgeqrf(
int M,
int N,
double *A,
double *T
) {
	int info;

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_dgeqrf(M, N, A, M, T);
#else
    PLASMA_desc *descT;
    int NB, IB;
    int MT, NT;

    /* Get autotuned or set tile size; T matrix allocated with R */
    PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
    PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
    PLASMA_Dealloc_Handle_Tile(&descT);

	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

// possibly allocate space for descT in R and keep it in qr object instead
    info = PLASMA_Desc_Create(&descT, T, PlasmaRealDouble,
         IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
/*
    printf("MB=%d NB=%d BSIZ=%d LM=%d LN=%d M=%d N=%d MT=%d NT=%d\n",
        descT->mb, descT->nb, descT->bsiz, descT->lm, descT->ln,
        descT->m, descT->n, descT->mt, descT->nt);
*/

	info = PLASMA_dgeqrf(M, N, A, M, descT);

    PLASMA_Desc_Destroy(&descT);
#endif

	return(info);

}
Example #3
0
File: P.c Project: nashp/HiPLARb
int P_zungqr(
int M,
int N,
int K,
void *A,
int LDA,
void *T,
void *Q,
int LDQ
) {
	int info;

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_zungqr(M, N, K, A, LDA, T, Q, LDQ);
#else
    PLASMA_desc *descT;
    int NB, IB;
    int MT, NT;

    /* Get autotuned or set tile size; T matrix allocated with R */
    PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
    PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
    PLASMA_Dealloc_Handle_Tile(&descT);

	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

// possibly allocate space for descT in R and keep it in qr object instead
    info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble,
         IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

	info = PLASMA_zungqr(M, N, K, A, LDA, descT, Q, LDQ);

    PLASMA_Desc_Destroy(&descT);
#endif

	return(info);
}
Example #4
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    double *AT;
    real_Double_t       t;
    PLASMA_desc        *descA;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    PLASMA_enum uplo = PlasmaLower;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT = (double *)malloc(nt*nt*nb2*sizeof(double));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* 
     * Initialize Data 
     * It's done in static to avoid having the same sequence than one 
     * the function we want to trace
     */
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_dplgsy_Tile( (double)n, descA, 51 );

    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Save AT in lapack layout for check */
    if ( check ) {
    }

    /* PLASMA DPOTRF / DTRTRI / DLAUUM  */
    /*
     * Example of the different way to combine several asynchonous calls
     */
    {
#if defined(TRACE_BY_SEQUENCE)
        PLASMA_sequence *sequence[3];
        PLASMA_request request[3] = { PLASMA_REQUEST_INITIALIZER, 
                                      PLASMA_REQUEST_INITIALIZER, 
                                      PLASMA_REQUEST_INITIALIZER };
        
        PLASMA_Sequence_Create(&sequence[0]);
        PLASMA_Sequence_Create(&sequence[1]);
        PLASMA_Sequence_Create(&sequence[2]);
        
        t = -cWtime();
#if defined(POTRI_SYNC)
        PLASMA_dpotrf_Tile_Async(uplo, descA,                sequence[0], &request[0]);
        PLASMA_Sequence_Wait(sequence[0]);
        PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]);
        PLASMA_Sequence_Wait(sequence[1]);
        PLASMA_dlauum_Tile_Async(uplo, descA,                sequence[2], &request[2]);
        PLASMA_Sequence_Wait(sequence[2]);
#else
        PLASMA_dpotrf_Tile_Async(uplo, descA,                sequence[0], &request[0]);
        PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]);
        PLASMA_dlauum_Tile_Async(uplo, descA,                sequence[2], &request[2]);
        PLASMA_Sequence_Wait(sequence[0]);
        PLASMA_Sequence_Wait(sequence[1]);
        PLASMA_Sequence_Wait(sequence[2]);
#endif
        t += cWtime();
        
        PLASMA_Sequence_Destroy(sequence[0]);
        PLASMA_Sequence_Destroy(sequence[1]);
        PLASMA_Sequence_Destroy(sequence[2]);
        
#else

#if defined(POTRI_SYNC)
        
        t = -cWtime();
        PLASMA_dpotrf_Tile(uplo, descA);
        PLASMA_dtrtri_Tile(uplo, PlasmaNonUnit, descA);
        PLASMA_dlauum_Tile(uplo, descA);
        t += cWtime();
        
#else
        /* Default: we use Asynchonous call with only one sequence */
        
        PLASMA_sequence *sequence;
        PLASMA_request request[2] = { PLASMA_REQUEST_INITIALIZER, 
                                      PLASMA_REQUEST_INITIALIZER };
        
        t = -cWtime();
        PLASMA_Sequence_Create(&sequence);
        PLASMA_dpotrf_Tile_Async(uplo, descA, sequence, &request[0]);
        PLASMA_dpotri_Tile_Async(uplo, descA, sequence, &request[1]);
        PLASMA_Sequence_Wait(sequence);
        t += cWtime();
        
        PLASMA_Sequence_Destroy(sequence);       
#endif
#endif
        *t_ = t;
    }
    
    /* Check the solution */
    if ( check )
    {
        dparam[TIMING_ANORM] = 0.0;
        dparam[TIMING_XNORM] = 0.0;
        dparam[TIMING_BNORM] = 0.0;
        dparam[TIMING_RES]   = 0.0;
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Finalize();
    free(AT);

    return 0;
}
Example #5
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    PLASMA_Complex64_t *A = NULL, *AT, *b, *bT, *x;
    PLASMA_desc        *descA, *descB, *descL;
    real_Double_t       t;
    int                *piv;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda      = n;
    int ldb      = n;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /*     PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT  = (PLASMA_Complex64_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialiaze Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_zlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgesv_incpiv_Tile(n, &descL, &piv);

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex64_t *)malloc(lda*n    *sizeof(PLASMA_Complex64_t));
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_zgetrf_incpiv_Tile( descA, descL, piv );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if ( check )
      {
        b  = (PLASMA_Complex64_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex64_t));
        bT = (PLASMA_Complex64_t *)malloc(nt*nb2   *sizeof(PLASMA_Complex64_t));
        x  = (PLASMA_Complex64_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex64_t));

        LAPACKE_zlarnv_work(1, ISEED, n*nrhs, b);
        PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Lapack_to_Tile((void*)b, n, descB);

        PLASMA_zgetrs_incpiv_Tile( descA, descL, piv, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A ); free( b ); free( bT ); free( x );
      }

    /* Deallocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descL);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
Example #6
0
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_) 
{
    float *AT, *BT, *CT;
    float *A = NULL, *B = NULL, *C1 = NULL, *C2 = NULL;
    float alpha, beta;
    PLASMA_desc        *descA, *descB, *descC;
    real_Double_t       t;
    int nb, nb2, nt;
    int n       = iparam[TIMING_N];
    int check   = iparam[TIMING_CHECK];
    int lda     = n;
    
    /* Allocate Data */
    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    /* } */
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    AT = (float *)malloc(nt*nt*nb2*sizeof(float));
    BT = (float *)malloc(nt*nt*nb2*sizeof(float));
    CT = (float *)malloc(nt*nt*nb2*sizeof(float));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!BT) || (!CT) ) {
        printf("Out of Memory \n ");
        exit(0);
    }
  
#if defined(PLASMA_CUDA)
    cudaHostRegister(AT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
    cudaHostRegister(BT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
    cudaHostRegister(CT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
#endif

     /* Initialiaze Data */
    LAPACKE_slarnv_work(1, ISEED, 1, &alpha);
    LAPACKE_slarnv_work(1, ISEED, 1, &beta);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, AT);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, BT);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, CT);

    /* Initialize AT and bT for Symmetric Positif Matrix */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_Desc_Create(&descB, BT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_Desc_Create(&descC, CT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);

    if (check)
      {
          C2 = (float *)malloc(n*lda*sizeof(float));
          PLASMA_Tile_to_Lapack(descC, (void*)C2, n);
      }

#if defined(PLASMA_CUDA)
    core_cublas_init();
#endif

    t = -cWtime();
    PLASMA_sgemm_Tile( PlasmaNoTrans, PlasmaNoTrans, alpha, descA, descB, beta, descC );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if (check)
      {
          A = (float *)malloc(n*lda*sizeof(float));
          PLASMA_Tile_to_Lapack(descA, (void*)A, n);
          free(AT);

          B = (float *)malloc(n*lda*sizeof(float));
          PLASMA_Tile_to_Lapack(descB, (void*)B, n);
          free(BT);

          C1 = (float *)malloc(n*lda*sizeof(float));
          PLASMA_Tile_to_Lapack(descC, (void*)C1, n);
          free(CT);

          dparam[TIMING_RES] = s_check_gemm( PlasmaNoTrans, PlasmaNoTrans, n, n, n, 
                                            alpha, A, lda, B, lda, beta, C1, C2, lda,
                                            &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                            &(dparam[TIMING_XNORM]));
          free(C2);
      }
    else {
        free( AT );
        free( BT );
        free( CT );
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descC);
    PLASMA_Finalize();

    return 0;
}
Example #7
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    double *A = NULL, *AT, *b = NULL, *bT, *x;
    PLASMA_desc        *descA, *descB, *descT;
    real_Double_t       t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );
  
#if defined(PLASMA_CUDA)
  core_cublas_init();
#endif

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /*     PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT  = (double *)malloc(nt*nt*nb2*sizeof(double));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }
  
#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)AT, nt*nt*nb2*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Initialiaze Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_dlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_dgels_Tile(n, n, &descT);
  
#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)descT->mat, descT->lm*descT->ln*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc(lda*n    *sizeof(double));
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_dgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if ( check )
      {
        b  = (double *)malloc(ldb*nrhs *sizeof(double));
        bT = (double *)malloc(nt*nb2   *sizeof(double));
        x  = (double *)malloc(ldb*nrhs *sizeof(double));

        LAPACKE_dlarnv_work(1, ISEED, nt*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_dgeqrs_Tile( descA, descT, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A ); 
        free( b ); 
        free( bT ); 
        free( x );
      }

    /* Allocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();
#if defined(PLASMA_CUDA)
#endif
    return 0;
}
Example #8
0
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_) 
{
    float *AT, *Q = NULL;
    float *W;
    PLASMA_desc *descA = NULL;
    PLASMA_desc *descQ = NULL;
    PLASMA_desc *descT = NULL;
    real_Double_t       t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int uplo = PlasmaUpper;
    int vec  = PlasmaNoVec;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT  = (float *)malloc(lda*n*sizeof(float));
    W   = (float *)malloc(n*sizeof(float));
    if (vec == PlasmaVec){
       Q = (float *)malloc(lda*n*sizeof(float));
       if ( (!Q) ) {
          printf("Out of Memory -Q-\n ");
          return -2;
       }
    }
       
    /* Check if unable to allocate memory */
    if ( (!AT) || (!W) ) {
        printf("Out of Memory -\n ");
        return -2;
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_splgsy_Tile((float)0.0, descA, 51 );

    if (vec == PlasmaVec)
      PLASMA_Desc_Create(&descQ, Q, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
    }

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_ssyev(n, n, &descT);

    t = -cWtime();
    PLASMA_ssyev_Tile( vec, uplo, descA, W, descT, descQ );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if ( check )
      {
      }

    /* DeAllocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);
 
    if (vec == PlasmaVec) {
      PLASMA_Desc_Destroy(&descQ);
      free( Q );
    }
    free( AT );
    free( W );
    PLASMA_Finalize();

    return 0;
}
Example #9
0
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_) 
{
    float *AT, *bT, *x;
    float *A = NULL;
    float *b = NULL;
    PLASMA_desc        *descA, *descB;
    real_Double_t       t;
    int                *piv;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /*     PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */
    /* }  */
    
    /* Allocate Data */
    AT  = (float *)malloc(lda*n   *sizeof(float));
    bT  = (float *)malloc(ldb*nrhs*sizeof(float));
    piv = (int *)malloc( n*sizeof(int));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!bT) || (!piv) ) {
        printf("Out of Memory \n ");
        return -1;
    }

    /* Initialize AT and bT for Symmetric Positif Matrix */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs);
    LAPACKE_slarnv_work(1, ISEED, lda*n,    AT);
    LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (float *)malloc(lda*n   *sizeof(float));
        b = (float *)malloc(ldb*nrhs*sizeof(float));
        PLASMA_sTile_to_Lapack(descA, (void*)A, lda);
        PLASMA_sTile_to_Lapack(descB, (void*)b, ldb);
    }

    t = -cWtime();
    PLASMA_sgesv_Tile( descA, piv, descB );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if ( check )
      {
        x = (float *)malloc(ldb*nrhs *sizeof(float));
        PLASMA_sTile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));
        free(A); free(b); free(x);
      }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);

    free( AT ); free( bT );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
Example #10
0
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_) 
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    PLASMA_Complex32_t *A, *AT, *A2 = NULL;
    PLASMA_desc        *descA;
    real_Double_t       t;
    int                *ipiv, *ipiv2 = NULL;
    int i;
    int nb    = iparam[TIMING_NB];
    int m     = iparam[TIMING_N];
    int n     = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = m;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data */
    A  = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t));
    AT = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t));

    /* Check if unable to allocate memory */
    if ( ( !AT ) || (! A) ) {
        printf("Out of Memory \n ");
        return -1;
    }

    /* Initialiaze Data */
    LAPACKE_clarnv_work(1, ISEED, lda*n, A);
/*     for(i=0; i<n; i++) { */
/*       A[i*lda+i] += (float)m; */
/*     } */

    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, m, n);
    PLASMA_cLapack_to_Tile((void*)A, lda, descA);

    /* Allocate Workspace */
    ipiv  = (int *)malloc( n*sizeof(int) );

    /* Save AT in lapack layout for check */
    if ( check ) {
        A2 = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t));
        ipiv2 = (int *)malloc( n*sizeof(int) );
        LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda);
    
        LAPACKE_cgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 );
    }

    plasma = plasma_context_self();
    PLASMA_Sequence_Create(&sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] );

    plasma_dynamic_spawn();
    CORE_cgetrf_rectil_init();

    t = -cWtime();
    QUARK_CORE_cgetrf_rectil(plasma->quark, &task_flags,
                             *descA, AT, descA->mb*descA->nb, ipiv,
                             sequence, &request,
                             0, 0,
                             iparam[TIMING_THRDNBR]);
    PLASMA_Sequence_Wait(sequence);
    t += cWtime();
    *t_ = t;
    
    PLASMA_Sequence_Destroy(sequence);

    /* Check the solution */
    if ( check )
    {
        float *work = (float *)malloc(max(m,n)*sizeof(float));

        PLASMA_cTile_to_Lapack(descA, (void*)A, lda);

        /* Check ipiv */
        for(i=0; i<n; i++)
        {
            if( ipiv[i] != ipiv2[i] ) {
                fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n",
                        i, ipiv[i],  i, crealf(A[  i * lda + i ]), 
                        i, ipiv2[i], i, crealf(A2[ i * lda + i ])); 
                break;
            }
        }

        dparam[TIMING_ANORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), 
                                                   m, n, A, lda, work);
        dparam[TIMING_XNORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), 
                                                   m, n, A2, lda, work);
        dparam[TIMING_BNORM] = 0.0;

        CORE_caxpy( m, n, -1.0, A, lda, A2, lda);

        dparam[TIMING_RES] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), 
                                                 m, n, A2, lda, work);

        free( A2 );
        free( ipiv2 );
        free( work );
    }
    
    /* Deallocate Workspace */
    PLASMA_Desc_Destroy(&descA);

    free( A  );
    free( AT );
    free( ipiv );
    PLASMA_Finalize();

    return 0;
}
Example #11
0
static int
RunTest(int *iparam, _PREC *dparam, real_Double_t *t_) 
{
    PLASMA_Complex32_t *A = NULL, *AT;
    PLASMA_desc        *descA;
    real_Double_t       t;
    int n       = iparam[TIMING_N];
    int nb      = iparam[TIMING_NB];
    int check   = iparam[TIMING_CHECK];

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );
    /* } */

    n = ((n % nb) == 0) ? (n / nb) * nb : ((n / nb) + 1) * nb ;
    dparam[TIMING_ANORM] = (_PREC)n;

    /* Allocate Data */
    AT = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t));

    /* Check if unable to allocate memory */
    if ( (!AT) ) {
        printf("Out of Memory \n ");
        exit(0);
    }
    
     /* Initialiaze Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_clarnv_work(1, ISEED, n*n, AT);

    /* Save A and b  */
    if (check) {
        A = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t));
        LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), n, n, AT, n, A, n);
    }

    t = -cWtime();
    PLASMA_Lapack_to_Tile( (void *)A, n, descA);
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if (check)
      {
        dparam[TIMING_RES] = (_PREC)c_check_conversion(n, n, n, 1, nb, nb, A, AT, map_CM, map_CCRB);
        free(A);
      }

    PLASMA_Desc_Destroy(&descA);
    free( AT );

    PLASMA_Finalize();

    return 0;
}
Example #12
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    PLASMA_Complex64_t *AT;
    PLASMA_desc        *descA, *descT;
    real_Double_t       t;
    int nb;
    int M     = iparam[TIMING_N];
    int N     = iparam[TIMING_M];
    //int N = M/nrhs; //RUN WITH NRHS = 10 or 20 (ALSO USED IN TIMING.C)
    int lda = M;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /*     PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];

    /* Householder mode */
    //PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_FLAT_HOUSEHOLDER);
    PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_TREE_HOUSEHOLDER);
    PLASMA_Set(PLASMA_HOUSEHOLDER_SIZE, 4);
    
    /* Allocate Data */
    AT  = (PLASMA_Complex64_t *)malloc(lda*N*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialiaze Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, M, N, 0, 0, M, N);
    LAPACKE_zlarnv_work(1, ISEED, lda*N, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgels_Tile(M, N, &descT);

    t = -cWtime();
    PLASMA_zgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;
    
    /* Allocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();

    return 0;
}
Example #13
0
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_) 
{
    float *A = NULL, *AT, *b = NULL, *bT, *x;
    real_Double_t       t;
    PLASMA_desc        *descA, *descB;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT = (float *)malloc(nt*nt*nb2*sizeof(float));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialiaze Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_splgsy_Tile((float)n, descA, 51 );

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (float *)malloc(lda*n    *sizeof(float));
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    /* PLASMA SPOSV */
    t = -cWtime();
    PLASMA_spotrf_Tile(PlasmaUpper, descA);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check )
      {
        b  = (float *)malloc(ldb*nrhs *sizeof(float));
        bT = (float *)malloc(nt*nb2   *sizeof(float));
        x  = (float *)malloc(ldb*nrhs *sizeof(float));

        LAPACKE_slarnv_work(1, ISEED, nt*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_spotrs_Tile( PlasmaUpper, descA, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));
        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
      }

    PLASMA_Desc_Destroy(&descA);

    free(AT);
    PLASMA_Finalize();

    return 0;
}
Example #14
0
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_) 
{
    PLASMA_Complex32_t *AT, *BT, *Q = NULL;
    float *W;
    PLASMA_desc *descA, *descB, *descQ, *descT;
    real_Double_t       t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int itype = 1;
    int vec   = PlasmaNoVec;
    int uplo  = PlasmaUpper;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    //    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    /* else */
    /*     PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); */

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /*     PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT  = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
    BT  = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
    W  = (float *)malloc(n*sizeof(float));
    if (vec == PlasmaVec){
       Q = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
       if ( (!Q) ) {
          printf("Out of Memory -Q-\n ");
          exit(0);
       }
    }
       
    /* Check if unable to allocate memory */
    if ( (!AT) || (!BT) || (!W) ) {
        printf("Out of Memory -\n ");
        exit(0);
    }

    /* Initialiaze Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_cplghe_Tile((float)0.0, descA, 51 );

    PLASMA_Desc_Create(&descB, BT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_cplghe_Tile((float)n, descB, 51 );

    PLASMA_Desc_Create(&descQ, Q, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
    }

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_chegv(n, n, &descT);

    t = -cWtime();
    PLASMA_chegv_Tile( itype, vec, uplo, descA, descB, W, descT, descQ );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if ( check )
      {
      }

    /* DeAllocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descQ);

    if (vec == PlasmaVec)
       free( Q );
    free( AT );
    free( W );
    PLASMA_Finalize();

    return 0;
}