/**
 * C-linkage wrapper around PLASMA_zheevd.
 *
 * Allocates the PLASMA workspace descriptor for a matrix_size x matrix_size
 * problem, calls PLASMA_zheevd with PlasmaVec / PlasmaUpper, and releases the
 * workspace again.
 *
 * @param matrix_size  order of the matrix
 * @param a            matrix data, passed to PLASMA as PLASMA_Complex64_t*
 * @param lda          leading dimension of a
 * @param z            output matrix, passed to PLASMA as PLASMA_Complex64_t*
 * @param ldz          leading dimension of z
 * @param eval         output array handed to PLASMA_zheevd
 *                     (presumably the eigenvalues -- confirm against PLASMA docs)
 *
 * On any PLASMA failure a message is printed and the process terminates with
 * a non-zero exit status.
 */
extern "C" void plasma_zheevd_wrapper(int32_t matrix_size, void* a, int32_t lda, void* z,
                                      int32_t ldz, double* eval)
{
    PLASMA_desc* descT = NULL;

    /* Without this check a failed allocation would leave descT unusable and
       it would still be handed to PLASMA_zheevd. */
    int info = PLASMA_Alloc_Workspace_zheevd(matrix_size, matrix_size, &descT);
    if (info != 0)
    {
        printf("error calling PLASMA_Alloc_Workspace_zheevd\n");
        exit(1);
    }

    info = PLASMA_zheevd(PlasmaVec, PlasmaUpper, matrix_size, (PLASMA_Complex64_t*)a, lda, eval, descT, (PLASMA_Complex64_t*)z, ldz);
    if (info != 0)
    {
        printf("error calling PLASMA_zheevd\n");
        PLASMA_Dealloc_Handle_Tile(&descT);
        /* A failure must not be reported to the caller's shell as success. */
        exit(1);
    }

    PLASMA_Dealloc_Handle_Tile(&descT);
}
Example #2
0
File: P.c Project: nashp/HiPLARb
/*
 * Wrapper around PLASMA_dgeqrf for an M-by-N matrix A stored with leading
 * dimension M.
 *
 * When CHECK_VERSION_BEQ(2,4,5) holds, T is passed straight to PLASMA_dgeqrf.
 * Otherwise a PLASMA descriptor is built on top of the caller-provided buffer
 * T (the caller keeps ownership of T) and that descriptor is passed instead.
 *
 * Returns the status of the first PLASMA call that fails, or the status of
 * PLASMA_dgeqrf itself.
 */
int P_dgeqrf(
int M,
int N,
double *A,
double *T
) {
	int info;

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_dgeqrf(M, N, A, M, T);
#else
	PLASMA_desc *descT = NULL;
	int NB, IB;
	int MT, NT;

	/* Get autotuned or set tile size; T matrix allocated with R.
	 * A 1x1 workspace is allocated and released right away; presumably this
	 * is only done so that PLASMA settles its tile sizes before they are
	 * queried -- TODO confirm. */
	info = PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT);
	if (info != 0) {
		return(info);
	}
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
	PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
	PLASMA_Dealloc_Handle_Tile(&descT);

	/* Number of tile rows / columns, rounded up. */
	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

	/* possibly allocate space for descT in R and keep it in qr object instead */
	info = PLASMA_Desc_Create(&descT, T, PlasmaRealDouble,
	     IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
	if (info != 0) {
		/* Do not run the factorization with a descriptor that was not created. */
		return(info);
	}
/*
    printf("MB=%d NB=%d BSIZ=%d LM=%d LN=%d M=%d N=%d MT=%d NT=%d\n",
        descT->mb, descT->nb, descT->bsiz, descT->lm, descT->ln,
        descT->m, descT->n, descT->mt, descT->nt);
*/

	info = PLASMA_dgeqrf(M, N, A, M, descT);

	PLASMA_Desc_Destroy(&descT);
#endif

	return(info);

}
Example #3
0
File: P.c Project: nashp/HiPLARb
/*
 * Wrapper around PLASMA_zungqr.
 *
 * When CHECK_VERSION_BEQ(2,4,5) holds, T is passed straight to PLASMA_zungqr.
 * Otherwise a PLASMA descriptor (PlasmaComplexDouble) is built on top of the
 * caller-provided buffer T and passed instead; the caller keeps ownership
 * of T.
 *
 * Returns the status of the first PLASMA call that fails, or the status of
 * PLASMA_zungqr itself.
 */
int P_zungqr(
int M,
int N,
int K,
void *A,
int LDA,
void *T,
void *Q,
int LDQ
) {
	int info;

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_zungqr(M, N, K, A, LDA, T, Q, LDQ);
#else
	PLASMA_desc *descT = NULL;
	int NB, IB;
	int MT, NT;

	/* Get autotuned or set tile size; T matrix allocated with R.
	 * NOTE(review): the throwaway workspace uses the dgeqrf (real double)
	 * allocator although this is the complex routine -- confirm that the
	 * resulting tile sizes match the ones T was allocated with. */
	info = PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT);
	if (info != 0) {
		return(info);
	}
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
	PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
	PLASMA_Dealloc_Handle_Tile(&descT);

	/* Number of tile rows / columns, rounded up. */
	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

	/* possibly allocate space for descT in R and keep it in qr object instead */
	info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble,
	     IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
	if (info != 0) {
		/* Do not call PLASMA_zungqr with a descriptor that was not created. */
		return(info);
	}

	info = PLASMA_zungqr(M, N, K, A, LDA, descT, Q, LDQ);

	PLASMA_Desc_Destroy(&descT);
#endif

	return(info);
}
Example #4
0
File: P.c Project: nashp/HiPLARb
/*
 * Wrapper around PLASMA_zgesvd.
 *
 * jobu and jobvt are accepted for interface compatibility but are currently
 * ignored: the call always passes PlasmaNoVec for both. (An earlier version
 * rejected any value other than 'N' with -1 / -2; that validation is
 * disabled.)
 *
 * Returns the status of the workspace allocation if it fails, otherwise the
 * status of PLASMA_zgesvd.
 */
int P_zgesvd(
const char *jobu,
const char *jobvt,
int M,
int N,
void *A,
int LDA,
double *S,
void *U,
int LDU,
void *VT,
int LDVT
) {
	PLASMA_enum ju, jvt;
	PLASMA_desc *descT = NULL;
	int info;

	/* Intentionally unused, see the header comment. */
	(void)jobu;
	(void)jobvt;

	ju = PlasmaNoVec;
	jvt = PlasmaNoVec;

	/* Without this check a failed allocation would hand an unusable
	 * descriptor to PLASMA_zgesvd. */
	info = PLASMA_Alloc_Workspace_zgesvd(M, N, &descT);
	if (info != 0) {
		return(info);
	}

	info = PLASMA_zgesvd(ju, jvt, M, N, A, LDA, S, descT, U, LDU, VT, LDVT);

	PLASMA_Dealloc_Handle_Tile(&descT);

	return(info);
}
Example #5
0
/*
 * Timing driver for PLASMA_zgetrf_incpiv_Tile.
 *
 * Reads the problem size, number of right-hand sides, thread count, scheduler
 * and tile sizes from iparam, fills a tiled n-by-n matrix with random values,
 * times the factorization and stores the elapsed time in *t_.
 * When iparam[TIMING_CHECK] is set, a system is solved with
 * PLASMA_zgetrs_incpiv_Tile and the value returned by z_check_solution is
 * stored in dparam[TIMING_RES]; the norms go to dparam[TIMING_ANORM],
 * dparam[TIMING_BNORM] and dparam[TIMING_XNORM].
 *
 * Returns 0. Terminates the process with a non-zero status if a buffer cannot
 * be allocated.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *A = NULL, *AT, *b = NULL, *bT = NULL, *x = NULL;
    PLASMA_desc        *descA, *descB, *descL;
    real_Double_t       t;
    int                *piv;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda      = n;
    int ldb      = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled; the tile sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /* Allocate Data. The element count is computed in size_t so that large
       problems do not overflow int before the multiplication by sizeof. */
    AT  = (PLASMA_Complex64_t *)malloc((size_t)nt*nt*nb2*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_zlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgesv_incpiv_Tile(n, &descL, &piv);

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex64_t *)malloc((size_t)lda*n*sizeof(PLASMA_Complex64_t));
        if ( !A ){
            printf("Out of Memory \n ");
            exit(1);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_zgetrf_incpiv_Tile( descA, descL, piv );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check )
      {
        b  = (PLASMA_Complex64_t *)malloc((size_t)ldb*nrhs*sizeof(PLASMA_Complex64_t));
        bT = (PLASMA_Complex64_t *)malloc((size_t)nt*nb2  *sizeof(PLASMA_Complex64_t));
        x  = (PLASMA_Complex64_t *)malloc((size_t)ldb*nrhs*sizeof(PLASMA_Complex64_t));
        if ( !b || !bT || !x ){
            printf("Out of Memory \n ");
            exit(1);
        }

        LAPACKE_zlarnv_work(1, ISEED, n*nrhs, b);
        PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Lapack_to_Tile((void*)b, n, descB);

        PLASMA_zgetrs_incpiv_Tile( descA, descL, piv, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]),
                                             &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A ); free( b ); free( bT ); free( x );
      }

    /* Deallocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descL);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
Example #6
0
/*
 * Timing driver for plasma_pdgeqrf_quark run from inside an OpenMP
 * parallel/single region.
 *
 * Reads matrix_size, iblocksize, blocksize and check from params, builds an
 * N-by-N tiled matrix, runs the factorization between START_TIMING() and
 * STOP_TIMING(), and, when check is non-zero, solves a one-column system and
 * returns the value produced by d_check_solution. Returns 0 when no check is
 * requested.
 *
 * NOTE(review): t and t_ are not referenced directly in this body; presumably
 * the START_TIMING/STOP_TIMING macros use them -- confirm before renaming.
 * NOTE(review): none of the malloc results below are checked.
 */
static double
RunTest(real_Double_t *t_, struct user_parameters* params)
{
    double t;
    PLASMA_desc *descT;
    int64_t N     = params->matrix_size;
    int64_t IB    = params->iblocksize;
    int64_t NB    = params->blocksize;
    int check     = params->check;
    double check_res = 0;

    /* Allocate Data */
    PLASMA_desc *descA = NULL;
    double *ptr = (double*)malloc(N * N * sizeof(double));
    PLASMA_Desc_Create(&descA, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, N, 0, 0, N, N);

    /* Fill the matrix; the generator is called once, from a single thread of
       the parallel region. */
#pragma omp parallel
    {
#pragma omp single
        {
    plasma_pdpltmg_quark(*descA, 5373 );
        }
    }

    /* Save A for check */
    double *A = NULL;
    if ( check ) {
        A = (double*)malloc(N * N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descA, (void*)A, N);
    }

    /* Allocate Workspace */
    plasma_alloc_ibnb_tile(N, N, PlasmaRealDouble, &descT, IB, NB);

    /* Do the computations */
    START_TIMING();
#pragma omp parallel
    {
#pragma omp single
        {
    plasma_pdgeqrf_quark( *descA, *descT , IB);
        }
    }
    STOP_TIMING();

    /* Check the solution */
    if ( check )
    {
        /* Allocate B for check */
        PLASMA_desc *descB = NULL;
        /* This ptr shadows the outer ptr that backs descA. */
        double* ptr = (double*)malloc(N * sizeof(double));
        PLASMA_Desc_Create(&descB, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, 1, 0, 0, N, 1);

        /* Initialize and save B */
        plasma_pdpltmg_seq(*descB, 2264 );
        double *B = (double*)malloc(N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descB, (void*)B, N);

        /* Compute the solution */
        PLASMA_dgeqrs_Tile( descA, descT, descB , IB);

        /* Copy solution to X */
        double *X = (double*)malloc(N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descB, (void*)X, N);

        check_res = d_check_solution(N, N, 1, A, N, B, X, N);

        /* Free checking structures */
        PASTE_CODE_FREE_MATRIX( descB );
        free( A );
        free( B );
        free( X );
    }

    /* Free data */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PASTE_CODE_FREE_MATRIX( descA );

    return check_res;
}
Example #7
0
/*
 * Timing driver for PLASMA_dgeqrf_Tile, with optional CUDA host-memory
 * registration when PLASMA_CUDA is defined.
 *
 * Reads the problem size, number of right-hand sides, thread count, scheduler
 * and tile sizes from iparam, fills a tiled n-by-n matrix with random values,
 * times the factorization and stores the elapsed time in *t_.
 * When iparam[TIMING_CHECK] is set, a system is solved with
 * PLASMA_dgeqrs_Tile and the value returned by d_check_solution is stored in
 * dparam[TIMING_RES]; the norms go to dparam[TIMING_ANORM],
 * dparam[TIMING_BNORM] and dparam[TIMING_XNORM].
 *
 * Returns 0. Terminates the process with a non-zero status if a buffer cannot
 * be allocated.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *AT, *b = NULL, *bT = NULL, *x = NULL;
    PLASMA_desc        *descA, *descB, *descT;
    real_Double_t       t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

#if defined(PLASMA_CUDA)
  core_cublas_init();
#endif

    /* Autotuning is always disabled; the tile sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /* Allocate Data. The element count is computed in size_t so that large
       problems do not overflow int before the multiplication by sizeof. */
    AT  = (double *)malloc((size_t)nt*nt*nb2*sizeof(double));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(1);
    }

#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)AT, (size_t)nt*nt*nb2*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_dlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_dgels_Tile(n, n, &descT);

#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)descT->mat, descT->lm*descT->ln*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc((size_t)lda*n*sizeof(double));
        if ( !A ){
            printf("Out of Memory \n ");
            exit(1);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_dgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check )
      {
        b  = (double *)malloc((size_t)ldb*nrhs*sizeof(double));
        bT = (double *)malloc((size_t)nt*nb2  *sizeof(double));
        x  = (double *)malloc((size_t)ldb*nrhs*sizeof(double));
        if ( !b || !bT || !x ){
            printf("Out of Memory \n ");
            exit(1);
        }

        /* The right-hand side is generated directly in tile storage and then
           copied out to b for the residual check. */
        LAPACKE_dlarnv_work(1, ISEED, nt*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_dgeqrs_Tile( descA, descT, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]),
                                             &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
      }

#if defined(PLASMA_CUDA)
    /* Registered host memory must be unregistered before it is released. */
    cudaHostUnregister((void*)descT->mat);
#endif

    /* Deallocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);

#if defined(PLASMA_CUDA)
    cudaHostUnregister((void*)AT);
#endif
    free( AT );
    PLASMA_Finalize();
    return 0;
}
Example #8
0
/*
 * Timing driver for PLASMA_ssyev_Tile (PlasmaNoVec, PlasmaUpper).
 *
 * Reads the problem size, thread count and tile sizes from iparam, generates
 * an n-by-n matrix with PLASMA_splgsy_Tile, times PLASMA_ssyev_Tile and
 * stores the elapsed time in *t_. No numerical check is implemented; the
 * check flag is read but has no effect. dparam is not used.
 *
 * Returns 0 on success, or -2 if a buffer cannot be allocated (in which case
 * everything already allocated is released and PLASMA is finalized).
 */
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *AT, *Q = NULL;
    float *W;
    PLASMA_desc *descA = NULL;
    PLASMA_desc *descQ = NULL;
    PLASMA_desc *descT = NULL;
    real_Double_t       t;
    int nb;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int uplo = PlasmaUpper;
    int vec  = PlasmaNoVec;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];

    /* Allocate Data */
    AT  = (float *)malloc(lda*n*sizeof(float));
    W   = (float *)malloc(n*sizeof(float));
    if (vec == PlasmaVec){
       Q = (float *)malloc(lda*n*sizeof(float));
       if ( (!Q) ) {
          printf("Out of Memory -Q-\n ");
          /* Release what was obtained so far and shut PLASMA down; the
             early return used to leak AT and W and skip PLASMA_Finalize. */
          free( AT );
          free( W );
          PLASMA_Finalize();
          return -2;
       }
    }

    /* Check if unable to allocate memory */
    if ( (!AT) || (!W) ) {
        printf("Out of Memory -\n ");
        /* free(NULL) is a no-op, so all three can be released unconditionally. */
        free( AT );
        free( W );
        free( Q );
        PLASMA_Finalize();
        return -2;
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_splgsy_Tile((float)0.0, descA, 51 );

    if (vec == PlasmaVec)
      PLASMA_Desc_Create(&descQ, Q, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);

    /* Save AT and bT in lapack layout for check (not implemented) */
    if ( check ) {
    }

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_ssyev(n, n, &descT);

    t = -cWtime();
    PLASMA_ssyev_Tile( vec, uplo, descA, W, descT, descQ );
    t += cWtime();
    *t_ = t;

    /* Check the solution (not implemented) */
    if ( check )
      {
      }

    /* DeAllocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);

    if (vec == PlasmaVec) {
      PLASMA_Desc_Destroy(&descQ);
      free( Q );
    }
    free( AT );
    free( W );
    PLASMA_Finalize();

    return 0;
}
Example #9
0
/*
 * Timing driver for PLASMA_zgesvd_Tile with jobu = jobvt = PlasmaNoVec.
 *
 * Several names used below (check, M, N, LDA, descA, U, VT, S) are not
 * declared in this body; presumably they are introduced by the
 * PASTE_CODE_IPARAM_LOCALS / PASTE_CODE_ALLOCATE_MATRIX* macros -- confirm
 * against the macro definitions before editing.
 *
 * Prints a message when PLASMA_zgesvd_Tile reports a non-zero INFO, but
 * always returns 0. No numerical check is implemented; both
 * "if ( check )" blocks are empty.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    PASTE_CODE_IPARAM_LOCALS( iparam );
    PLASMA_desc *descT;
    int jobu  = PlasmaNoVec;
    int jobvt = PlasmaNoVec;
    int INFO;

    /* Allocate Data */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PASTE_CODE_ALLOCATE_MATRIX( VT, (jobvt == PlasmaVec), PLASMA_Complex64_t, N, N );
    PASTE_CODE_ALLOCATE_MATRIX( U, (jobu == PlasmaVec), PLASMA_Complex64_t, M, M );
    PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 );

    /* Initialize Data */
    PLASMA_zplrnt_Tile(descA, 51 );

    /* Save AT and bT in lapack layout for check (not implemented) */
    if ( check ) {
    }

    /* Allocate Workspace */
    /* NOTE(review): the workspace is sized with (N, N) although descA is
       allocated as M-by-N -- confirm this is intended for M != N. */
    PLASMA_Alloc_Workspace_zgesvd(N, N, &descT);

    /* With both job flags fixed to PlasmaNoVec above, neither branch runs. */
    if ( jobu == PlasmaVec ) {
        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', M, M, 0., 1., U, M);
    }
    if ( jobvt == PlasmaVec ) {
        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', N, N, 0., 1., VT, N);
    }


    START_TIMING(); 
    INFO = PLASMA_zgesvd_Tile(jobu, jobvt, descA, S, descT, U, M, VT, N);
    STOP_TIMING();

    if(INFO!=0){
            printf(" ERROR OCCURED INFO %d\n",INFO);
    }


    /* Check the solution (not implemented) */
    if ( check )
      {
      }

    /* DeAllocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    if (jobu == PlasmaVec) {
      free( U );
    }
    if (jobvt == PlasmaVec) {
      free( VT );
    }
    PASTE_CODE_FREE_MATRIX( descA );
    free( S );

    return 0;
}
Example #10
0
/*
 * Timing driver for PLASMA_zgeqrf_Tile using the tree Householder mode
 * (PLASMA_TREE_HOUSEHOLDER with PLASMA_HOUSEHOLDER_SIZE set to 4).
 *
 * Reads the problem size, thread count, scheduler and tile sizes from iparam,
 * fills an M-by-N matrix with random values, times the factorization and
 * stores the elapsed time in *t_. dparam is not used.
 *
 * Returns 0. Terminates the process with a non-zero status if the matrix
 * cannot be allocated.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *AT;
    PLASMA_desc        *descA, *descT;
    real_Double_t       t;
    int nb;
    /* NOTE(review): M is read from TIMING_N and N from TIMING_M; this looks
       swapped but may be deliberate for tall-skinny runs -- confirm. */
    int M     = iparam[TIMING_N];
    int N     = iparam[TIMING_M];
    //int N = M/nrhs; //RUN WITH NRHS = 10 or 20 (ALSO USED IN TIMING.C)
    int lda = M;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled; the tile sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];

    /* Householder mode */
    //PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_FLAT_HOUSEHOLDER);
    PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_TREE_HOUSEHOLDER);
    PLASMA_Set(PLASMA_HOUSEHOLDER_SIZE, 4);

    /* Allocate Data. The element count is computed in size_t so that large
       problems do not overflow int before the multiplication by sizeof. */
    AT  = (PLASMA_Complex64_t *)malloc((size_t)lda*N*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, M, N, 0, 0, M, N);
    LAPACKE_zlarnv_work(1, ISEED, lda*N, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgels_Tile(M, N, &descT);

    t = -cWtime();
    PLASMA_zgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;

    /* Deallocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();

    return 0;
}
Example #11
0
/*
 * Timing driver for PLASMA_chegv_Tile (itype 1, PlasmaNoVec, PlasmaUpper).
 *
 * Reads the problem size, thread count and tile sizes from iparam, generates
 * the two n-by-n matrices with PLASMA_cplghe_Tile, times PLASMA_chegv_Tile
 * and stores the elapsed time in *t_. No numerical check is implemented; the
 * check flag is read but has no effect. dparam is not used.
 *
 * Returns 0. Terminates the process with a non-zero status if a buffer cannot
 * be allocated.
 */
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    PLASMA_Complex32_t *AT, *BT, *Q = NULL;
    float *W;
    PLASMA_desc *descA, *descB, *descQ, *descT;
    real_Double_t       t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int itype = 1;
    int vec   = PlasmaNoVec;
    int uplo  = PlasmaUpper;

    /* Initialize Plasma; dynamic scheduling is used unconditionally. */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    /* Autotuning is always disabled; the tile sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /* Allocate Data */
    AT  = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
    BT  = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
    W  = (float *)malloc(n*sizeof(float));
    if (vec == PlasmaVec){
       Q = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
       if ( (!Q) ) {
          printf("Out of Memory -Q-\n ");
          exit(1);
       }
    }

    /* Check if unable to allocate memory */
    if ( (!AT) || (!BT) || (!W) ) {
        printf("Out of Memory -\n ");
        exit(1);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_cplghe_Tile((float)0.0, descA, 51 );

    PLASMA_Desc_Create(&descB, BT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_cplghe_Tile((float)n, descB, 51 );

    /* Q is NULL here because vec is PlasmaNoVec; the descriptor is created
       regardless and destroyed below. */
    PLASMA_Desc_Create(&descQ, Q, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);

    /* Save AT and bT in lapack layout for check (not implemented) */
    if ( check ) {
    }

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_chegv(n, n, &descT);

    t = -cWtime();
    PLASMA_chegv_Tile( itype, vec, uplo, descA, descB, W, descT, descQ );
    t += cWtime();
    *t_ = t;

    /* Check the solution (not implemented) */
    if ( check )
      {
      }

    /* DeAllocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descQ);

    if (vec == PlasmaVec)
       free( Q );
    free( AT );
    free( BT );   /* was never released before */
    free( W );
    PLASMA_Finalize();

    return 0;
}
Example #12
0
/*
 * Test driver for PLASMA_dsygv.
 *
 * Expects exactly three arguments in argv[0..2]: N, LDA and LDB. For each of
 * the three itype values and two uplo values (taken from the file-level
 * itype[] / uplo[] tables) it runs PLASMA_dsygv and LAPACKE_dsygv on copies
 * of the same generated matrices and compares the W1 / W2 outputs with
 * check_solution, printing PASSED or FAILED per combination.
 *
 * Returns -1 on a wrong argument count, -2 if a buffer cannot be allocated,
 * and 0 otherwise (including when individual tests print FAILED).
 */
int testing_dsygv(int argc, char **argv)
{
    /* Check for number of arguments*/
    if (argc != 3) {
        USAGE("HEGV", "N LDA LDB",
              "   - N    : size of the matrices A and B\n"
              "   - LDA  : leading dimension of the matrix A\n"
              "   - LDB  : leading dimension of the matrix B\n");
        return -1;
    }

    double      eps = LAPACKE_dlamch_work('e');
    PLASMA_enum vec = PlasmaNoVec;
    int    N        = atoi(argv[0]);
    int    LDA      = atoi(argv[1]);
    int    LDB      = atoi(argv[2]);
    int    LDQ      = LDA;
    int    LDAxN    = LDA*N;
    int    LDBxN    = LDB*N;
    int    LDQxN    = LDQ*N;

    int info_ortho     = 0;
    int info_solution  = 0;
    int info_reduction = 0;
    int i, u;

    double *A1    = (double *)malloc(LDAxN*sizeof(double));
    double *A2    = (double *)malloc(LDAxN*sizeof(double));
    double *B1    = (double *)malloc(LDBxN*sizeof(double));
    double *B2    = (double *)malloc(LDBxN*sizeof(double));
    double *Q     = (double *)malloc(LDQxN*sizeof(double));
    double *Ainit = (double *)malloc(LDAxN*sizeof(double));
    double *Binit = (double *)malloc(LDBxN*sizeof(double));
    double *W1 = (double *)malloc(N*sizeof(double));
    double *W2 = (double *)malloc(N*sizeof(double));
    PLASMA_desc *T;

    /* Check if unable to allocate memory. W1 and W2 are included because both
       are written by the solvers below. free(NULL) is a no-op, so everything
       can be released unconditionally on failure. */
    if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)||(!Ainit)||(!Binit)||(!W1)||(!W2)){
        printf("Out of Memory \n ");
        free(A1);
        free(A2);
        free(B1);
        free(B2);
        free(Q);
        free(Ainit);
        free(Binit);
        free(W1);
        free(W2);
        return -2;
    }

    /*
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, 120);
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 20);
    */

    PLASMA_Enable(PLASMA_WARNINGS);
    PLASMA_Enable(PLASMA_ERRORS);
    PLASMA_Alloc_Workspace_dsygv(N, N, &T);

    /*----------------------------------------------------------
    *  TESTING DSYGV
    */

    /* Initialize A1 and Ainit */
    PLASMA_dplgsy(0., N, A1, LDA, 5198);
    LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA);

    /* Initialize B1 and Binit */
    PLASMA_dplgsy((double)N, N, B1, LDB, 4321 );
    LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB);

    printf("\n");
    printf("------ TESTS FOR PLASMA DSYGV ROUTINE -------  \n");
    printf("        Size of the Matrix %d by %d\n", N, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 60.\n");

    /*----------------------------------------------------------
     *  TESTING DSYGV
     */

    for (i=0; i<3; i++) {
        for (u=0; u<2; u++) {
            LAPACKE_dlaset_work(LAPACK_COL_MAJOR, 'A', LDA, N, 0., 1., Q, LDA);

            /* Each run starts from fresh copies of the generated matrices. */
            memcpy(A2, Ainit, LDAxN*sizeof(double));
            memcpy(B2, Binit, LDBxN*sizeof(double));

            PLASMA_dsygv(itype[i], vec, uplo[u], N, A2, LDA, B2, LDB, W2, T, Q, LDQ);

            /* Check the orthogonality, reduction and the eigen solutions */
            if (vec == PlasmaVec)
                info_ortho = check_orthogonality(N, N, Q, LDA, eps);
            /*
             * WARNING: For now, Q is associated to Band tridiagonal reduction and
             * not to the final tridiagonal reduction, so we can not call the check
             */
            if (0)
                info_reduction = check_reduction(itype[i], uplo[u], N, 1, A1, A2, LDA, B2, LDB, Q, eps);

            memcpy(A1, Ainit, LDAxN*sizeof(double));
            memcpy(B1, Binit, LDBxN*sizeof(double));

            /* Reference result from LAPACK for the same inputs. */
            LAPACKE_dsygv( LAPACK_COL_MAJOR,
                     itype[i], lapack_const(vec), lapack_const(uplo[u]),
                     N, A1, LDA, B1, LDB, W1);

            /*info_solution  = check_solution(N, N, N, A1, LDA, B1, B2, LDB, eps);*/
            info_solution = check_solution(N, W1, W2, eps);

            if ( (info_ortho == 0) && (info_reduction == 0) && (info_solution == 0)) {
                printf("***************************************************\n");
                printf(" ---- TESTING DSYGV (%s, %s) ...................... PASSED !\n", itypestr[i], uplostr[u]);
                printf("***************************************************\n");
            }
            else {
                printf("************************************************\n");
                printf(" - TESTING DSYGV (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]);
                printf("************************************************\n");
            }
        }
    }

    PLASMA_Dealloc_Handle_Tile(&T);
    free(A1);
    free(A2);
    free(B1);
    free(B2);
    free(Q);
    free(Ainit);
    free(Binit);
    free(W1);
    free(W2);

    return 0;
}