/**
 * Benchmark driver: builds an N x N tiled matrix, runs plasma_pdgeqrf_quark()
 * on it between START_TIMING() and STOP_TIMING(), and optionally validates
 * the result by solving one right-hand side with PLASMA_dgeqrs_Tile().
 *
 * @param t_      Timing output. It is not referenced by name in this body;
 *                presumably the timing macros (defined elsewhere) write it.
 * @param params  Run configuration; matrix_size, iblocksize, blocksize and
 *                check are read.
 * @return 0 when params->check is zero, otherwise the value returned by
 *         d_check_solution().
 */
static double RunTest(real_Double_t *t_, struct user_parameters* params)
{
    /* Not referenced by name below; presumably START_TIMING()/STOP_TIMING()
     * expand to code that uses it, so the name must stay. */
    double t;
    PLASMA_desc *descT;
    PLASMA_desc *descA = NULL;
    int64_t N  = params->matrix_size;
    int64_t IB = params->iblocksize;
    int64_t NB = params->blocksize;
    int check  = params->check;
    double residual = 0;
    double *savedA = NULL;

    /* Backing storage for the N x N input, attached to descA. */
    double *tilesA = malloc(N * N * sizeof(double));
    PLASMA_Desc_Create(&descA, tilesA, PlasmaRealDouble,
                       NB, NB, NB*NB, N, N, 0, 0, N, N);

    /* Fill the matrix; 5373 is presumably the generator seed. */
    #pragma omp parallel
    {
        #pragma omp single
        {
            plasma_pdpltmg_quark(*descA, 5373 );
        }
    }

    /* Keep an untouched copy of the input for the residual check. */
    if ( check ) {
        savedA = malloc(N * N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descA, (void*)savedA, N);
    }

    /* Workspace consumed by the factorization and by the later solve. */
    plasma_alloc_ibnb_tile(N, N, PlasmaRealDouble, &descT, IB, NB);

    /* Timed region. */
    START_TIMING();
    #pragma omp parallel
    {
        #pragma omp single
        {
            plasma_pdgeqrf_quark( *descA, *descT , IB);
        }
    }
    STOP_TIMING();

    if ( check ) {
        PLASMA_desc *descB = NULL;
        double *tilesB = malloc(N * sizeof(double));
        double *rhs    = malloc(N * sizeof(double));
        double *sol    = malloc(N * sizeof(double));

        /* Single-column right-hand side; 2264 is presumably its seed. */
        PLASMA_Desc_Create(&descB, tilesB, PlasmaRealDouble,
                           NB, NB, NB*NB, N, 1, 0, 0, N, 1);
        plasma_pdpltmg_seq(*descB, 2264 );

        /* Snapshot B before the solve overwrites descB. */
        plasma_pdtile_to_lapack_quark(*descB, (void*)rhs, N);

        PLASMA_dgeqrs_Tile( descA, descT, descB , IB);

        /* descB is read back again after the solve, this time as X. */
        plasma_pdtile_to_lapack_quark(*descB, (void*)sol, N);

        residual = d_check_solution(N, N, 1, savedA, N, rhs, sol, N);

        PASTE_CODE_FREE_MATRIX( descB );
        free( savedA );
        free( rhs );
        free( sol );
    }

    /* Release the workspace and the input matrix. */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PASTE_CODE_FREE_MATRIX( descA );

    return residual;
}
/**
 * Times PLASMA_dgeqrf_Tile() on an n x n tile matrix filled by
 * LAPACKE_dlarnv_work(), and optionally checks the factorization by solving
 * nrhs right-hand sides with PLASMA_dgeqrs_Tile().
 *
 * @param iparam  Integer settings indexed by the TIMING_* constants
 *                (N, NRHS, CHECK, THRDNBR, SCHEDULER, NB, IB are read).
 * @param dparam  Receives TIMING_RES, TIMING_ANORM, TIMING_BNORM and
 *                TIMING_XNORM when checking is enabled.
 * @param t_      Receives the elapsed time of the factorization call.
 * @return 0 on completion. Any allocation failure prints "Out of Memory" and
 *         terminates the process with EXIT_FAILURE.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *AT, *b = NULL, *bT = NULL, *x = NULL;
    PLASMA_desc *descA, *descB, *descT;
    real_Double_t t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    size_t at_elems;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

#if defined(PLASMA_CUDA)
    core_cublas_init();
#endif

    /* Autotuning is always disabled; tile sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /* Widen before multiplying: nt*nt*nb2 can exceed INT_MAX for large n. */
    at_elems = (size_t)nt * nt * nb2;

    /* Allocate Data */
    AT = (double *)malloc(at_elems * sizeof(double));
    if ( !AT ) {
        printf("Out of Memory \n ");
        exit(EXIT_FAILURE);
    }

#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)AT, at_elems*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Initialize Data.
     * NOTE(review): the element count handed to LAPACKE is still an int
     * expression; very large n would need a chunked fill. */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_dlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_dgels_Tile(n, n, &descT);

#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)descT->mat, descT->lm*descT->ln*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc((size_t)lda * n * sizeof(double));
        if ( !A ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_dgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        /* descB is n x nrhs in nb x nb tiles, so its storage must span every
         * tile column of the right-hand side, not just one.  Clamped to 1 so
         * the nrhs <= nb case allocates and fills exactly as before. */
        int ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
        if ( ntrhs < 1 )
            ntrhs = 1;

        b  = (double *)malloc((size_t)ldb * nrhs * sizeof(double));
        bT = (double *)malloc((size_t)nt * ntrhs * nb2 * sizeof(double));
        x  = (double *)malloc((size_t)ldb * nrhs * sizeof(double));
        if ( (!b) || (!bT) || (!x) ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }

        LAPACKE_dlarnv_work(1, ISEED, nt*ntrhs*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);

        /* b keeps the right-hand side; the solve then overwrites descB. */
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_dgeqrs_Tile( descA, descT, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

#if defined(PLASMA_CUDA)
    /* Undo the host registrations before the buffers are released. */
    cudaHostUnregister((void*)descT->mat);
    cudaHostUnregister((void*)AT);
#endif

    /* Free workspace and data */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PLASMA_Desc_Destroy(&descA);
    free( AT );

    PLASMA_Finalize();

    return 0;
}
/**
 * Times PLASMA_dsgesv() on an n x n system with nrhs right-hand sides, both
 * filled by LAPACKE_dlarnv_work(), and optionally checks the result with
 * d_check_solution() against saved copies of A and b.
 *
 * @param iparam  Integer settings indexed by the TIMING_* constants
 *                (N, NRHS, CHECK, THRDNBR, SCHEDULER, NB, IB are read).
 * @param dparam  Receives TIMING_RES, TIMING_ANORM, TIMING_BNORM and
 *                TIMING_XNORM when checking is enabled.
 * @param t_      Receives the elapsed time of the PLASMA_dsgesv() call.
 * @return 0 on success, -1 if any buffer could not be allocated. Every
 *         buffer obtained so far is released on both paths.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *b = NULL, *x = NULL;
    double *Acpy = NULL;
    double *bcpy = NULL;
    real_Double_t t;
    int *piv = NULL;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    int iter = 0;
    int plasma_started = 0;   /* PLASMA_Finalize() only after PLASMA_Init() */
    int status = -1;

    /* Widen before multiplying so large n does not overflow int. */
    size_t a_bytes = (size_t)lda * n    * sizeof(double);
    size_t b_bytes = (size_t)ldb * nrhs * sizeof(double);

    /* Allocate Data */
    A   = (double *)malloc(a_bytes);
    b   = (double *)malloc(b_bytes);
    x   = (double *)malloc(b_bytes);
    piv = (int *)malloc((size_t)n * sizeof(int));

    /* Check if unable to allocate memory */
    if ( (!A) || (!b) || (!x) || (!piv) ) {
        printf("Out of Memory \n ");
        goto cleanup;
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    plasma_started = 1;
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled; tile sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Initialize Data */
    LAPACKE_dlarnv_work(1, ISEED, lda*n,    A);
    LAPACKE_dlarnv_work(1, ISEED, ldb*nrhs, b);

    /* Save A and b */
    if (check) {
        Acpy = (double *)malloc(a_bytes);
        bcpy = (double *)malloc(b_bytes);
        if ( (!Acpy) || (!bcpy) ) {
            printf("Out of Memory \n ");
            goto cleanup;
        }
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, n,    A, lda, Acpy, lda);
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, b, ldb, bcpy, ldb);
    }

    t = -cWtime();
    PLASMA_dsgesv( n, nrhs, A, lda, piv, b, ldb, x, ldb, &iter );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, Acpy, lda, bcpy, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
    }

    status = 0;

cleanup:
    /* free(NULL) is a no-op, so this is safe from every exit point. */
    free( Acpy );
    free( bcpy );
    free( piv );
    free( x );
    free( b );
    free( A );

    if ( plasma_started )
        PLASMA_Finalize();

    return status;
}
/**
 * Benchmark driver: builds an N x N tiled matrix with plasma_pdplgsy_quark(),
 * runs plasma_pdpotrf_quark() on it between START_TIMING() and STOP_TIMING(),
 * and optionally validates the result by solving one right-hand side with
 * PLASMA_dpotrs_Tile().
 *
 * @param t_      Timing output. It is not referenced by name in this body;
 *                presumably the timing macros (defined elsewhere) write it.
 * @param params  Run configuration; matrix_size, blocksize and check are read.
 * @return 0 when params->check is zero, otherwise the value returned by
 *         d_check_solution().
 */
static double RunTest(real_Double_t *t_, struct user_parameters* params)
{
    /* Not referenced by name below; presumably START_TIMING()/STOP_TIMING()
     * expand to code that uses it, so the name must stay. */
    double t;
    PLASMA_desc *descA = NULL;
    int64_t N  = params->matrix_size;
    int64_t NB = params->blocksize;
    int check  = params->check;
    int uplo   = PlasmaUpper;
    double residual = 0;
    double *savedA = NULL;

    /* Backing storage for the N x N input, attached to descA. */
    double *tilesA = malloc(N * N * sizeof(double));
    PLASMA_Desc_Create(&descA, tilesA, PlasmaRealDouble,
                       NB, NB, NB*NB, N, N, 0, 0, N, N);

    /* Fill the matrix; (double)N and 51 are passed straight to the generator. */
    #pragma omp parallel
    {
        #pragma omp single
        {
            plasma_pdplgsy_quark( (double)N, *descA, 51 );
        }
    }

    /* Keep an untouched copy of the input for the residual check. */
    if ( check ) {
        savedA = malloc(N * N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descA, (void*)savedA, N);
    }

    /* Timed region: the factorization only. */
    START_TIMING();
    #pragma omp parallel
    {
        #pragma omp single
        {
            plasma_pdpotrf_quark(uplo, *descA);
        }
    }
    STOP_TIMING();

    if ( check ) {
        PLASMA_desc *descB = NULL;
        double *tilesB = malloc(N * sizeof(double));
        double *rhs    = malloc(N * sizeof(double));
        double *sol    = malloc(N * sizeof(double));

        /* Single-column right-hand side; 7672 is presumably its seed. */
        PLASMA_Desc_Create(&descB, tilesB, PlasmaRealDouble,
                           NB, NB, NB*NB, N, 1, 0, 0, N, 1);
        plasma_pdpltmg_seq(* descB, 7672 );

        /* Snapshot B before the solve overwrites descB. */
        plasma_pdtile_to_lapack_quark(*descB, (void*)rhs, N);

        PLASMA_dpotrs_Tile( uplo, descA, descB );

        /* descB is read back again after the solve, this time as X. */
        plasma_pdtile_to_lapack_quark(*descB, (void*)sol, N);

        residual = d_check_solution(N, N, 1, savedA, N, rhs, sol, N);

        PASTE_CODE_FREE_MATRIX( descB );
        free( savedA );
        free( rhs );
        free( sol );
    }

    PASTE_CODE_FREE_MATRIX( descA );

    return residual;
}
/**
 * Times PLASMA_dsposv_Tile() on an n x n tile matrix built by
 * PLASMA_dplgsy_Tile() with nrhs right-hand sides filled by
 * LAPACKE_dlarnv_work(), and optionally checks the result with
 * d_check_solution().
 *
 * @param iparam  Integer settings indexed by the TIMING_* constants
 *                (N, NRHS, CHECK, THRDNBR, SCHEDULER, NB are read).
 * @param dparam  Receives TIMING_RES, TIMING_ANORM, TIMING_BNORM and
 *                TIMING_XNORM when checking is enabled.
 * @param t_      Receives the elapsed time of the PLASMA_dsposv_Tile() call.
 * @return 0 on completion. Any allocation failure prints "Out of Memory" and
 *         terminates the process with EXIT_FAILURE.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *AT, *b = NULL, *bT, *x = NULL, *xT;
    real_Double_t t;
    PLASMA_desc *descA, *descB, *descX;
    int nb, nb2, nt, ntrhs;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    int iter = 0;
    size_t at_elems, bt_elems;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled; the tile size comes from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /* descB and descX are n x nrhs in nb x nb tiles, so their storage must
     * span every tile column of the right-hand side, not just one.  Clamped
     * to 1 so the nrhs <= nb case allocates and fills exactly as before. */
    ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
    if ( ntrhs < 1 )
        ntrhs = 1;

    /* Widen before multiplying: the products can exceed INT_MAX for large n. */
    at_elems = (size_t)nt * nt    * nb2;
    bt_elems = (size_t)nt * ntrhs * nb2;

    /* Allocate Data */
    AT = (double *)malloc(at_elems * sizeof(double));
    bT = (double *)malloc(bt_elems * sizeof(double));
    xT = (double *)malloc(bt_elems * sizeof(double));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!bT) || (!xT) ) {
        printf("Out of Memory \n ");
        exit(EXIT_FAILURE);
    }

    /* Initialize AT and bT for a symmetric positive definite system */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
    PLASMA_Desc_Create(&descX, xT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
    PLASMA_dplgsy_Tile((double)n, descA, 51 );
    LAPACKE_dlarnv_work(1, ISEED, nt*ntrhs*nb2, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc((size_t)lda * n    * sizeof(double));
        b = (double *)malloc((size_t)ldb * nrhs * sizeof(double));
        if ( (!A) || (!b) ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);
    }

    /* PLASMA DSPOSV */
    t = -cWtime();
    PLASMA_dsposv_Tile(PlasmaUpper, descA, descB, descX, &iter);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        x = (double *)malloc((size_t)ldb * nrhs * sizeof(double));
        if ( !x ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }
        PLASMA_Tile_to_Lapack(descX, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free(A);
        free(b);
        free(x);
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descX);

    free(AT);
    free(bT);
    free(xT);

    PLASMA_Finalize();

    return 0;
}