/*
 * RunTest -- time PLASMA_dpotrf_Tile (upper-triangular tile Cholesky
 * factorization) on an n-by-n matrix generated by PLASMA_dplgsy_Tile.
 *
 * iparam : run parameters; reads TIMING_N, TIMING_NRHS, TIMING_CHECK,
 *          TIMING_THRDNBR and TIMING_NB.
 * dparam : when checking is enabled, receives TIMING_RES, TIMING_ANORM,
 *          TIMING_BNORM and TIMING_XNORM from d_check_solution.
 * t_     : receives the elapsed time of the factorization call alone.
 *
 * Returns 0.  Terminates the process with EXIT_FAILURE when a buffer
 * cannot be allocated.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *AT = NULL, *b = NULL, *bT = NULL, *x = NULL;
    real_Double_t t;
    PLASMA_desc *descA, *descB;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    size_t sizeAT;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

#if defined(PLASMA_CUDA)
    core_cublas_init();
#endif

    /* Autotuning is always disabled; the tile size comes from iparam. */
    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */
    /* } */

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);   /* ceil(n / nb) */

    /*
     * Allocate Data.
     * The byte count is computed in size_t: nt*nt*nb2 exceeds INT_MAX for
     * moderately large n, which would otherwise overflow before the
     * multiplication by sizeof(double).
     */
    sizeAT = (size_t)nt * (size_t)nt * (size_t)nb2 * sizeof(double);
#if defined(PLASMA_CUDA)
    /* On failure AT is reset so the check below sees a well-defined NULL. */
    if ( cudaHostAlloc((void**)&AT, sizeAT, cudaHostAllocPortable) != cudaSuccess ) {
        AT = NULL;
    }
#else
    AT = (double *)malloc(sizeAT);
#endif

    /* Check if unable to allocate memory */
    if ( !AT ) {
        printf("Out of Memory \n ");
        exit(EXIT_FAILURE);
    }
    /* Alternative to cudaHostAlloc, kept for reference:
     * cudaHostRegister(AT, nt*nt*nb2*sizeof(double), cudaHostRegisterPortable); */

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_dplgsy_Tile((double)n, descA, 51 );

    /* Save AT in lapack layout for check (the factorization overwrites descA) */
    if ( check ) {
        A = (double *)malloc((size_t)lda * (size_t)n * sizeof(double));
        if ( !A ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    /* PLASMA DPOTRF: only this call is timed */
    t = -cWtime();
    PLASMA_dpotrf_Tile(PlasmaUpper, descA);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        /*
         * descB describes an n-by-nrhs matrix in nb-by-nb tiles, so bT must
         * cover every tile column of the right-hand sides, not just one.
         * With nrhs <= nb this is the same nt*nb2 elements as before.
         */
        int ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
        int nelemB;

        if ( ntrhs < 1 ) {
            ntrhs = 1;
        }
        nelemB = nt * ntrhs * nb2;

        b  = (double *)malloc((size_t)ldb * (size_t)nrhs * sizeof(double));
        bT = (double *)malloc((size_t)nelemB * sizeof(double));
        x  = (double *)malloc((size_t)ldb * (size_t)nrhs * sizeof(double));
        if ( (!b) || (!bT) || (!x) ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }

        /* Random right-hand side, kept in lapack layout as b */
        LAPACKE_dlarnv_work(1, ISEED, nelemB, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        /* Solve with the computed factor; descB then holds the solution */
        PLASMA_dpotrs_Tile( PlasmaUpper, descA, descB );
        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Finalize();

    /* Release AT with the allocator that produced it */
#if defined(PLASMA_CUDA)
    cudaFreeHost(AT);
#else
    free(AT);
#endif
    /* Counterpart of the cudaHostRegister alternative: cudaHostUnregister(AT); */

    return 0;
}
/*
 * RunTest -- time the in-place sequence DPOTRF followed by DPOTRI (or the
 * equivalent DTRTRI + DLAUUM pair) on an n-by-n tile matrix, lower triangle.
 *
 * The compile-time macros TRACE_BY_SEQUENCE and POTRI_SYNC select how the
 * calls are issued:
 *   TRACE_BY_SEQUENCE + POTRI_SYNC : three async calls, one sequence each,
 *                                    waiting after every call;
 *   TRACE_BY_SEQUENCE              : three async calls, one sequence each,
 *                                    waiting only after all are submitted;
 *   POTRI_SYNC                     : three synchronous calls;
 *   (default)                      : dpotrf + dpotri async on one sequence.
 *
 * iparam : run parameters; reads TIMING_N, TIMING_CHECK, TIMING_THRDNBR,
 *          TIMING_NB and TIMING_SCHEDULER.
 * dparam : when checking is enabled, TIMING_ANORM, TIMING_XNORM,
 *          TIMING_BNORM and TIMING_RES are set to 0.0 (no numerical check
 *          is performed here).
 * t_     : receives the elapsed time of the selected call sequence.
 *
 * Returns 0.  Terminates the process with EXIT_FAILURE when the matrix
 * buffer cannot be allocated.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *AT;
    real_Double_t t;
    PLASMA_desc *descA;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    PLASMA_enum uplo = PlasmaLower;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );

    /* Autotuning is always disabled; the tile size comes from iparam. */
    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */
    /* } */

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);   /* ceil(n / nb) */

    /*
     * Allocate Data.
     * The byte count is computed in size_t: nt*nt*nb2 exceeds INT_MAX for
     * moderately large n, which would otherwise overflow before the
     * multiplication by sizeof(double).
     */
    AT = (double *)malloc((size_t)nt * (size_t)nt * (size_t)nb2 * sizeof(double));

    /* Check if unable to allocate memory */
    if ( !AT ) {
        printf("Out of Memory \n ");
        exit(EXIT_FAILURE);
    }

    /*
     * Initialize Data.
     * The generation runs under static scheduling so that it is kept apart
     * from the sequence of the functions we want to trace.
     */
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_dplgsy_Tile( (double)n, descA, 51 );

    /* Scheduling mode for the timed section */
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* PLASMA DPOTRF / DTRTRI / DLAUUM */
    /*
     * Example of the different ways to combine several asynchronous calls
     */
    {
#if defined(TRACE_BY_SEQUENCE)
        PLASMA_sequence *sequence[3];
        PLASMA_request request[3] = { PLASMA_REQUEST_INITIALIZER,
                                      PLASMA_REQUEST_INITIALIZER,
                                      PLASMA_REQUEST_INITIALIZER };

        /* Sequence creation is outside the timed region in this variant */
        PLASMA_Sequence_Create(&sequence[0]);
        PLASMA_Sequence_Create(&sequence[1]);
        PLASMA_Sequence_Create(&sequence[2]);

        t = -cWtime();

#if defined(POTRI_SYNC)
        /* Wait for each step before submitting the next one */
        PLASMA_dpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]);
        PLASMA_Sequence_Wait(sequence[0]);

        PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]);
        PLASMA_Sequence_Wait(sequence[1]);

        PLASMA_dlauum_Tile_Async(uplo, descA, sequence[2], &request[2]);
        PLASMA_Sequence_Wait(sequence[2]);
#else
        /* Submit all three steps, then wait on every sequence */
        PLASMA_dpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]);
        PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]);
        PLASMA_dlauum_Tile_Async(uplo, descA, sequence[2], &request[2]);

        PLASMA_Sequence_Wait(sequence[0]);
        PLASMA_Sequence_Wait(sequence[1]);
        PLASMA_Sequence_Wait(sequence[2]);
#endif
        t += cWtime();

        PLASMA_Sequence_Destroy(sequence[0]);
        PLASMA_Sequence_Destroy(sequence[1]);
        PLASMA_Sequence_Destroy(sequence[2]);
#else

#if defined(POTRI_SYNC)
        /* Three synchronous calls */
        t = -cWtime();
        PLASMA_dpotrf_Tile(uplo, descA);
        PLASMA_dtrtri_Tile(uplo, PlasmaNonUnit, descA);
        PLASMA_dlauum_Tile(uplo, descA);
        t += cWtime();
#else
        /* Default: asynchronous calls with only one sequence */
        PLASMA_sequence *sequence;
        PLASMA_request request[2] = { PLASMA_REQUEST_INITIALIZER,
                                      PLASMA_REQUEST_INITIALIZER };

        /* Sequence creation is inside the timed region in this variant */
        t = -cWtime();
        PLASMA_Sequence_Create(&sequence);
        PLASMA_dpotrf_Tile_Async(uplo, descA, sequence, &request[0]);
        PLASMA_dpotri_Tile_Async(uplo, descA, sequence, &request[1]);
        PLASMA_Sequence_Wait(sequence);
        t += cWtime();

        PLASMA_Sequence_Destroy(sequence);
#endif
#endif
        *t_ = t;
    }

    /* No numerical check is done for the inverse: report zeros */
    if ( check ) {
        dparam[TIMING_ANORM] = 0.0;
        dparam[TIMING_XNORM] = 0.0;
        dparam[TIMING_BNORM] = 0.0;
        dparam[TIMING_RES]   = 0.0;
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Finalize();
    free(AT);

    return 0;
}
/*
 * RunTest -- time PLASMA_dsposv_Tile (upper triangle) solving A*X = B for
 * an n-by-n matrix generated by PLASMA_dplgsy_Tile and a random n-by-nrhs
 * right-hand side.
 *
 * iparam : run parameters; reads TIMING_N, TIMING_NRHS, TIMING_CHECK,
 *          TIMING_THRDNBR, TIMING_SCHEDULER and TIMING_NB.
 * dparam : when checking is enabled, receives TIMING_RES, TIMING_ANORM,
 *          TIMING_BNORM and TIMING_XNORM from d_check_solution.
 * t_     : receives the elapsed time of the solver call alone.
 *
 * Returns 0.  Terminates the process with EXIT_FAILURE when a buffer
 * cannot be allocated.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *AT, *b = NULL, *bT, *x = NULL, *xT;
    real_Double_t t;
    PLASMA_desc *descA, *descB, *descX;
    int nb, nb2, nt, ntrhs, nelemB;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    int iter;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled; the tile size comes from iparam. */
    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */
    /* } */

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);   /* ceil(n / nb) */

    /*
     * descB and descX describe n-by-nrhs matrices in nb-by-nb tiles, so
     * their buffers must cover every tile column of the right-hand sides,
     * not just one.  With nrhs <= nb this is the same nt*nb2 elements as
     * a single tile column.
     */
    ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
    if ( ntrhs < 1 ) {
        ntrhs = 1;
    }
    nelemB = nt * ntrhs * nb2;

    /*
     * Allocate Data.
     * Byte counts are computed in size_t: nt*nt*nb2 exceeds INT_MAX for
     * moderately large n, which would otherwise overflow before the
     * multiplication by sizeof(double).
     */
    AT = (double *)malloc((size_t)nt * (size_t)nt * (size_t)nb2 * sizeof(double));
    bT = (double *)malloc((size_t)nelemB * sizeof(double));
    xT = (double *)malloc((size_t)nelemB * sizeof(double));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!bT) || (!xT) ) {
        printf("Out of Memory \n ");
        exit(EXIT_FAILURE);
    }

    /* Initialize AT and bT for a symmetric positive definite system */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
    PLASMA_Desc_Create(&descX, xT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);

    PLASMA_dplgsy_Tile((double)n, descA, 51 );
    LAPACKE_dlarnv_work(1, ISEED, nelemB, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc((size_t)lda * (size_t)n    * sizeof(double));
        b = (double *)malloc((size_t)ldb * (size_t)nrhs * sizeof(double));
        if ( (!A) || (!b) ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);
    }

    /* PLASMA DSPOSV: only this call is timed */
    t = -cWtime();
    PLASMA_dsposv_Tile(PlasmaUpper, descA, descB, descX, &iter);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        x = (double *)malloc((size_t)ldb * (size_t)nrhs * sizeof(double));
        if ( !x ) {
            printf("Out of Memory \n ");
            exit(EXIT_FAILURE);
        }
        PLASMA_Tile_to_Lapack(descX, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free(A);
        free(b);
        free(x);
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descX);

    free(AT);
    free(bT);
    free(xT);

    PLASMA_Finalize();

    return 0;
}